Input (contents of a.txt, as shown by `cat a.txt`):
i am fine sriram
krishna how r u , how r u sriram , How r u jadu
thank you , Thankyou sir, thank you
how r u krishna ? how r u sriram?
required output:
i am fine sriram
krishna how r u , sriram How jadu
thank you , Thankyou sir,
how r u krishna ? sriram?
Script usage: ./merge.pl -file=a.txt
#!/usr/bin/perl -ws
# Remove duplicate words from a file on a per-line basis: within each line a
# word is kept only at its first occurrence; the same word may still appear
# again on other lines. Words are whitespace-separated and compared exactly
# (case-sensitively), so "how" and "How", or "sriram" and "sriram?", are
# treated as different words.
#
# Usage: ./merge.pl -file=<filename>
# Exit status: 255 when no file is given or the file cannot be opened.
use strict;

our ($file);    # populated by perl's -s switch from -file=<filename>

# Run the script body only when executed directly, so that dedupe_words()
# can be loaded (require/do) and exercised without touching any file.
main() unless caller;

# Return $line with every repeated word dropped, keeping first occurrences
# in their original order. Words are re-joined with single spaces; leading
# and trailing whitespace is discarded by the awk-style split.
sub dedupe_words {
    my ($line) = @_;
    my %seen;    # fresh per call, so duplicates are tracked per line only
    return join ' ', grep { !$seen{$_}++ } split ' ', $line;
}

# Script entry point: validate the -file switch, then print each line of the
# file to STDOUT with its duplicate words removed.
sub main {
    if ( !defined $file ) {
        print "Usage: $0 -file=<filename>\n";
        exit 255;
    }

    # Three-argument open with a lexical handle: the file name can never be
    # interpreted as an open mode, and the handle is scoped to this sub.
    open( my $fh, '<', $file ) or do {
        print STDERR "couldnot open file: $!\n";
        exit 255;    # nothing to process; do not continue with a bad handle
    };

    # NOTE: the readline operator must be written without spaces inside the
    # angle brackets; "< FH >" is parsed as a glob pattern, not a file read.
    while ( my $line = <$fh> ) {
        chomp $line;
        print dedupe_words($line), "\n";
    }

    close($fh);
    return;
}
Please find an awk-based shell script for the same task here (click this link).
ReplyDelete