0
|
1 #!/usr/bin/perl -w
|
|
2 use strict;
|
|
3
|
|
4 ############################################################################
|
|
5 # script to remove position or column from a multi-Fasta file
|
|
6 # in function of a given character
|
|
7 ############################################################################
|
|
8
|
|
9
|
|
10 my $inFile = $ARGV[0]; #'example_seq.fasta';
|
|
11 my $char = $ARGV[1]; #'N';
|
|
12 my @headers = ();
|
|
13 my @sequences = ();
|
|
14 my $index = 0;
|
|
15 my $outFile = 'results.fna';
|
|
16 open(IN,'<',$inFile) or die "Unable to read file $inFile: $!\n";
|
|
17 while( defined( my $line = <IN> ) ){
|
|
18 chomp($line);
|
|
19 if( $line =~ m/^>/ ){
|
|
20 $headers[$index] = $line;
|
|
21 $index++;
|
|
22 }
|
|
23 else{
|
|
24 $sequences[$index-1] .= $line;
|
|
25 }
|
|
26 }
|
|
27 close(IN);
|
|
28 my %lookup = ();
|
|
29 for(my $i=0;$i<=$#sequences;$i++){
|
|
30 my $seq = $sequences[$i];
|
|
31 my $len = length($seq);
|
|
32 for(my $j=0;$j<$len;$j++){
|
|
33 my $residue = substr($seq,$j,1);
|
|
34 if( $residue eq $char ){
|
|
35 $lookup{$j} = 1;
|
|
36 }
|
|
37 }
|
|
38 }
|
|
39 #print "# Skipped the following positions (zero indexed):\n";
|
|
40 #print "# ",join(", ", sort {$a <=> $b} keys (%lookup)), "\n";
|
|
41 #print "# Cleaned sequences:\n";
|
|
42 #open(OUT,'>',$outFile) or die "Unable to write file $outFile: $!\n";
|
|
43 for(my $i=0;$i<=$#headers;$i++){
|
|
44 my $head = $headers[$i];
|
|
45 my $seq = $sequences[$i];
|
|
46 my $len = length($seq);
|
|
47 my $out = '';
|
|
48 for(my $j=0;$j<$len;$j++){
|
|
49 my $residue = substr($seq,$j,1);
|
|
50 $out .= $residue unless exists $lookup{$j};
|
|
51 }
|
|
52 print $head, "\n", $out, "\n";
|
|
53 #print OUT $head, "\n", $out, "\n";
|
|
54 }
|
|
55 #close(OUT);
|
|
56 #print "\n";
|
|
57 #print "End of program! Your result is written in file $outFile\n";
|