annotate removeChar.pl @ 1:b6eb9111d7af draft default tip

Uploaded
author dcouvin
date Fri, 17 Sep 2021 19:39:54 +0000
parents 587281a1acec
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
587281a1acec Uploaded
dcouvin
parents:
diff changeset
#!/usr/bin/perl
use strict;
use warnings;

############################################################################
# Script to remove positions (columns) from a multi-FASTA file in function
# of a given character.
#
# Every 0-based position at which the character occurs in at least one
# sequence is dropped from ALL sequences. The cleaned records are printed
# to STDOUT as one header line followed by one sequence line.
#
# Usage: removeChar.pl <input.fasta> <character>
#   e.g. removeChar.pl example_seq.fasta N
############################################################################

# Read a multi-FASTA file.
#   $path - file to read (anything three-argument open() accepts).
# Returns two array references of equal length: the header lines (leading
# '>' included) and the sequences, each with its lines joined together.
# Dies if the file cannot be opened or if sequence data appears before the
# first header.
sub read_fasta {
    my ($path) = @_;

    my (@headers, @sequences);
    open(my $in, '<', $path) or die "Unable to read file $path: $!\n";
    while (defined(my $line = <$in>)) {
        # Strip LF and CRLF endings alike: a "\r" left inside a joined
        # sequence would shift every later column by one position.
        $line =~ s/[\r\n]+\z//;

        if ($line =~ m/^>/) {
            push @headers, $line;
            # Start from '' so a header without sequence lines still has a
            # defined (empty) sequence at the same index.
            push @sequences, '';
        }
        elsif ($line ne '') {
            die "Sequence data found before the first '>' header in $path\n"
                unless @headers;
            $sequences[-1] .= $line;
        }
    }
    close($in);

    return (\@headers, \@sequences);
}

# Collect the positions at which $char occurs in any sequence.
#   $sequences - array reference of sequence strings.
#   $char      - the character to look for (comparison is case-sensitive).
# Returns a hash reference whose keys are the 0-based positions to drop.
sub columns_with_char {
    my ($sequences, $char) = @_;

    my %is_flagged;
    # Anything other than exactly one character can never equal a single
    # residue, so it flags nothing (and '' would make index() loop forever).
    return \%is_flagged unless defined $char && length($char) == 1;

    for my $seq (@{$sequences}) {
        my $pos = index($seq, $char);
        while ($pos >= 0) {
            $is_flagged{$pos} = 1;
            $pos = index($seq, $char, $pos + 1);
        }
    }

    return \%is_flagged;
}

# Return $seq without the residues at the flagged positions.
#   $seq        - sequence string.
#   $is_flagged - hash reference as returned by columns_with_char().
# Flagged positions beyond the end of $seq are simply ignored.
sub strip_columns {
    my ($seq, $is_flagged) = @_;

    return join '',
        map  { substr($seq, $_, 1) }
        grep { !exists $is_flagged->{$_} } 0 .. length($seq) - 1;
}

# Entry point: validate the arguments, then print the cleaned records.
#   $in_file - path of the multi-FASTA file.
#   $char    - single character marking the columns to remove.
# Dies with a usage message when an argument is missing or $char is not
# exactly one character.
sub main {
    my ($in_file, $char) = @_;

    die "Usage: $0 <input.fasta> <character>\n"
        unless defined $in_file && defined $char && length($char) == 1;

    my ($headers, $sequences) = read_fasta($in_file);
    my $is_flagged = columns_with_char($sequences, $char);

    for my $i (0 .. $#{$headers}) {
        print $headers->[$i], "\n",
            strip_columns($sequences->[$i], $is_flagged), "\n";
    }

    return;
}

# Run only when executed as a script, so the helpers above can be loaded
# (e.g. by a test) without side effects.
main(@ARGV) unless caller;

1;