#!/usr/bin/perl
# Recovered from a Mercurial changeset patch (user dcouvin, date 1631906985 0,
# node 587281a1acece1fe59fe8bcd9fb5828790aa92b7, uploaded Fri Sep 17 19:29:45
# 2021 +0000) that added input.fasta, removeChar.pl and removeChar.xml.
# The sample input and what survives of the XML wrapper are kept in the POD
# after __END__.
use strict;
use warnings;

############################################################################
# script to remove position or column from a multi-Fasta file
# in function of a given character
############################################################################
#
# Usage: removeChar.pl <multi-fasta file> <character>
#
# Every column (zero-based position) in which at least one sequence carries
# <character> is removed from all sequences.  The cleaned records are
# printed to STDOUT as "header\nsequence\n", one record per input header.
#
# <character> is compared against single residues with `eq`, so an empty or
# multi-character value never matches and the sequences are printed unchanged.
# (An earlier variant wrote to 'results.fna'; that code path was already
# commented out and has been dropped.)

main(@ARGV);

# Entry point: validate arguments, read the alignment, print cleaned records.
sub main {
    my ($in_file, $char) = @_;

    # Without this check an undefined file name makes 3-arg open() create an
    # anonymous temporary file, and the script silently prints nothing.
    die "Usage: $0 <multi-fasta file> <character>\n"
        unless defined($in_file) && defined($char);

    my ($headers, $sequences) = read_fasta($in_file);
    my $drop = columns_containing($char, $sequences);

    for my $i (0 .. $#{$headers}) {
        print $headers->[$i], "\n",
              strip_columns($sequences->[$i], $drop), "\n";
    }
    return;
}

# Read a multi-FASTA file.
# Returns two array refs of equal length: header lines (including the leading
# '>') and the matching sequences with their lines concatenated.
# Dies if the file cannot be opened or if sequence text precedes any header.
sub read_fasta {
    my ($path) = @_;

    open(my $in, '<', $path) or die "Unable to read file $path: $!\n";

    my @headers;
    my @sequences;
    while (defined(my $line = <$in>)) {
        # Strip LF and CRLF endings; a stray "\r" inside a concatenated
        # sequence would shift every following column.
        $line =~ s/\r?\n\z//;

        if ($line =~ m/^>/) {
            push @headers, $line;
            # Start with '' so a header without sequence lines still yields
            # a defined (empty) sequence.
            push @sequences, '';
        }
        elsif (@headers) {
            $sequences[-1] .= $line;
        }
        elsif ($line =~ m/\S/) {
            die "Sequence data before the first '>' header in $path (line $.)\n";
        }
    }
    close($in);

    return (\@headers, \@sequences);
}

# Collect the zero-based columns in which any sequence has $char.
# Returns a hash ref whose keys are the columns to remove.
sub columns_containing {
    my ($char, $sequences) = @_;

    my %drop;
    for my $seq (@{$sequences}) {
        for my $col (0 .. length($seq) - 1) {
            $drop{$col} = 1 if substr($seq, $col, 1) eq $char;
        }
    }
    return \%drop;
}

# Return $seq without the residues at the columns listed in %$drop.
sub strip_columns {
    my ($seq, $drop) = @_;

    return join '',
        map  { substr($seq, $_, 1) }
        grep { !exists $drop->{$_} } 0 .. length($seq) - 1;
}

__END__

=head1 NAME

removeChar.pl - remove alignment columns that contain a given character

=head1 SYNOPSIS

    removeChar.pl input.fasta - > cleaned.fasta

=head1 DESCRIPTION

Description carried by removeChar.xml in the original changeset: "allows to
remove positions (or columns) from a multi-Fasta alignment file in function
of a given character".  The XML markup itself did not survive; the only
other remaining fragments are C<"$output"> and C<< ]]> >>.

=head1 SAMPLE INPUT

Contents of input.fasta from the same changeset:

    >sequence1
    atgcatgcatgcacgatcgatcgat--gca-tgcac
    >sequence2
    aaacatgcatgcacgatcgatcgatgtatg---cac
    >sequence3
    atgcatgcatgcactatcgatcgat-gcata--aac
    >sequence4
    atgcatgcacgcatgatcgatcga-tgca--tgcac

=cut