Mercurial > repos > nml > fasta_extract
diff fa-extract-few.pl @ 0:75e70a6d8d60 draft
Uploaded
author | nml |
---|---|
date | Mon, 06 Feb 2017 10:27:59 -0500 |
parents | |
children | 21888a4371d1 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fa-extract-few.pl Mon Feb 06 10:27:59 2017 -0500 @@ -0,0 +1,77 @@ +#!/usr/bin/perl -w +use strict; +use Bio::SeqIO; + +my(@Options, $verbose, $inverse, $file,$list,$exact); +setOptions(); + +my $in = Bio::SeqIO->new(-file=>$file, -format=>'Fasta'); +my $out = Bio::SeqIO->new(-fh=>\*STDOUT, -format=>'Fasta'); +my $nread=0; +my $nwrote=0; + +my $pattern = join('|', @ARGV); + +if ( $list) { + my @list; + open my $in,'<',$list; + while ( <$in>) { + chomp; + push @list,$_; + } + close $in; + $pattern = join ('|',@list); +} + +while (my $seq = $in->next_seq) { + $nread++; + my $match = ($seq->description =~ m/($pattern)/ or $seq->display_id =~ m/($pattern)/); + if ($exact) { + $match = ($seq->display_id =~ m/^($pattern)$/); + } + #print STDERR "Found match: ",$seq->display_id, " ", $seq->description, "\n" if $verbose; + if ($match ^ $inverse) { # rare use for XOR ! + $out->write_seq($seq); + $nwrote++; + } +} + +#print STDERR "Read $nread sequences, wrote $nwrote, with pattern: $pattern\n"; +exit(0); +#---------------------------------------------------------------------- +# Option setting routines + +sub setOptions { + use Getopt::Long; + + @Options = ( + {OPT=>"h|help", VAR=>\&usage, DESC=>"This help"}, + {OPT=>"verbose!", VAR=>\$verbose, DEFAULT=>0, DESC=>"Verbose"}, + {OPT=>"v|inverse!", VAR=>\$inverse, DEFAULT=>0, DESC=>"Output NON-matching sequences instead"}, + {OPT=>"f|file=s", VAR=>\$file, DEFAULT=>"", DESC=>"The fasta file to extract sequences from"}, + {OPT=>"exact", VAR=>\$exact, DEFAULT=>"", DESC=>"Exact matches for display id only"}, + {OPT=>"l|list=s", VAR=>\$list, DEFAULT=>"", DESC=>"List of pattern to look from"}, + ); + + (!@ARGV) && (usage()); + + &GetOptions(map {$_->{OPT}, $_->{VAR}} @Options) || usage(); + + # Now setup default values. + foreach (@Options) { + if (defined($_->{DEFAULT}) && !defined(${$_->{VAR}})) { + ${$_->{VAR}} = $_->{DEFAULT}; + } + } +} + +sub usage { + print "Usage: $0 [options] id1 [id2 ...] < input.fasta > output.fasta\n"; + foreach (@Options) { + printf " --%-13s %s%s.\n",$_->{OPT},$_->{DESC}, + defined($_->{DEFAULT}) ? " (default '$_->{DEFAULT}')" : ""; + } + exit(1); +} + +#----------------------------------------------------------------------