Mercurial > repos > nml > fasta_extract
annotate fa-extract-few.pl @ 5:21888a4371d1 draft default tip
planemo upload for repository https://toolshed.g2.bx.psu.edu/view/nml/fasta_extract commit df418d3445a03025bf38d27b1272fb913be548f1-dirty
author | nml |
---|---|
date | Wed, 20 Sep 2017 13:25:29 -0400 |
parents | 75e70a6d8d60 |
children |
rev | line source |
---|---|
5
21888a4371d1
planemo upload for repository https://toolshed.g2.bx.psu.edu/view/nml/fasta_extract commit df418d3445a03025bf38d27b1272fb913be548f1-dirty
nml
parents:
0
diff
changeset
|
1 #!/usr/bin/env perl -w |
0 | 2 use strict; |
3 use Bio::SeqIO; | |
4 | |
5 my(@Options, $verbose, $inverse, $file,$list,$exact); | |
6 setOptions(); | |
7 | |
8 my $in = Bio::SeqIO->new(-file=>$file, -format=>'Fasta'); | |
9 my $out = Bio::SeqIO->new(-fh=>\*STDOUT, -format=>'Fasta'); | |
10 my $nread=0; | |
11 my $nwrote=0; | |
12 | |
13 my $pattern = join('|', @ARGV); | |
14 | |
15 if ( $list) { | |
16 my @list; | |
17 open my $in,'<',$list; | |
18 while ( <$in>) { | |
19 chomp; | |
20 push @list,$_; | |
21 } | |
22 close $in; | |
23 $pattern = join ('|',@list); | |
24 } | |
25 | |
26 while (my $seq = $in->next_seq) { | |
27 $nread++; | |
28 my $match = ($seq->description =~ m/($pattern)/ or $seq->display_id =~ m/($pattern)/); | |
29 if ($exact) { | |
30 $match = ($seq->display_id =~ m/^($pattern)$/); | |
31 } | |
32 #print STDERR "Found match: ",$seq->display_id, " ", $seq->description, "\n" if $verbose; | |
33 if ($match ^ $inverse) { # rare use for XOR ! | |
34 $out->write_seq($seq); | |
35 $nwrote++; | |
36 } | |
37 } | |
38 | |
39 #print STDERR "Read $nread sequences, wrote $nwrote, with pattern: $pattern\n"; | |
40 exit(0); | |
41 #---------------------------------------------------------------------- | |
42 # Option setting routines | |
43 | |
44 sub setOptions { | |
45 use Getopt::Long; | |
46 | |
47 @Options = ( | |
48 {OPT=>"h|help", VAR=>\&usage, DESC=>"This help"}, | |
49 {OPT=>"verbose!", VAR=>\$verbose, DEFAULT=>0, DESC=>"Verbose"}, | |
50 {OPT=>"v|inverse!", VAR=>\$inverse, DEFAULT=>0, DESC=>"Output NON-matching sequences instead"}, | |
51 {OPT=>"f|file=s", VAR=>\$file, DEFAULT=>"", DESC=>"The fasta file to extract sequences from"}, | |
52 {OPT=>"exact", VAR=>\$exact, DEFAULT=>"", DESC=>"Exact matches for display id only"}, | |
53 {OPT=>"l|list=s", VAR=>\$list, DEFAULT=>"", DESC=>"List of pattern to look from"}, | |
54 ); | |
55 | |
56 (!@ARGV) && (usage()); | |
57 | |
58 &GetOptions(map {$_->{OPT}, $_->{VAR}} @Options) || usage(); | |
59 | |
60 # Now setup default values. | |
61 foreach (@Options) { | |
62 if (defined($_->{DEFAULT}) && !defined(${$_->{VAR}})) { | |
63 ${$_->{VAR}} = $_->{DEFAULT}; | |
64 } | |
65 } | |
66 } | |
67 | |
68 sub usage { | |
69 print "Usage: $0 [options] id1 [id2 ...] < input.fasta > output.fasta\n"; | |
70 foreach (@Options) { | |
71 printf " --%-13s %s%s.\n",$_->{OPT},$_->{DESC}, | |
72 defined($_->{DEFAULT}) ? " (default '$_->{DEFAULT}')" : ""; | |
73 } | |
74 exit(1); | |
75 } | |
76 | |
77 #---------------------------------------------------------------------- |