annotate fa-extract-few.pl @ 0:75e70a6d8d60 draft

Uploaded
author nml
date Mon, 06 Feb 2017 10:27:59 -0500
parents
children 21888a4371d1
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
75e70a6d8d60 Uploaded
nml
parents:
diff changeset
1 #!/usr/bin/perl -w
75e70a6d8d60 Uploaded
nml
parents:
diff changeset
2 use strict;
75e70a6d8d60 Uploaded
nml
parents:
diff changeset
3 use Bio::SeqIO;
75e70a6d8d60 Uploaded
nml
parents:
diff changeset
4
75e70a6d8d60 Uploaded
nml
parents:
diff changeset
# Option variables; all of them are filled in by setOptions() below.
my(@Options, $verbose, $inverse, $file, $list, $exact);
setOptions();

# Input: the FASTA file named with --file, or STDIN when no file was given
# (the usage text advertises "< input.fasta").
my $in = (defined $file && $file ne '')
    ? Bio::SeqIO->new(-file => $file,   -format => 'Fasta')
    : Bio::SeqIO->new(-fh   => \*STDIN, -format => 'Fasta');
my $out = Bio::SeqIO->new(-fh => \*STDOUT, -format => 'Fasta');
my $nread  = 0;
my $nwrote = 0;

# Patterns are the remaining command-line arguments, unless --list names a
# file, in which case the file's lines (one pattern per line) replace them.
# Patterns are treated as regular expressions, exactly as before.
my @patterns = @ARGV;
if ($list) {
    open my $list_fh, '<', $list
        or die "Cannot open pattern list '$list': $!\n";
    @patterns = ();
    while (my $line = <$list_fh>) {
        chomp $line;
        $line =~ s/\r\z//;    # tolerate CRLF line endings
        push @patterns, $line;
    }
    close $list_fh;
}

# An empty pattern would add an empty alternative to the joined regex, which
# matches every record; drop such entries instead.
@patterns = grep { defined $_ && $_ ne '' } @patterns;

# Compile the matcher once, outside the record loop.
#   --exact : the whole display id must equal one of the alternatives
#   default : any alternative may occur anywhere in the id or description
# With no usable pattern nothing matches ((?!) can never succeed), so
# --inverse then outputs every record.
my $joined   = join('|', @patterns);
my $match_re = !@patterns ? qr/(?!)/
             : $exact     ? qr/\A(?:$joined)\z/
             :              qr/(?:$joined)/;

while (my $seq = $in->next_seq) {
    $nread++;

    # Guard against undefined fields so -w does not warn in the match.
    my $id = $seq->display_id;
    $id = '' unless defined $id;

    my $matched;
    if ($exact) {
        $matched = ($id =~ $match_re);
    }
    else {
        my $desc = $seq->description;
        $desc = '' unless defined $desc;
        $matched = ($desc =~ $match_re || $id =~ $match_re);
    }

    # Logical xor: write matches normally, non-matches under --inverse.
    if ($matched xor $inverse) {
        $out->write_seq($seq);
        $nwrote++;
    }
}

# Summary goes to STDERR and only on request, so it cannot pollute the FASTA
# written to STDOUT.
print STDERR "Read $nread sequences, wrote $nwrote, with pattern: $joined\n"
    if $verbose;
exit(0);
75e70a6d8d60 Uploaded
nml
parents:
diff changeset
41 #----------------------------------------------------------------------
75e70a6d8d60 Uploaded
nml
parents:
diff changeset
42 # Option setting routines
75e70a6d8d60 Uploaded
nml
parents:
diff changeset
43
75e70a6d8d60 Uploaded
nml
parents:
diff changeset
# Declare the command-line options, parse them with Getopt::Long, and then
# fill in the declared default for every option variable still undefined.
# Calls usage() (which exits) when no arguments were supplied at all or when
# option parsing fails.
sub setOptions {
    use Getopt::Long;

    # Each entry: Getopt::Long spec, destination (scalar ref, or code ref
    # for --help), optional default, and the text shown by usage().
    @Options = (
        {OPT=>"h|help", VAR=>\&usage, DESC=>"This help"},
        {OPT=>"verbose!", VAR=>\$verbose, DEFAULT=>0, DESC=>"Verbose"},
        {OPT=>"v|inverse!", VAR=>\$inverse, DEFAULT=>0, DESC=>"Output NON-matching sequences instead"},
        {OPT=>"f|file=s", VAR=>\$file, DEFAULT=>"", DESC=>"The fasta file to extract sequences from"},
        {OPT=>"exact", VAR=>\$exact, DEFAULT=>"", DESC=>"Exact matches for display id only"},
        {OPT=>"l|list=s", VAR=>\$list, DEFAULT=>"", DESC=>"List of pattern to look from"},
    );

    # Nothing on the command line at all: show help and exit.
    usage() unless @ARGV;

    # Flatten the table into the (spec, destination) pairs GetOptions takes.
    my @getopt_spec;
    for my $entry (@Options) {
        push @getopt_spec, $entry->{OPT}, $entry->{VAR};
    }
    GetOptions(@getopt_spec) or usage();

    # Now setup default values.
    for my $entry (@Options) {
        # Entries without a DEFAULT (e.g. --help, whose VAR is a code ref)
        # are skipped before their VAR is ever dereferenced.
        next unless defined $entry->{DEFAULT};
        my $target = $entry->{VAR};
        next if defined $$target;
        $$target = $entry->{DEFAULT};
    }
}
75e70a6d8d60 Uploaded
nml
parents:
diff changeset
67
75e70a6d8d60 Uploaded
nml
parents:
diff changeset
# Print the usage line followed by one line per entry in @Options (option
# spec, description, and the default value when one is declared), then exit
# with status 1.
sub usage {
    print "Usage: $0 [options] id1 [id2 ...] < input.fasta > output.fasta\n";
    for my $entry (@Options) {
        my $default_note = "";
        if (defined $entry->{DEFAULT}) {
            $default_note = " (default '$entry->{DEFAULT}')";
        }
        printf " --%-13s %s%s.\n", $entry->{OPT}, $entry->{DESC}, $default_note;
    }
    exit(1);
}
75e70a6d8d60 Uploaded
nml
parents:
diff changeset
76
75e70a6d8d60 Uploaded
nml
parents:
diff changeset
77 #----------------------------------------------------------------------