annotate fa-extract-few.pl @ 1:5dfc014a8b3a draft default tip

Uploaded
author simon-gladman
date Tue, 25 Jun 2013 01:48:56 -0400
parents bcb73a63ffee
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
bcb73a63ffee Uploaded
simon-gladman
parents:
diff changeset
1 #!/usr/bin/perl -w
bcb73a63ffee Uploaded
simon-gladman
parents:
diff changeset
2 use strict;
bcb73a63ffee Uploaded
simon-gladman
parents:
diff changeset
3 use Bio::SeqIO;
bcb73a63ffee Uploaded
simon-gladman
parents:
diff changeset
4
bcb73a63ffee Uploaded
simon-gladman
parents:
diff changeset
# Option state shared with setOptions() and usage().  setOptions() fills these
# in from the command line; the arguments left in @ARGV afterwards are the
# identifiers to search for.
my (@Options, $verbose, $inverse, $file);
setOptions();

# Each remaining argument is used as a regular expression (not a literal
# string) and is tried against both the description and the display id of
# every record.  Empty arguments are dropped: an empty alternative would
# match every record.
my @wanted = grep { defined($_) && length($_) } @ARGV;

# With no identifier at all the joined pattern would be empty and would
# select the whole input, so show the usage text and stop instead.
usage() unless @wanted;

# Compile the alternation once, outside the loop.  An invalid expression is
# therefore reported before any input is read.
my $pattern = join('|', @wanted);
my $regex   = qr/(?:$pattern)/;

# Read the file named by --file when one was given; otherwise read standard
# input explicitly (as the usage text advertises) rather than handing an
# empty file name to Bio::SeqIO.
my $in = (defined($file) && length($file))
    ? Bio::SeqIO->new(-file => $file,   -format => 'Fasta')
    : Bio::SeqIO->new(-fh   => \*STDIN, -format => 'Fasta');
my $out = Bio::SeqIO->new(-fh => \*STDOUT, -format => 'Fasta');

my $nread  = 0;
my $nwrote = 0;

while (my $seq = $in->next_seq) {
    $nread++;

    # Normalise missing fields to the empty string so the match below does
    # not emit "uninitialized value" warnings under -w.
    my $desc = $seq->description;
    my $id   = $seq->display_id;
    $desc = '' unless defined $desc;
    $id   = '' unless defined $id;

    my $match = ($desc =~ $regex || $id =~ $regex);
    #print STDERR "Found match: ", $id, " ", $desc, "\n" if $verbose;

    # Logical xor: write matching records normally, non-matching records
    # when --inverse was given.
    if ($match xor $inverse) {
        $out->write_seq($seq);
        $nwrote++;
    }
}

#print STDERR "Read $nread sequences, wrote $nwrote, with pattern: $pattern\n";
exit(0);
bcb73a63ffee Uploaded
simon-gladman
parents:
diff changeset
27 #----------------------------------------------------------------------
bcb73a63ffee Uploaded
simon-gladman
parents:
diff changeset
28 # Option setting routines
bcb73a63ffee Uploaded
simon-gladman
parents:
diff changeset
29
bcb73a63ffee Uploaded
simon-gladman
parents:
diff changeset
# setOptions()
#
# Populates the file-level @Options table, parses the command line with
# Getopt::Long, and then copies each entry's DEFAULT into its option variable
# when that variable is still undefined.  Takes no arguments; callers do not
# use the return value.  Calls usage() when @ARGV is empty or when option
# parsing fails.
sub setOptions {
    use Getopt::Long;

    # One hash per option: OPT is the Getopt::Long specification, VAR is the
    # destination (scalar reference, or code reference for --help), DEFAULT
    # is the optional fallback value and DESC is the text shown by usage().
    @Options = (
        { OPT => "h|help",     VAR => \&usage,   DESC => "This help" },
        { OPT => "verbose!",   VAR => \$verbose, DEFAULT => 0,
          DESC => "Verbose" },
        { OPT => "v|inverse!", VAR => \$inverse, DEFAULT => 0,
          DESC => "Output NON-matching sequences instead" },
        { OPT => "f|file=s",   VAR => \$file,    DEFAULT => "",
          DESC => "The fasta file to extract sequences from" },
    );

    # Nothing on the command line at all: show the help.
    usage() unless @ARGV;

    # Flatten the table into the (spec, destination) pairs GetOptions expects.
    my @spec_pairs = map { ($_->{OPT}, $_->{VAR}) } @Options;
    GetOptions(@spec_pairs) or usage();

    # Now setup default values.
    for my $entry (@Options) {
        # Entries without a DEFAULT (e.g. --help) are skipped before their
        # VAR is dereferenced.
        next unless defined $entry->{DEFAULT};

        my $target = $entry->{VAR};
        next if defined $$target;

        $$target = $entry->{DEFAULT};
    }
}
bcb73a63ffee Uploaded
simon-gladman
parents:
diff changeset
51
bcb73a63ffee Uploaded
simon-gladman
parents:
diff changeset
# usage()
#
# Prints the command synopsis followed by one line per entry of the
# file-level @Options table (option spec, description and, when present, the
# default value), then terminates the program with exit status 1.  Any
# arguments passed in (it is also registered as the --help handler) are
# ignored.
#
# The text is written to STDERR because STDOUT carries the FASTA output of
# the script; printing help there would put it into a redirected result file
# instead of showing it to the user.
sub usage {
    print STDERR "Usage: $0 [options] id1 [id2 ...] < input.fasta > output.fasta\n";
    foreach my $opt (@Options) {
        my $default_note = defined($opt->{DEFAULT})
            ? " (default '$opt->{DEFAULT}')"
            : "";
        printf STDERR " --%-13s %s%s.\n", $opt->{OPT}, $opt->{DESC}, $default_note;
    }
    exit(1);
}
bcb73a63ffee Uploaded
simon-gladman
parents:
diff changeset
60
bcb73a63ffee Uploaded
simon-gladman
parents:
diff changeset
61 #----------------------------------------------------------------------