Mercurial > repos > nml > fasta_extract

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fa-extract-few.pl	Mon Feb 06 10:27:59 2017 -0500
@@ -0,0 +1,77 @@
+#!/usr/bin/perl -w
+use strict;
+use Bio::SeqIO;
+
+# Main script: copy to STDOUT every FASTA record of --file whose id or
+# description matches one of the given patterns (or, with --inverse, every
+# record that does NOT match).
+#
+# Patterns are regular expressions.  They come from the remaining command
+# line arguments, or from the --list file (one pattern per line), which
+# replaces the command line patterns when it is given.
+# With --exact only the display id is tested and it must match in full.
+my(@Options, $verbose, $inverse, $file,$list,$exact);
+setOptions();
+
+my $in = Bio::SeqIO->new(-file=>$file, -format=>'Fasta');
+my $out = Bio::SeqIO->new(-fh=>\*STDOUT, -format=>'Fasta');
+my $nread=0;
+my $nwrote=0;
+
+my $pattern = join('|', @ARGV);
+
+if ($list) {
+    my @patterns;
+    # Use a dedicated handle name so the FASTA reader $in is not shadowed,
+    # and fail loudly when the list file cannot be read.
+    open(my $list_fh, '<', $list)
+        or die "Cannot open pattern list '$list': $!\n";
+    while (my $line = <$list_fh>) {
+        # Strip Unix and Windows line endings.
+        $line =~ s/[\r\n]+$//;
+        # A blank line would become an empty alternative in the regex,
+        # which matches every sequence.
+        next if $line eq '';
+        push @patterns, $line;
+    }
+    close $list_fh;
+    $pattern = join('|', @patterns);
+}
+
+# Compile the patterns once instead of re-interpolating them per sequence.
+my $partial_re = qr/($pattern)/;
+my $exact_re   = qr/^($pattern)$/;
+
+while (my $seq = $in->next_seq) {
+  $nread++;
+
+  # Records without an id or description yield undef; treat it as empty so
+  # the matches below do not warn under -w.
+  my $id = $seq->display_id;
+  $id = '' unless defined $id;
+
+  my $match;
+  if ($exact) {
+    $match = ($id =~ $exact_re);
+  }
+  else {
+    my $desc = $seq->description;
+    $desc = '' unless defined $desc;
+    $match = ($desc =~ $partial_re or $id =~ $partial_re);
+  }
+  #print STDERR "Found match: ",$seq->display_id, " ", $seq->description, "\n" if $verbose;
+
+  # Logical xor: write the record when exactly one of "matched" and
+  # "inverse requested" is true.
+  if ($match xor $inverse) {
+    $out->write_seq($seq);
+    $nwrote++;
+  }
+}
+
+#print STDERR "Read $nread sequences, wrote $nwrote, with pattern: $pattern\n";
+exit(0);
+#----------------------------------------------------------------------
+# Option setting routines
+
+# Populate the file-level @Options table, parse the command line with
+# Getopt::Long and then fill in the declared default for every option
+# variable that is still undefined.  Calls usage() (which exits) when no
+# arguments were given at all or when option parsing fails.
+sub setOptions {
+  use Getopt::Long;
+
+  # One entry per option: Getopt::Long spec, destination (scalar ref or
+  # handler), optional default value and the text shown by usage().
+  @Options = (
+    { OPT => "h|help",     VAR => \&usage,                  DESC => "This help" },
+    { OPT => "verbose!",   VAR => \$verbose, DEFAULT => 0,  DESC => "Verbose" },
+    { OPT => "v|inverse!", VAR => \$inverse, DEFAULT => 0,  DESC => "Output NON-matching sequences instead" },
+    { OPT => "f|file=s",   VAR => \$file,    DEFAULT => "", DESC => "The fasta file to extract sequences from" },
+    { OPT => "exact",      VAR => \$exact,   DEFAULT => "", DESC => "Exact matches for display id only" },
+    { OPT => "l|list=s",   VAR => \$list,    DEFAULT => "", DESC => "List of pattern to look from" },
+  );
+
+  usage() unless @ARGV;
+
+  # Flatten the table into the (spec, destination) pairs GetOptions expects.
+  my @spec;
+  for my $opt (@Options) {
+    push @spec, $opt->{OPT}, $opt->{VAR};
+  }
+  GetOptions(@spec) or usage();
+
+  # Apply defaults to anything the command line left unset.
+  for my $opt (@Options) {
+    next unless defined $opt->{DEFAULT};
+    my $target = $opt->{VAR};
+    $$target = $opt->{DEFAULT} unless defined $$target;
+  }
+}
+
+# Print the usage banner followed by one line per entry of @Options
+# (option spec, description and, when one is declared, the default value),
+# then terminate the program with exit status 1.
+sub usage {
+  print "Usage: $0 [options] id1 [id2 ...] < input.fasta > output.fasta\n";
+  for my $opt (@Options) {
+    my $default_note = "";
+    if (defined $opt->{DEFAULT}) {
+      $default_note = " (default '$opt->{DEFAULT}')";
+    }
+    printf "  --%-13s %s%s.\n", $opt->{OPT}, $opt->{DESC}, $default_note;
+  }
+  exit(1);
+}
+
+#----------------------------------------------------------------------
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fa-extract-sequence.xml	Mon Feb 06 10:27:59 2017 -0500
@@ -0,0 +1,76 @@
+<tool id="fa-extract-sequence" name="Fasta Extract Sequence" version="1.0.0">
+    <description>Extract a single sequence from a fasta file.</description>
+     <requirements>
+        <requirement type="package" version="5.18.1">perl</requirement>
+        <requirement type="package" version="1.6.924">bioperl</requirement>
+    </requirements>
+    <!-- Runs fa-extract-few.pl on the selected dataset. The patterns come
+         either from a list file or from the single id typed by the user,
+         depending on the file_or_type selector. The script's standard
+         output is redirected into the output dataset.
+         NOTE(review): the substituted values are not quoted; confirm that
+         ids containing spaces or shell metacharacters behave as intended. -->
+    <command interpreter="perl">
+        fa-extract-few.pl
+        -f $dataset
+	$exact
+        $inverse
+        #if $file_or_type.select == "list"
+          --list $file_or_type.list_file
+        #else
+          $file_or_type.id
+        #end if
+
+        >
+        $output
+    </command>
+    <!-- Tool form: the fasta dataset, two optional flags, and a choice
+         between a list file of patterns and a single typed pattern. -->
+    <inputs>
+        <param name="dataset" type="data" format="fasta" label="fasta or multifasta file" help="fasta dataset to extract sequences from."/>
+        <!-- falsevalue is set to the empty string on purpose: without it an
+             unchecked box is substituted as the literal word false, which the
+             script would treat as one more search pattern. -->
+        <param name="exact" type="boolean" truevalue="--exact" falsevalue="" label="Exact matches only" help="Will only match exact matches for fasta id"/>
+        <param name="inverse" type="boolean" truevalue="--inverse" falsevalue="" label="Entries NOT matching" help="Will return the sequences not matching the given ids"/>
+
+        <conditional name="file_or_type">
+          <param name="select" type="select" label="List file or single pattern">
+            <option value="list">List file</option>
+            <option value="single">Single Pattern</option>
+          </param>
+          <when value="list">
+            <param name="list_file" type="data" format="txt" help="List of pattern to find." label="List file"/>
+          </when>
+          <when value="single">
+            <param name="id" type="text" label="Sequence ID (or partial)" help="Name of the sequence to extract. Will also match partial names and return all matches." />
+          </when>
+        </conditional>
+
+
+    </inputs>
+    <outputs>
+        <data name="output" format="fasta" label="${tool.name} on ${on_string}: Fasta"/>
+    </outputs>
+    <!-- Placeholder only: this test declares no input parameters and no
+         expected output file, so it does not exercise the tool. -->
+    <tests>
+      <test>
+        <output/>
+      </test>
+    </tests>
+    <help>
+**Fasta Extract Sequence**
+Extracts fasta sequences from a multifasta file by id (exact or partial match).
+
+Latest author:
+Written by Philip Mabon   - Public Health Agency of Canada
+
+Original authors:
+Written by Torsten Seemann - Victorian Bioinformatics Consortium
+
+Wrapped by Simon Gladman - Victorian Bioinformatics Consortium
+
+
+------
+
+Outputs in fasta format.
+
+------
+
+Inputs:
+
+Fasta dataset
+
+Sequence id
+    </help>
+    <citations>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Mon Feb 06 10:27:59 2017 -0500
@@ -0,0 +1,23 @@
+<?xml version="1.0"?>
+<!-- Dependencies of the fasta extract tool: a Perl interpreter and BioPerl. -->
+<tool_dependency>
+    <!-- Perl 5.18.1, provided by the iuc package_perl_5_18 repository. -->
+    <package name="perl" version="5.18.1">
+        <repository changeset_revision="35f117d7396b" name="package_perl_5_18" owner="iuc" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+    <!-- BioPerl 1.6.924, installed into a Perl environment that is set up
+         from the same package_perl_5_18 repository. -->
+    <package name="bioperl" version="1.6.924">
+        <install version="1.0">
+            <actions>
+                <action type="setup_perl_environment">
+                    <repository changeset_revision="35f117d7396b" name="package_perl_5_18" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu">
+                        <package name="perl" version="5.18.1" />
+                    </repository>
+                   <!-- allow downloading and installing a Perl package from cpan.org-->
+                   <package>XML::Parser</package>
+                   <package>http://search.cpan.org/CPAN/authors/id/C/CJ/CJFIELDS/BioPerl-1.6.924.tar.gz</package>
+                </action>
+            </actions>
+        </install>
+        <readme>
+            Bundle::BioPerl
+        </readme>
+    </package>
+</tool_dependency>