changeset 27:8be4f23d1018

Add bam-dump and test data.
author Matt Shirley <mdshw5@gmail.com>
date Mon, 30 Mar 2015 22:40:59 -0400
parents 7d80b2b24270
children fdc981664a43
files bam_dump.xml fastq_dump.xml test-data/fastq_dump_result.fastq
diffstat 3 files changed, 176 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bam_dump.xml	Mon Mar 30 22:40:59 2015 -0400
@@ -0,0 +1,88 @@
+<tool id="bam_dump" name="Extract reads" version="1.1.2">
+  <description> in BAM format from NCBI SRA.</description>
+  <command>
+    sam-dump --log-level fatal
+    #if str( $region ) != "":
+      --aligned-region "$region"
+    #end if
+    #if str( $matepairDist ) != "":
+      --matepair-distance "$matepairDist"
+    #end if
+    #if str( $minMapq ) != "":
+      --min-mapq "$minMapq"
+    #end if
+    #if str( $header ) == "yes":
+      --header
+    #else:
+      --no-header
+    #end if
+    #if str( $alignments ) == "both":
+      --unaligned
+    #elif str( $alignments ) == "unaligned":
+      --unaligned-spots-only
+    #end if
+    ## Skip the primary-only filter when the user asked for unaligned spots only.
+    #if str( $primary ) == "yes" and str( $alignments ) != "unaligned":
+      --primary
+    #end if
+    #if str( $input.input_select ) == "file":
+      "$input.file"
+    #elif str( $input.input_select ) == "accession_number":
+      "$input.accession"
+    #elif str( $input.input_select ) == "text":
+      `cat "$input.text"`
+    #end if
+    | samtools view -Sb - > "$output"
+  </command>
+  <version_command>sam-dump --version</version_command>
+  <inputs>
+    <!-- Source of the run: an SRR accession, an SRA dataset in the history, or a text dataset holding an accession. -->
+    <conditional name="input">
+      <param name="input_select" type="select" label="select input type">
+        <option value="accession_number">SRR accession</option>
+        <option value="file">SRA archive in current history</option>
+        <option value="text">text file containing SRR accession</option>
+      </param>
+      <when value="file">
+        <param format="sra" name="file" type="data" label="sra archive"/>
+      </when>
+      <when value="accession_number">
+        <param name="accession" type="text" label="accession"/>
+      </when>
+      <when value="text">
+        <param format="txt" name="text" type="data" label="text file"/>
+      </when>
+    </conditional>
+    <!-- Optional text filters: when left empty, the matching sam-dump option is omitted from the command. -->
+    <param name="region" type="text" label="aligned region"/>
+    <param name="matepairDist" type="text" label="mate-pair distance (from-to|unknown)"/>
+    <!-- Select switches; the first option of each is marked as the default. -->
+    <param name="header" type="select" label="output BAM header">
+      <option value="yes" selected="true">Yes</option>
+      <option value="no">No</option>
+    </param>
+    <param name="alignments" type="select" label="aligned or unaligned reads">
+      <option value="both" selected="true">both</option>
+      <option value="aligned">aligned only</option>
+      <option value="unaligned">unaligned only</option>
+    </param>
+    <param name="primary" type="select" label="only primary alignments">
+      <option value="no" selected="true">No</option>
+      <option value="yes">Yes</option>
+    </param>
+    <param name="minMapq" type="text" label="minimum mapping quality"/>
+  </inputs>
+  <outputs>
+    <data name="output" format="bam"/>
+  </outputs>
+  <requirements>
+    <requirement type="package" version="2.4.5">sra_toolkit</requirement>
+    <requirement type="package" version="1.2">samtools</requirement>
+  </requirements>
+  <help>
+    This tool extracts reads from SRA archives using sam-dump and converts them to BAM format with samtools.
+    Browse the NCBI SRA for SRR accessions at http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=studies.
+    The sam-dump program is developed at NCBI, and is available at: http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software.
+    Contact Matt Shirley at mdshw5@gmail.com for support and bug reports.
+  </help>
+</tool>
--- a/fastq_dump.xml	Mon Mar 30 22:11:38 2015 -0400
+++ b/fastq_dump.xml	Mon Mar 30 22:40:59 2015 -0400
@@ -114,6 +114,14 @@
   <requirements>
     <requirement type="package" version="2.4.5">sra_toolkit</requirement>
   </requirements>
+  <tests>
+    <test> <!-- Dumps SRR000001 by accession with maxID=5; expected output is test-data/fastq_dump_result.fastq (spots SRR000001.1 to SRR000001.5). -->
+      <param name="input_select" value="accession_number"/>
+      <param name="accession" value="SRR000001"/>
+      <param name="maxID" value="5"/>
+      <output name="output" file="fastq_dump_result.fastq" ftype="fastq" />
+    </test>
+  </tests>
   <help>
     This tool extracts reads from SRA archives using fastq-dump.
     Browse the NCBI SRA for SRR accessions at http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=studies.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fastq_dump_result.fastq	Mon Mar 30 22:40:59 2015 -0400
@@ -0,0 +1,80 @@
+@EM7LVYS01C1LWG_Adapter/1
+TCAG
++SRR000001.1 EM7LVYS01C1LWG length=4
+=;8G
+@EM7LVYS01C1LWG_Mate1/2
+GGGGGAGCTTAAATTTGAAACTAGAAAAATTTTGAACAAAATAATCATAATTGTTAGCTGATGAAAAACTAGAAAAGATTTTCTGAGT
++SRR000001.1 EM7LVYS01C1LWG length=88
+C91*#==<C=EA.EA/<B=(<<:=HC90'FB5&;B:<GC6(=D=<<==C=C==B<=<<<=;<<GC8.#<<9=FB4%<8EA4%87:<<8
+@EM7LVYS01C1LWG_Linker/3
+GTTGGAACCGAAAGGGTTTGAATTCAAACCCTTTCGGTTCCAAC
++SRR000001.1 EM7LVYS01C1LWG length=44
+=B;C<@8>5=C?*A<&A<&<=49/2A='@;#A<&<A9C=@9B::
+@EM7LVYS01C1LWG_Mate2/4
+GGTATCCCGTAGTGTGCATTCATCCCTGCTCTGGATACAGTCAGCTCCCAAATTCCATAAACAACTCCTTTGTAAGTAACCTCCTTTTGACAGGGGGTACTGAGCGGGCTGGCAAGGCN
++SRR000001.1 EM7LVYS01C1LWG length=119
+B:<;=C?+<<;<===<=;C<==<FB0=<=<<<D=9=;;=<=<=<;=FB2FB2C<C<;=FB0<C==;C<D@-<=B:<=C=C;<C=GD7*=;:=HD90'==<<=<=:FB0<<C<;C=C=<!
+@EM7LVYS01B2EMP_Adapter/1
+TCAG
++SRR000001.2 EM7LVYS01B2EMP length=4
+=<8F
+@EM7LVYS01B2EMP_Mate1/2
+GGGGGGGTTACACGTGCAGATTTGTTACACGGGTGTACTGTGAGGTTTGGGGTACGAATGATCCCGTTACCTAGATAGTGAGCATGGAACCC
++SRR000001.2 EM7LVYS01B2EMP length=92
+@71-*&#D=<=<=<===<<:FB1=C=;<=<FA/==<<=<===<D=FB0FB4%<<=<B;=;;;FB2=D=<C<=;=<;<==<==<:;D=C=FB1
+@EM7LVYS01B2EMP_Linker/3
+GTTGGAACCGAAAGGGTTTGAATTCAAACCCTTTCGGTTCCAAC
++SRR000001.2 EM7LVYS01B2EMP length=44
+<C<>6=4>5:@<%@;$@<%=<3;1:D@,D@-A='=C=D=B:C<7
+@EM7LVYS01B2EMP_Mate2/4
+AATGTGCAGGGCTCAGGTCAGCATTAGGGTCAGGTTCTTAGGAAAAGAAAGAGCAAAAACAATGAAACACAATACAAAGTAAAGAACACTGAGCGGGCTGGCAAGGCN
++SRR000001.2 EM7LVYS01B2EMP length=108
+9.<===;;EA.<<<<C;;===<;C=<FB1<;=C=C<<C==D=GC6)<?;"<=<=HC9/%;B;==FB0<<<C<=<;FB2<;B=(8D=<:<98<;=EA/=<A9;C;>6=!
+@EM7LVYS01C2YO0_Adapter/1
+TCAG
++SRR000001.3 EM7LVYS01C2YO0 length=4
+<==G
+@EM7LVYS01C2YO0_Mate1/2
+GGGGGGAGATACATCCTTTTCAAGAAGACGTAGAACATTTATTAAAATTGACCACATGCTGAGATACACCGAGAAACTCTCAAATTTGGAAGGACTGAAATCATACAAAGTACGTTTTCTTACTACAATGCAATTAAGTTGGAAATCAAATAGCAAAAATAAAATAAAAACTATTTATAT
++SRR000001.3 EM7LVYS01C2YO0 length=180
+A82-)$6==8:::<B;FB5&=C==C<<<<==<<C;<<FB/:C<EA3#@8;=C;:=<<<===<;<;;=<C<<<;FB0<;<<<EA/FB/C<C=@83;<<FB07=<<<<D@,8=;<=GC6)<B:<<<=;A9<=;C;C<<3:B:C<E@-==EA.=98<HD90&=HD8+4GC8-"2;5FB19<43
+@EM7LVYS01C2YO0_Linker/3
+GTTGGAACCGAAAGGGTTTGAATTCAAACCCTTTCGGTTCCAAC
++SRR000001.3 EM7LVYS01C2YO0 length=44
+9C=D=B;B<<FB1D?+FB0=C<B;<FB1FB1D@,<B<B<C<C=<
+@EM7LVYS01C2YO0_Mate2/4
+TTAAACTTAAAAAAATTTTTTCTAGTTATTTTATTTTGTTTGAAACAGAAATCAACTGAGCGGGCTGGCAAGGCNNNNN
++SRR000001.3 EM7LVYS01C2YO0 length=79
+C=FB1<C<GB82-(#GB92,';(5:9.=FB4%8GC8,:FB0<D?+=<5?;"7:=46868;8D?+:<@8=D==59!!!!!
+@EM7LVYS01CD8YZ_Adapter/1
+TCAG
++SRR000001.4 EM7LVYS01CD8YZ length=4
+=<:<
+@EM7LVYS01CD8YZ_Mate1/2
+ATCATTACAGACGGAACAGAAATAACTCAGGCAAGCCAGCTGCAACGAGAGGCAGGGCCCAGCGACAGCGCCTGGGTGGGCCTGG
++SRR000001.4 EM7LVYS01CD8YZ length=85
+<<<=C=<=<<<=C;?6=<<FB2<B;;<<=A9=?7<B:<<=<<<>6=<=<=A:<=C?*C?*=;=<<<<<==C<<B=(;EA2A9<D@
+@EM7LVYS01CD8YZ_Linker/3
+GTTGGAACCGAAAGGGTTTGAATTCAAACCCTTTCGGTTCCAAC
++SRR000001.4 EM7LVYS01CD8YZ length=44
+,C<@8;2?6<A<&B=(C?+=>6A:8C?+@;$C?*=D=C=B:@8B
+@EM7LVYS01CD8YZ_Mate2/4
+CAGGAGGTCGAGGTTGCAGTGAGCCAAATCATGCTACTGTACAGTCTGGGTGACAGAGTGAGACCCTGTCTGAGCGGGCTGGCAAGGCN
++SRR000001.4 EM7LVYS01CD8YZ length=89
+:=C==>6;=<<A9?7==;=<=<;B;EA2==<<=<=<<8:<=<7;==8D@,<<=<;<<==<=<<E@-=;7<<<<7;C?*6<C<<C<C<<!
+@EM7LVYS01CB53W_Adapter/1
+TCAG
++SRR000001.5 EM7LVYS01CB53W length=4
+<=<H
+@EM7LVYS01CB53W_Mate1/2
+GGGGGCG
++SRR000001.5 EM7LVYS01CB53W length=7
+C92+%8B
+@EM7LVYS01CB53W_Linker/3
+GTTGGAACCGAAAGGGTTTGAATTCAAACCCTTTCGGTTCCAAC
++SRR000001.5 EM7LVYS01CB53W length=44
+:?7A9@7>6=D@-EA.C?*<A9?66D@,B>)B=(<@8D=C=>5=
+@EM7LVYS01CB53W_Mate2/4
+AGCCTCAAGGGCCAAAGAATGGTGTTCTCAAACTGCCATGCCACAGGACAAACTGAGCTCGCTGAGGGGACAGGAGAAGAAACGTGGGGCCATGAGGTCTCCTGAGCGGGCTGGCAAGGC
++SRR000001.5 EM7LVYS01CB53W length=120
+<<A9<<B;EA.C<C>*;C=;C<=9A::<:FB0<<=B;=<;C<<;;D=8<FB0<=<=<<=8;;=:=HD8,=<<A<:<A9;FB0<:=GC6(B;;:<=D=9=<C<=:=3=EA.==D=<@7C;: