Mercurial > repos > iuc > sra_tools
changeset 6:30775c836c77 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/sra-tools commit ee50324111351323cc294e051a6fab1733a89ec1
author | iuc |
---|---|
date | Wed, 22 Mar 2017 05:23:31 -0400 |
parents | 26b7446bb17e |
children | c7620aa7e1f0 |
files | fastq_dump.xml test-data/DRR015708_forward.fastqsanger test-data/DRR015708_reverse.fastqsanger test-data/ERR027433_forward.fastqsanger test-data/ERR027433_reverse.fastqsanger test-data/SRR1993644.fastqsanger test-data/list_pe test-data/list_pe2 test-data/list_se |
diffstat | 9 files changed, 155 insertions(+), 16 deletions(-) [+] |
line wrap: on
line diff
--- a/fastq_dump.xml Wed Dec 07 09:10:29 2016 -0500 +++ b/fastq_dump.xml Wed Mar 22 05:23:31 2017 -0400 @@ -1,4 +1,4 @@ -<tool id="fastq_dump" name="Extract reads" version="@VERSION@"> +<tool id="fastq_dump" name="Extract reads" version="@VERSION@.1"> <description>in FASTQ/A format from NCBI SRA.</description> <macros> <import>sra_macros.xml</import> @@ -93,20 +93,19 @@ ; - #if str( $outputformat ) == "fasta": - - for f in *_2.fasta ; do mv "\$f" "`basename \$f _2.fasta`_reverse.fasta" ; mv "`basename \$f _2.fasta`_1.fasta" "`basename \$f _2.fasta`_forward.fasta" ; done && - for f in *_1.fasta; do mv "\$f" "`basename \$f _1.fasta`__single.fasta"; done - - #else: - - for f in *_2.fastq ; do mv "\$f" "`basename \$f _2.fastq`_reverse.fastq" ; mv "`basename \$f _2.fastq`_1.fastq" "`basename \$f _2.fastq`_forward.fastq" ; done && - for f in *_1.fastq; do mv "\$f" "`basename \$f _1.fastq`__single.fastq"; done - - #end if + for i in `ls *.fast* | cut -f 1 -d '_' | uniq` ; do + count=`ls \$i* | wc -l` ; + data=(\$(ls -d \$i*)); + + if [ "\$count" -eq 2 ]; then + mv "\${data[0]}" "\${data[0]}"_forward.$outputformat; mv "\${data[1]}" "\${data[1]}"_reverse.$outputformat ; + elif [ "\$count" -eq 1 ]; then + mv "\${data[0]}" "\${data[0]}"__single.$outputformat ; + fi; + done #end if @@ -153,13 +152,13 @@ identifier in the nested collection and identifier_1 is either forward or reverse (for instance samp1_forward.fq). --> - <discover_datasets pattern="(?P<identifier_0>[^_]+)_(?P<identifier_1>[^_]+)\.fastq" ext="fastqsanger" visible="false" /> - <discover_datasets pattern="(?P<identifier_0>[^_]+)_(?P<identifier_1>[^_]+)\.fasta" ext="fasta" visible="false" /> + <discover_datasets pattern="(?P<identifier_0>[^_]+)_\d+.fastq_(?P<identifier_1>[^_]+)\.fastq" ext="fastqsanger" visible="false" /> + <discover_datasets pattern="(?P<identifier_0>[^_]+)_\d+.fasta_(?P<identifier_1>[^_]+)\.fasta" ext="fasta" visible="false" /> </collection> <collection name="output_collection" type='list' label="Single-end Fast(q|a)"> <filter>input['input_select'] == "file_list"</filter> - <discover_datasets pattern="(?P<designation>.+)__single\.fastq" directory="." ext='fastqsanger'/> - <discover_datasets pattern="(?P<designation>.+)__single\.fasta" directory="." ext='fasta'/> + <discover_datasets pattern="(?P<designation>.+)_\d+.fastq__single\.fastq" directory="." ext='fastqsanger'/> + <discover_datasets pattern="(?P<designation>.+)_\d+.fasta__single\.fasta" directory="." ext='fasta'/> </collection> <data format="fastqsanger" name="output_accession" > <filter>input['input_select'] == "accession_number"</filter> @@ -194,6 +193,43 @@ <param name="maxID" value="5"/> <output name="output_accession" file="fastq_dump_result.fastq" ftype="fastqsanger"/> </test> + <test> + <param name="input_select" value="file_list"/> + <param name="outputformat" value="fastqsanger"/> + <param name="file_list" value="list_pe"/> + <param name="maxID" value="5"/> + <output_collection name="list_paired" type="list:paired"> + <element name="DRR015708"> + <element name="forward" file="DRR015708_forward.fastqsanger"> + </element> + <element name="reverse" file="DRR015708_reverse.fastqsanger"> + </element> + </element> + </output_collection> + </test> + <test> + <param name="input_select" value="file_list"/> + <param name="outputformat" value="fastqsanger"/> + <param name="file_list" value="list_pe2"/> + <param name="maxID" value="5"/> + <output_collection name="list_paired" type="list:paired"> + <element name="ERR027433"> + <element name="forward" file="ERR027433_forward.fastqsanger"> + </element> + <element name="reverse" file="ERR027433_reverse.fastqsanger"> + </element> + </element> + </output_collection> + </test> + <test> + <param name="input_select" value="file_list"/> + <param name="outputformat" value="fastqsanger"/> + <param name="file_list" value="list_se"/> + <param name="maxID" value="5"/> + <output_collection name="output_collection" type="list"> + <element name="SRR1993644" file="SRR1993644.fastqsanger"/> + </output_collection> + </test> </tests> <help> This tool extracts reads from SRA archives using fastq-dump.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/DRR015708_forward.fastqsanger Wed Mar 22 05:23:31 2017 -0400 @@ -0,0 +1,20 @@ +@HWI-ST895_0148_AC1GFWACXX:5:1101:1258:1938/1 +NTATTGTAGCGCACTATGAGGTCGCTCATTANTNTACATCNNCATCCAAATTTCAGCATNANTTTNNNNNTGTTTGATATTCTCAAAGNGATNAACGTTT ++DRR015708.1 HWI-ST895_0148_AC1GFWACXX:5:1101:1258:1938 length=100 +#1=DDFFFHGHHHJIIIIGIJFHIJJJJJJI#0#08DFII##-7@FHIIJJJJGEHEEE#,#,,;#####,,;?DCCDEFEEFCDDDD#+2+#+2<?<CC +@HWI-ST895_0148_AC1GFWACXX:5:1101:1667:1930/1 +NTTCTAAACTCATTCCAGAAGTAATTGATGCGTCACCAATAATTGCAATATGTTTTCTATTGATTTGATTGATTTTATCTGCTGTTGCCATCCCAACAAT ++DRR015708.2 HWI-ST895_0148_AC1GFWACXX:5:1101:1667:1930 length=100 +#4=DDFFFHHHHHJJJJJJIIHIJJJJJJJJIHJJJJJIJJJJJJJJJJJJJIJJJJJJJJJJJJJJIIIICHHGEHHGIFEHHHHFFDFFFDDCEB@AC +@HWI-ST895_0148_AC1GFWACXX:5:1101:1833:1936/1 +NTCACCTACAACTCGAATATTCGAATGATATGTGATATTGTTAGATGATTTTGGCATGCTTGCAGAAAGTGCATAAACACCTGTATTGATTCCCGAATTT ++DRR015708.3 HWI-ST895_0148_AC1GFWACXX:5:1101:1833:1936 length=100 +#4=DFFFFHHHHHIJIJJIHIIJIIJJJJIJIHIIGGIIJHIEHGGHGHIIJJJIIJJJJJHHCHGGGE@FHGHIIJIEHHEDCCDCCDEEED@AB/=A# +@HWI-ST895_0148_AC1GFWACXX:5:1101:2003:1864/1 +NCTTAAAACTTATTAAATGAATCAATTAGATAAAAGACTTTTTTTGTTAGATGCTTATGCCTTAATTTTTAGAGGATATTATGCATTTATCAAAAATCCA ++DRR015708.4 HWI-ST895_0148_AC1GFWACXX:5:1101:2003:1864 length=100 +#4=DFFFFHHHHHJJJIJJJJJJJJJJJJJJJJIHGDGHIJJJJJJGHHJGIJIJJJJJJJIJIJJJJJHFFHGFDDFEEFEECCEEDEEDDCC3<A5>C +@HWI-ST895_0148_AC1GFWACXX:5:1101:2399:1887/1 +NTAAAGGTTATTGTGCTTCACAGAAAATGCATTATTATGGGTATAAACTTCATGCGGTTTGCTCAGCGGAAGGTGTCTTTCAAAGTTTGGATATTAGTCC ++DRR015708.5 HWI-ST895_0148_AC1GFWACXX:5:1101:2399:1887 length=100 +#1=DDFFEHHHHHGIJJJJGIJJJJJJJJJJJJIJJJJJJJCFGFIIHJIJHIJJJJHIJJJJIJIJIFHBDE;@ACEEEDDCCCCCDDDCCDCCFECCC
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/DRR015708_reverse.fastqsanger Wed Mar 22 05:23:31 2017 -0400 @@ -0,0 +1,20 @@ +@HWI-ST895_0148_AC1GFWACXX:5:1101:1258:1938/2 +CATTANNAGNNANNNTTNNNNGANNNNNTCGANNNNNANTTTTAANCGNNNNNCTCTTNGAGAATATCAAANAGNTNAAAAATTANGCTGANNNTNGNAT ++DRR015708.1 HWI-ST895_0148_AC1GFWACXX:5:1101:1258:1938 length=100 +CCCFF##4<##2###32####22#####11?F#####0#07CHGI#.;#####--;BB#,;?ADDDEEDDD#,8#,#,58<?BD:#+2?@?######### +@HWI-ST895_0148_AC1GFWACXX:5:1101:1667:1930/2 +GAAAGATGGTATTTCTGGTTTTCCAACGCGTACAGAAAGTGAGTTTGATACATTTGGAACGGGACATTCTTCTACATCTATTTCAGCAATTGTTGGGATG ++DRR015708.2 HWI-ST895_0148_AC1GFWACXX:5:1101:1667:1930 length=100 +BBCFFDFFHCFHHJJJJJIJJJJJJJIJJJJJJJIHGIIDHGIFIIJIIGHIIHIIJDHIIJH=DFFFFFEEEEEDEDDDEDEDDDDDCDDDCDDDBBDB +@HWI-ST895_0148_AC1GFWACXX:5:1101:1833:1936/2 +AAATATAATTGGAGAATCAACTTTATTTTCGAATACAATTCCAAATCAAAAAGAAGATAAAACACTAGAATTATCTCAAAAGAATTCAACTCAAAAAGAT ++DRR015708.3 HWI-ST895_0148_AC1GFWACXX:5:1101:1833:1936 length=100 +CCCFFFFFHHHHGJIJIJJIJJJJJJJJJJJIJJJJJJJJJJJJEIJJJJHIJJIJJJIJJJIJJIJJJIJIJJJHHHGEHFFDFF>C@C>CEC;A538: +@HWI-ST895_0148_AC1GFWACXX:5:1101:2003:1864/2 +ATTGAAACCTTTTGAGTTGATTCGTGGATTTTTGATAAATGCATAATATCCTCTAAAAATTAAGGCATAAGCATCTAACAAAAAAAGTCTTTTATCTAAT ++DRR015708.4 HWI-ST895_0148_AC1GFWACXX:5:1101:2003:1864 length=100 +CCCFFFFFHHHHHJEGGHIHIIJJHHJHIJIJJJGJIJJJJJJJJJJJJJIIIJIJJIJIJJIJJJJIGHEHIIHHHHFHFFEDDDDCDEEDDDDDDEDD +@HWI-ST895_0148_AC1GFWACXX:5:1101:2399:1887/2 +TTGGTATTCTGCGGATAAATATCCTTTATCAGCTAAGAGCGTACAATTTTTAAACTGCTGTTTTATATCTTTCAGATAATGAATGTCATGCACTGAAGCT ++DRR015708.5 HWI-ST895_0148_AC1GFWACXX:5:1101:2399:1887 length=100 +BCCFBDFFHHHHHJJJJJJJJJJJJJJJJJJJJJIJIFIJJHHIIIJJJJGHJJJJJJJJHIIJJIJJHHHHHHGEFFFFFFCDCEEEFDDDDCDD>CDD
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ERR027433_forward.fastqsanger Wed Mar 22 05:23:31 2017 -0400 @@ -0,0 +1,20 @@ +@IL4_4847:5:1:1051:7109/1 +GCCGCCAACGTCCGACGGCGCGTCCCGCACGACTTGAACCGATCTCACCGAGACAGAACTAT ++ERR027433.1 IL4_4847:5:1:1051:7109 length=62 +C@CC@@@C2BBCBB:>@8@@5@>>@@@@@>@97@@==<@>>@4<'@8>:35=%&B####### +@IL4_4847:5:1:1058:16093/1 +AAGGTCGGGCATTCGTTCGAGCCGACGACCGCGAGGAAGCGGTTCGGCCGGGCGTAGAATCC ++ERR027433.2 IL4_4847:5:1:1058:16093 length=62 +?ABBABB;<@>@=<?@CBBB:@2>BCBB,,?):7@/3$54'818->1-+=+()$6--C8+?1 +@IL4_4847:5:1:1111:15034/1 +TCGATCCGCAGGGCAATCTGTGGATCCCGGCGTTCGACGACGGCGGGCGGGTGGCTCGGCAT ++ERR027433.3 IL4_4847:5:1:1111:15034 length=62 +2524;@@@/4<@6@;9?C@@93@>BAB2>>C@>B>C4BC4@-+B<@525++%=)######## +@IL4_4847:5:1:1122:6048/1 +ATCTCCTCGACGCGCACCAGATAGCGGTCGTAGCAATCGCCGTCCACGCCGACGGGCCCACT ++ERR027433.4 IL4_4847:5:1:1122:6048 length=62 +BBB@@@@@@@AB.<@2)7;B-@==:@>@@@A7@<AA4:A3'@9+4;&00'9+AC5--DA5,? +@IL4_4847:5:1:1138:20693/1 +CTGCTCGCGCGAGAACGCGCGCAAGCTCGGGGGCGGTGGCGGGGTCGGGCGACACCGATGTA ++ERR027433.5 IL4_4847:5:1:1138:20693 length=62 +CB<C7@>);9@@@@=1B@@@@2@=*:4@@@0%(-$,'3%%'%$%$,44%3&$=$C;CC-C##
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ERR027433_reverse.fastqsanger Wed Mar 22 05:23:31 2017 -0400 @@ -0,0 +1,20 @@ +@IL4_4847:5:1:1051:7109/3 +AACGCACTGCGCGCGGACTCGCCCCCGCGGCCACTCCGTGGGCCGGCGTACGGA ++ERR027433.1 IL4_4847:5:1:1051:7109 length=54 +)(664;(>>/:1<+/'())0)',8%'*7'+9904)'(+()'''':+4+'.7)'( +@IL4_4847:5:1:1058:16093/3 +GCATAGCTGCCCTCGCCTTCTACTCGCACCGAACAAGCGGCCCGCCTACGCGGG ++ERR027433.2 IL4_4847:5:1:1058:16093 length=54 +;=>1(/2(:/=@(:'9/<(*'&.(?.6)(';+.5''-3.&''7(96%0:+($2$ +@IL4_4847:5:1:1111:15034/3 +GAGTGGAGCCGCGGACGGGAGCGCGGCGGCTGCCTCACAGCACCCGGGGGGTCG ++ERR027433.3 IL4_4847:5:1:1111:15034 length=54 +::3+:2655(*('((3*&&&4+')6'3/2,+++*.+')-/)4((<-+&&'&%<( +@IL4_4847:5:1:1122:6048/3 +GAGGTCGGCGTTCTGGATGACAGGCGCGAAGCCCCCGCCGTCGGTGCCTTCACG ++ERR027433.4 IL4_4847:5:1:1122:6048 length=54 +=8+(-(,(3.5*=99+;).)8'(,(/(+(-6@'-3<6&,%/4++)')1/)>(.& +@IL4_4847:5:1:1138:20693/3 +TGGACGTTGTGGTGGTTGTAAGAGATTTCGCTCCCCATGTTGGCGAGCTGCGAT ++ERR027433.5 IL4_4847:5:1:1138:20693 length=54 +;)?5;=9996@((((097:41=,A((+)5>9,:''''67+9)=(968-(8;8)3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/SRR1993644.fastqsanger Wed Mar 22 05:23:31 2017 -0400 @@ -0,0 +1,20 @@ +@1/2 +TATACTACTGTCATGTTTGCTTTTTTCGTGCTCATTACCTTATCGTATGCTTCCATCCAAAGATCTAGTTGTTTATAATATTCTCCCGGCCTTACTTCAAC ++SRR1993644.1 1 length=101 +?@<DDDADDFAAC@>GG<FDCBHIIGB:8??DEAHGAFEDF<B?F9DBFFCDHF>48BBCF)8877=@=@C7=E??EBD:AADA;>C3;BB6:A@>>@:3> +@2/2 +AAAAAAACTTTCTTTACAGGCGTAAAGAAAGTGAAATTGACAGTATTTATACATGAAATAGCAATGTCTTTCCCACTTCCCTACGCTGGCATTAACCAGAT ++SRR1993644.2 2 length=101 +<@@DA6DDF42ABGF9F?F@C<EDDDFBGI>04BGC>BFF><?*88BDFDEAFFDGCGEFEEFIFFFEF>EBDBB@@:ACCCAB8@?=;B<@BABBBBB?# +@3/2 +GTGCCATCATTTTCTATCCATTATTATGGATTATTGGCTCATCGTTTAATCCGGGTGATAGTTTATCTGGATCAAGTATTATTCCACAAAATGCAACGTTA ++SRR1993644.3 3 length=101 +=BBFFFFFHHHHHJJJJJJJJJJJJIJIJEIIGIJJJHGIGGIIGHIDIJGHIIJ?FFHGIIJJJJJJJJHGIHHHCEHFFFFFFFFEAECCDADDDDDDD +@4/2 +TTCAAAACACATAAAGCTAATTGCCGCATATGACAATATTGCTAAAATAATTTTTTTACCAGATATCGGTGTTAATCGAAATAATGTACTTTCGGTCATTT ++SRR1993644.4 4 length=101 +BBCFFFFFHHHHHJJJJJJJJJJJJJJJJJJJJJJJIJJJJJJIJJJJJJIIJJJJJJJIJJHHHHHFFDDCDEEEDDDDDDDDEDDDFFFEDDDDDDDDE +@5/2 +ACTTGCCAATGCGATGCACCAATCTTTTCAGCAATAATCGGCAAAATTGGGTCGACTACTCCTATACCTGAAAAGGCAAGGAAAGTAGCCAACACTGTAAT ++SRR1993644.5 5 length=101 +BCCFFFFFHHHHHIJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJIHIIJIJHHHHFFFFFFEEEEEEDDDDDDDDDDDDDCDDDDDDDDDDDDDED
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/list_pe Wed Mar 22 05:23:31 2017 -0400 @@ -0,0 +1,1 @@ +DRR015708