Mercurial > repos > iuc > sra_tools
view fastq_dump.xml @ 3:4732693f4661 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/sra-tools commit f30369bef4e2cef17395cdd62ce81768c93f5edd
author | iuc |
---|---|
date | Tue, 18 Oct 2016 14:54:40 -0400 |
parents | f256cb398262 |
children | 62e4d56ebb6f |
line wrap: on
line source
<!--
  Galaxy wrapper around NCBI's fastq-dump.

  Reads can come from three sources, selected by input.input_select
  (the conditional itself is defined in sra_macros.xml):
    * "file"             : an SRA archive already present in the history
    * "accession_number" : a single run accession typed by the user
    * "file_list"        : a text file of run accessions, one per word

  Outputs are a single dataset for the first two modes, and a pair of
  collections (paired-end and single-end) for the list mode.
-->
<tool id="fastq_dump" name="Extract reads" version="@VERSION@">
    <description>in FASTQ/A format from NCBI SRA.</description>
    <macros>
        <import>sra_macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <stdio>
        <!-- Only a missing binary is treated as fatal; other exit codes are not mapped. -->
        <exit_code range="127" level="fatal" description="Could not locate fastq-dump binary"/>
    </stdio>
    <version_command>fastq-dump --version</version_command>
    <command>
    <![CDATA[
        ## Bind the shell variable "acc" either once (single accession) or
        ## once per word of the accession list file.
        #if $input.input_select=="file_list":
            for acc in `cat '$input.file_list'` ;
            do
        #elif $input.input_select=="accession_number":
            acc="$input.accession" &&
        #end if
        #if $input.input_select=="file_list" or $input.input_select=="accession_number":
            ## Only continue for strings shaped like a run accession
            ## (ERR/SRR/DRR followed by digits). grep is used because the
            ## POSIX "[" builtin has no regex operator.
            echo "\$acc" | grep -q '^[ESD]RR[0-9][0-9]*$' && (
        #end if

        ## Need to set the home directory to the current working directory,
        ## else the tool tries to write to home/.ncbi and fails when used
        ## with a cluster manager.
        export HOME=\$PWD &&

        vdb-config --restore-defaults &&

        #if $input.input_select == "file":
            fastq-dump --log-level fatal --accession '${input.file.name}'
        #else:
            vdb-config -s "/repository/user/main/public/root=\$PWD" &&
            ## Do not use prefetch if region is specified, to avoid downloading
            ## the complete sra file.
            #if ( str( $adv.region ) == "" ) and ( str( $adv.minID ) == "" ) and ( str( $adv.maxID ) == "" ):
                ASCP_PATH=`command -v ascp` &&
                ASCP_KEY=`dirname \$ASCP_PATH`/asperaweb_id_dsa.openssh || true &&
                prefetch -X 200G --ascp-path "\$ASCP_PATH|\$ASCP_KEY" "\$acc" &&
                ## Duplicate vdb-config, in case settings changed between prefetch and
                ## dump command.
                vdb-config -s "/repository/user/main/public/root=\$PWD" &&
            #end if
            fastq-dump --accession "\$acc"
            --split-files
        #end if

        --defline-seq '@\$sn[_\$rn]/\$ri'
        $adv.split
        #if str( $adv.alignments ) == "aligned":
            --aligned
        #end if
        #if str( $adv.alignments ) == "unaligned":
            --unaligned
        #end if
        #if str( $adv.minID ) != "":
            --minSpotId "$adv.minID"
        #end if
        #if str( $adv.maxID ) != "":
            --maxSpotId "$adv.maxID"
        #end if
        #if str( $adv.minlen ) != "":
            --minReadLen "$adv.minlen"
        #end if
        #if str( $adv.readfilter ) != "":
            --read-filter "$adv.readfilter"
        #end if
        #if str( $adv.region ) != "":
            --aligned-region "$adv.region"
        #end if
        #if str( $adv.spotgroups ) != "":
            --spot-groups "$adv.spotgroups"
        #end if
        #if str( $adv.matepairDist ) != "":
            --matepair-distance "$adv.matepairDist"
        #end if
        $adv.clip
        $adv.skip_technical
        #if str( $outputformat ) == "fasta":
            --fasta
        #end if

        ## Final positional argument and output redirection; the closing
        ## parenthesis matches the subshell opened after accession validation.
        #if $input.input_select=="file":
            --stdout "$input.file" > "$output_file"
        #elif $input.input_select=="file_list":
            "\$acc"
        #else:
            --stdout "\$acc" > "$output_accession" )
        #end if

        #if $input.input_select=="file_list":
            ) ; done ;
            ## Rename the per-accession files so that dataset discovery can sort
            ## them into the paired and single-end collections:
            ##   ACC_1 + ACC_2  ->  ACC_forward + ACC_reverse
            ##   remaining ACC_1 ->  ACC__single
            ## Each loop skips the literal pattern left behind by a glob that
            ## matched nothing, and the loops are joined with ";" so that a list
            ## containing only single-end runs still gets its files renamed.
            #if str( $outputformat ) == "fasta":
                for f in *_2.fasta ; do
                    [ -e "\$f" ] || continue ;
                    b=`basename "\$f" _2.fasta` ;
                    mv "\$f" "\${b}_reverse.fasta" ;
                    mv "\${b}_1.fasta" "\${b}_forward.fasta" ;
                done ;
                for f in *_1.fasta ; do
                    [ -e "\$f" ] || continue ;
                    mv "\$f" "`basename "\$f" _1.fasta`__single.fasta" ;
                done
            #else:
                for f in *_2.fastq ; do
                    [ -e "\$f" ] || continue ;
                    b=`basename "\$f" _2.fastq` ;
                    mv "\$f" "\${b}_reverse.fastq" ;
                    mv "\${b}_1.fastq" "\${b}_forward.fastq" ;
                done ;
                for f in *_1.fastq ; do
                    [ -e "\$f" ] || continue ;
                    mv "\$f" "`basename "\$f" _1.fastq`__single.fastq" ;
                done
            #end if
        #end if
    ]]>
    </command>
    <inputs>
        <expand macro="input_conditional"/>
        <param name="outputformat" type="select" label="select output format">
            <option value="fastqsanger">fastq</option>
            <option value="fasta">fasta</option>
        </param>
        <!-- Optional filters; each one is only passed to fastq-dump when it is non-empty. -->
        <section name="adv" title="Advanced Options" expanded="false">
            <param name="minID" type="integer" label="minimum spot ID" optional="true"/>
            <param name="maxID" type="integer" label="maximum spot ID" optional="true"/>
            <param name="minlen" type="integer" label="minimum read length" optional="true"/>
            <param name="split" type="boolean" checked="true" truevalue="--split-spot" falsevalue="">
                <label>split spot by read pairs</label>
            </param>
            <expand macro="alignments"/>
            <expand macro="region"/>
            <expand macro="matepairDist"/>
            <param name="readfilter" type="select" value="">
                <label>filter by value</label>
                <option value="">None</option>
                <option value="pass">pass</option>
                <option value="reject">reject</option>
                <option value="criteria">criteria</option>
                <option value="redacted">redacted</option>
            </param>
            <param name="spotgroups" type="text" label="filter by spot-groups" optional="true"/>
            <param name="clip" type="boolean" truevalue="--clip" falsevalue="">
                <label>apply left and right clips</label>
            </param>
            <param name="skip_technical" type="boolean" truevalue="--skip-technical" falsevalue="" checked="false" label="Dump only biological reads"/>
        </section>
    </inputs>
    <outputs>
        <!-- List mode: paired-end reads, discovered from the *_forward / *_reverse files. -->
        <collection name="list_paired" type="list:paired" label="Pair-end Fast(q|a)">
            <filter>input['input_select'] == "file_list"</filter>
            <!-- Use named regex group to grab pattern
                 <identifier_0>_<identifier_1>.fq. Here identifier_0 is the list
                 identifier in the nested collection and identifier_1 is either
                 forward or reverse (for instance samp1_forward.fq).
            -->
            <discover_datasets pattern="(?P&lt;identifier_0&gt;[^_]+)_(?P&lt;identifier_1&gt;[^_]+)\.fastq" ext="fastqsanger" visible="false"/>
            <discover_datasets pattern="(?P&lt;identifier_0&gt;[^_]+)_(?P&lt;identifier_1&gt;[^_]+)\.fasta" ext="fasta" visible="false"/>
        </collection>
        <!-- List mode: single-end reads, discovered from the *__single files. -->
        <collection name="output_collection" type="list" label="Single-end Fast(q|a)">
            <filter>input['input_select'] == "file_list"</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.+)__single\.fastq" directory="." ext="fastqsanger"/>
            <discover_datasets pattern="(?P&lt;designation&gt;.+)__single\.fasta" directory="." ext="fasta"/>
        </collection>
        <!-- Single accession mode. -->
        <data format="fastqsanger" name="output_accession">
            <filter>input['input_select'] == "accession_number"</filter>
            <change_format>
                <when input="outputformat" value="fasta" format="fasta"/>
            </change_format>
        </data>
        <!-- SRA archive from history mode. -->
        <data format="fastqsanger" name="output_file" label="${input.file.name}.${outputformat}">
            <filter>input['input_select'] == "file"</filter>
            <change_format>
                <when input="outputformat" value="fasta" format="fasta"/>
            </change_format>
        </data>
    </outputs>
    <tests>
        <!-- Technical reads are dropped when skip_technical is set. -->
        <test>
            <param name="input_select" value="accession_number"/>
            <param name="outputformat" value="fastqsanger"/>
            <param name="accession" value="SRR044777"/>
            <param name="skip_technical" value="True"/>
            <output name="output_accession">
                <assert_contents>
                    <not_has_text text="rRNA_primer"/>
                    <has_text text="F47USSH02GNP1D"/>
                </assert_contents>
            </output>
        </test>
        <!-- Restricting the spot range yields the expected small FASTQ. -->
        <test>
            <param name="input_select" value="accession_number"/>
            <param name="outputformat" value="fastqsanger"/>
            <param name="accession" value="SRR925743"/>
            <param name="maxID" value="5"/>
            <output name="output_accession" file="fastq_dump_result.fastq" ftype="fastqsanger"/>
        </test>
    </tests>
    <help>
        This tool extracts reads from SRA archives using fastq-dump.
        The fastq-dump program is developed at NCBI, and is available at
        http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software.
        NB: Single-end or pair-end collections may be empty if given SRRs LibraryLayout contains only either SINGLE or PAIRED respectively
        @SRATOOLS_ATTRRIBUTION@
    </help>
    <expand macro="citation"/>
</tool>