Mercurial > repos > iuc > sra_tools
changeset 1:462ee06c9358 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/sra-tools commit 4defaa3ff1c21e2ec39033bfe63ee69471104ede
author | iuc |
---|---|
date | Tue, 17 May 2016 14:14:50 -0400 |
parents | b723c120161a |
children | f256cb398262 |
files | fastq_dump.xml sam_dump.xml sra_macros.xml tool_dependencies.xml |
diffstat | 4 files changed, 161 insertions(+), 55 deletions(-) [+] |
line wrap: on
line diff
--- a/fastq_dump.xml Sun Dec 06 09:04:37 2015 -0500 +++ b/fastq_dump.xml Tue May 17 14:14:50 2016 -0400 @@ -1,4 +1,4 @@ -<tool id="fastq_dump" name="Extract reads" version="1.2.5"> +<tool id="fastq_dump" name="Extract reads" version="1.3.0"> <description>in FASTQ/A format from NCBI SRA.</description> <macros> <import>sra_macros.xml</import> @@ -10,6 +10,18 @@ <version_command>fastq-dump --version</version_command> <command> <![CDATA[ + + #if $input.input_select=="file_list": + for acc in `cat $input.file_list` ; + do + #elif $input.input_select=="accession_number": + acc="$input.accession" && + #end if + + #if $input.input_select=="file_list" or $input.input_select=="accession_number": + [ ""\$acc" =~ ^[E|S|D]RR[0-9]{1,}$" ] && ( + #end if + ## Need to set the home directory to the current working directory, ## else the tool tries to write to home/.ncbi and fails when used ## with a cluster manager. @@ -24,15 +36,16 @@ #if ( str( $adv.region ) == "" ) and ( str( $adv.minID ) == "" ) and ( str( $adv.maxID ) == "" ): ASCP_PATH=`command -v ascp` && ASCP_KEY=`dirname \$ASCP_PATH`/asperaweb_id_dsa.openssh || true && - prefetch --ascp-path "\$ASCP_PATH|\$ASCP_KEY" $input.accession && + prefetch --ascp-path "\$ASCP_PATH|\$ASCP_KEY" "\$acc" && ## Duplicate vdb-config, in case settings changed between prefetch and ## dump command. 
vdb-config -s "/repository/user/main/public/root=\$PWD" && - #end if - fastq-dump --accession "$input.accession" + #end if + fastq-dump --accession "\$acc" + --split-files #end if --defline-seq '@\$sn[_\$rn]/\$ri' - --stdout + $adv.split #if str( $adv.alignments ) == "aligned": --aligned @@ -63,14 +76,45 @@ #end if $adv.clip $adv.skip_technical + #if str( $outputformat ) == "fasta": --fasta #end if #if $input.input_select=="file": + --stdout "$input.file" > "$output_file" + #elif $input.input_select=="file_list": + "\$acc" #else: - "$input.accession" > "$output_accession" + --stdout + "\$acc" > "$output_accession" ) #end if + + #if $input.input_select=="file_list": + ) ; done + + ; + + + #if str( $outputformat ) == "fasta": + + for f in *_2.fasta ; do mv "\$f" "`basename \$f _2.fasta`_reverse.fasta" ; mv "`basename \$f _2.fasta`_1.fasta" "`basename \$f _2.fasta`_forward.fasta" ; done && + for f in *_1.fasta; do mv "\$f" "`basename \$f _1.fasta`__single.fasta"; done + + #else: + + for f in *_2.fastq ; do mv "\$f" "`basename \$f _2.fastq`_reverse.fastq" ; mv "`basename \$f _2.fastq`_1.fastq" "`basename \$f _2.fastq`_forward.fastq" ; done && + for f in *_1.fastq; do mv "\$f" "`basename \$f _1.fastq`__single.fastq"; done + + #end if + + + + + + #end if + + ]]> </command> <inputs> @@ -105,45 +149,62 @@ </section> </inputs> <outputs> - <data format="fastq" name="output_accession" label="${input.accession}.${outputformat}"> - <filter>input['input_select'] == "accession_number"</filter> - <change_format> - <when input="outputformat" value="fasta" format="fasta"/> - </change_format> - </data> - <data format="fastq" name="output_file" label="${input.file.name}.${outputformat}"> - <filter>input['input_select'] == "file"</filter> - <change_format> - <when input="outputformat" value="fasta" format="fasta"/> - </change_format> - </data> + <collection name="list_paired" type="list:paired" label="Pair-end Fast(q|a)"> + <filter>input['input_select'] == "file_list"</filter> + <!-- 
Use named regex group to grab pattern + <identifier_0>_<identifier_1>.fq. Here identifier_0 is the list + identifier in the nested collection and identifier_1 is either + forward or reverse (for instance samp1_forward.fq). + --> + <discover_datasets pattern="(?P<identifier_0>[^_]+)_(?P<identifier_1>[^_]+)\.fastq" ext="fastqsanger" visible="false" /> + <discover_datasets pattern="(?P<identifier_0>[^_]+)_(?P<identifier_1>[^_]+)\.fasta" ext="fasta" visible="false" /> + </collection> + <collection name="output_collection" type='list' label="Single-end Fast(q|a)"> + <filter>input['input_select'] == "file_list"</filter> + <discover_datasets pattern="(?P<designation>.+)__single\.fastq" directory="." ext='fastqsanger'/> + <discover_datasets pattern="(?P<designation>.+)__single\.fasta" directory="." ext='fasta'/> + </collection> + <data format="fastqsanger" name="output_accession" > + <filter>input['input_select'] == "accession_number"</filter> + <change_format> + <when input="outputformat" value="fasta" format="fasta"/> + </change_format> + </data> + <data format="fastqsanger" name="output_file" label="${input.file.name}.${outputformat}"> + <filter>input['input_select'] == "file"</filter> + <change_format> + <when input="outputformat" value="fasta" format="fasta"/> + </change_format> + </data> </outputs> <tests> - <test> - <param name="input_select" value="accession_number"/> - <param name="outputformat" value="fastqsanger"/> - <param name="accession" value="SRR044777"/> - <param name="skip_technical" value="True"/> - <output name="output_accession"> - <assert_contents> - <not_has_text text="rRNA_primer"/> - <has_text text="F47USSH02GNP1D" /> - </assert_contents> - </output> - </test> - <test> - <param name="input_select" value="accession_number"/> - <param name="outputformat" value="fastqsanger"/> - <param name="accession" value="SRR925743"/> - <param name="maxID" value="5"/> - <output name="output_accession" file="fastq_dump_result.fastq" ftype="fastq"/> - </test> + 
<test> + <param name="input_select" value="accession_number"/> + <param name="outputformat" value="fastqsanger"/> + <param name="accession" value="SRR044777"/> + <param name="skip_technical" value="True"/> + <output name="output_accession"> + <assert_contents> + <not_has_text text="rRNA_primer"/> + <has_text text="F47USSH02GNP1D" /> + </assert_contents> + </output> + </test> + <test> + <param name="input_select" value="accession_number"/> + <param name="outputformat" value="fastqsanger"/> + <param name="accession" value="SRR925743"/> + <param name="maxID" value="5"/> + <output name="output_accession" file="fastq_dump_result.fastq" ftype="fastqsanger"/> + </test> </tests> <help> This tool extracts reads from SRA archives using fastq-dump. The fastq-dump program is developed at NCBI, and is available at http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software. + + NB: Single-end or pair-end collections may be empty if given SRRs LibraryLayout contains only either SINGLE or PAIRED respectively @SRATOOLS_ATTRRIBUTION@ </help> <expand macro="citation"/> -</tool> + </tool>
--- a/sam_dump.xml Sun Dec 06 09:04:37 2015 -0500 +++ b/sam_dump.xml Tue May 17 14:14:50 2016 -0400 @@ -1,4 +1,4 @@ -<tool id="sam_dump" name="Extract reads" version="1.2.5"> +<tool id="sam_dump" name="Extract reads" version="1.3.0"> <description>in SAM or BAM format from NCBI SRA.</description> <macros> <import>sra_macros.xml</import> @@ -7,6 +7,19 @@ <version_command>sam-dump --version</version_command> <command> <![CDATA[ + #if $input.input_select=="file_list": + for acc in `cat $input.file_list` ; + do + #elif $input.input_select=="accession_number": + acc="$input.accession" && + #end if + + #if $input.input_select=="file_list" or $input.input_select=="accession_number": + [ ""\$acc" =~ ^[E|S|D]RR[0-9]{1,}$" ] && ( + #end if + + + ## Need to set the home directory to the current working directory, ## else the tool tries to write to home/.ncbi and fails when used ## with a cluster manager. @@ -18,7 +31,7 @@ #if ( str( $adv.region ) == "" ): ASCP_PATH=`command -v ascp` && ASCP_KEY=`dirname \$ASCP_PATH`/asperaweb_id_dsa.openssh || true && - prefetch --ascp-path "\$ASCP_PATH|\$ASCP_KEY" "$input.accession" && + prefetch --ascp-path "\$ASCP_PATH|\$ASCP_KEY" "\$acc" && ## Duplicate vdb-config, in case settings changed between prefetch and ## dump command. 
vdb-config -s "/repository/user/main/public/root=\$PWD" && @@ -50,16 +63,30 @@ #if $input.input_select == "file": "$input.file" #elif $input.input_select == "accession_number": - "$input.accession" + "\$acc" + #elif $input.input_select=="file_list": + "\$acc" #end if + #if str( $outputformat ) == "bam": | samtools view -Sb - 2> /dev/null #end if #if $input.input_select == "file": > "$output_file" #elif $input.input_select == "accession_number": - > "$output_accession" + > "$output_accession" ) #end if + + #if $input.input_select=="file_list": + #if str( $outputformat ) == "bam": + > "\$acc.bam" + #elif str( $outputformat ) == "sam": + > "\$acc.sam" + #end if + ) ; done + #end if + + ]]> </command> <inputs> @@ -86,6 +113,11 @@ </section> </inputs> <outputs> + <collection name="output_collection" type='list'> + <filter>input['input_select'] == "file_list"</filter> + <discover_datasets pattern="(?P<designation>.+)\.bam" directory="." ext='bam'/> + <discover_datasets pattern="(?P<designation>.+)\.sam" directory="." ext='sam'/> + </collection> <data name="output_accession" format="bam" label="${input.accession}.${outputformat}"> <filter>input['input_select'] == "accession_number"</filter> <change_format>
--- a/sra_macros.xml Sun Dec 06 09:04:37 2015 -0500 +++ b/sra_macros.xml Tue May 17 14:14:50 2016 -0400 @@ -1,9 +1,10 @@ <macros> <macro name="requirements"> <requirements> - <requirement type="package" version="1.1.3">ngs_sdk</requirement> - <requirement type="package" version="2.5.2">ncbi_vdb</requirement> - <requirement type="package" version="2.5.2">sra_toolkit</requirement> + <requirement type="package" version="1.2.3">ngs_sdk</requirement> + <requirement type="package" version="2.6.2">ncbi_vdb</requirement> + <requirement type="package" version="2.6.2">sra_toolkit</requirement> + <requirement type="package" version="2.6.2">sra-tools</requirement> <requirement type="package" version="5.18.1">perl</requirement> </requirements> </macro> @@ -12,13 +13,17 @@ <param name="input_select" type="select" label="select input type"> <option value="accession_number">SRR accession</option> <option value="file">SRA archive in current history</option> + <option value="file_list">List of SRA accession, one per line</option> </param> <when value="accession_number"> - <param name="accession" type="text" label="SRR accession" help="Must start with SRR, e.g. SRR925743"/> + <param name="accession" type="text" label="SRR accession" help="Must start with SRR,DRR or ERR, e.g. SRR925743 , ERR343809"/> </when> <when value="file"> <param format="sra" name="file" type="data" label="sra archive"/> </when> + <when value="file_list"> + <param format="txt" name="file_list" type="data" label="sra accession list"/> + </when> </conditional> </macro> <macro name="alignments"> @@ -45,8 +50,16 @@ <citation type="doi">10.1093/nar/gkq1019</citation> </citations> </macro> - <token name="@SRATOOLS_ATTRRIBUTION@">Browse the NCBI SRA for SRR accessions at http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=studies. + <token name="@SRATOOLS_ATTRRIBUTION@"> + Browse the NCBI SRA for SRR accessions at http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=studies. 
+ Galaxy tool wrapper originally written by Matt Shirley (mdshw5 at gmail.com). + + Wrapper modified by Philip Mabon ( philip.mabon at phac-aspc.gc.ca ). + Tool dependencies, clean-up and bug-fixes by Marius van den Beek (m.vandenbeek at gmail.com). - For support and bug reports contact Matt Shirley or Marius van den Beek or go to https://github.com/galaxyproject/tools-iuc.</token> + + For support and bug reports contact Matt Shirley or Marius van den Beek or go to https://github.com/galaxyproject/tools-iuc. + + </token> </macros>
--- a/tool_dependencies.xml Sun Dec 06 09:04:37 2015 -0500
+++ b/tool_dependencies.xml Tue May 17 14:14:50 2016 -0400
@@ -1,15 +1,15 @@
 <?xml version="1.0"?>
 <tool_dependency>
-    <package name="ncbi_vdb" version="2.5.2">
-        <repository changeset_revision="b980182ad267" name="package_ncbi_vdb_2_5_2" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+    <package name="ncbi_vdb" version="2.6.2">
+        <repository changeset_revision="be38d36c735d" name="package_ncbi_vdb_2_6_2" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
     </package>
-    <package name="ngs_sdk" version="1.1.3">
-        <repository changeset_revision="1197d3c98375" name="package_ngs_sdk_1_1_3" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+    <package name="ngs_sdk" version="1.2.3">
+        <repository changeset_revision="3da0f4d34e92" name="package_ngs_sdk_1_2_3" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
     </package>
-    <package name="sra_toolkit" version="2.5.2">
-        <repository changeset_revision="ab06c0240705" name="package_sra_toolkit_2_5_2" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+    <package name="sra_toolkit" version="2.6.2">
+        <repository changeset_revision="98414d1f9480" name="package_sra_toolkit_2_6_2" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
     </package>
     <package name="perl" version="5.18.1">
-        <repository changeset_revision="8b3509930a44" name="package_perl_5_18" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+        <repository changeset_revision="35f117d7396b" name="package_perl_5_18" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
     </package>
 </tool_dependency>