Mercurial > repos > iuc > sra_tools
diff fasterq_dump.xml @ 27:9a776b080193 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/sra-tools commit cbb1499906c801443d72bdf313d86f0182aca010
author | iuc |
---|---|
date | Sun, 22 Jan 2023 17:51:50 +0000 |
parents | 83c7d564b128 |
children | 4317d3cb6cba |
line wrap: on
line diff
--- a/fasterq_dump.xml Fri Sep 03 16:17:53 2021 +0000 +++ b/fasterq_dump.xml Sun Jan 22 17:51:50 2023 +0000 @@ -1,22 +1,21 @@ -<tool id="fasterq_dump" name="Faster Download and Extract Reads in FASTQ" version="@VERSION@+galaxy1" profile="18.01"> +<tool id="fasterq_dump" name="Faster Download and Extract Reads in FASTQ" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> <description>format from NCBI SRA</description> - <expand macro="bio_tools"/> <macros> - <import>sra_macros.xml</import> + <import>macros.xml</import> </macros> + <expand macro="edam_ontology"/> + <expand macro="bio_tools"/> <expand macro="requirements"/> - <version_command>fasterq-dump --version</version_command> + <version_command>fasterq-dump --version | tr -d $'\n'</version_command> <command detect_errors="exit_code"><![CDATA[ set -o | grep -q pipefail && set -o pipefail; @COPY_CONFIGFILE@ + @CONFIGURE_RETRY@ @SET_ACCESSIONS@ - #if $input.input_select == "file": - acc='${input.file.name}' && - ln -s '${input.file}' "\$acc" && - #end if - @CONFIGURE_RETRY@ while [ \$SRA_PREFETCH_ATTEMPT -le \$SRA_PREFETCH_RETRIES ] ; do fasterq-dump "\$acc" -e \${GALAXY_SLOTS:-1} + --seq-defline '@\$sn/\$ri' + --qual-defline '+' $adv.split #if str( $adv.minlen ) != "": --min-read-len "$adv.minlen" @@ -33,7 +32,7 @@ mkdir -p output && mkdir -p outputOther && count="\$(ls *.fastq | wc -l)" && - echo "There are \$count fastq" && + echo "There are \$count fastq files" && data=(\$(ls *.fastq)) && if [ "\$count" -eq 1 ]; then @COMPRESS@ "\${data[0]}" > output/"\${acc}"__single.fastqsanger.gz && @@ -61,13 +60,11 @@ rm "\$file"; done; fi; - #if $input.input_select=="file_list": - ) ; done - - ; - #elif $input.input_select=="accession_number": - ); + + #if $input.input_select != "sra_file": + ); done; #end if + echo "Done with all accessions." 
]]> </command> <expand macro="configfile_hack"/> @@ -109,10 +106,8 @@ <param name="accession" value="ERR086330"/> <output_collection name="list_paired" type="list:paired" count="1"> <element name="ERR086330"> - <element name="forward" file="ERR086330_1.fastq.gz" decompress="True"> - </element> - <element name="reverse" file="ERR086330_2.fastq.gz" decompress="True"> - </element> + <element name="forward" file="ERR086330_1.fastq.gz" ftype="fastqsanger.gz" decompress="True"/> + <element name="reverse" file="ERR086330_2.fastq.gz" ftype="fastqsanger.gz" decompress="True"/> </element> </output_collection> </test> @@ -127,22 +122,34 @@ </output_collection> </test> <test expect_num_outputs="4"> - <param name="input_select" value="file"/> - <param name="file" value="SRR522874.sra"/> + <param name="input_select" value="accession_number"/> + <param name="accession" value="ERR086330, SRR11953971"/> + <output_collection name="list_paired" type="list:paired" count="2"> + <element name="ERR086330"> + <element name="forward" file="ERR086330_1.fastq.gz" ftype="fastqsanger.gz" decompress="True"/> + <element name="reverse" file="ERR086330_2.fastq.gz" ftype="fastqsanger.gz" decompress="True"/> + </element> + <element name="SRR11953971"> + <element name="forward" file="SRR11953971_1.fastq.gz" ftype="fastqsanger.gz" decompress="True"/> + <element name="reverse" file="SRR11953971_2.fastq.gz" ftype="fastqsanger.gz" decompress="True"/> + </element> + </output_collection> + </test> + <test expect_num_outputs="4"> + <param name="input_select" value="sra_file"/> + <param name="sra_file" value="SRR522874.sra"/> <param name="split" value="--split-files"/> <param name="skip_technical" value="True"/> <output_collection name="list_paired" type="list:paired" count="1"> <element name="SRR522874.sra"> - <element name="forward" file="SRR522874.sra_2.fastq.gz" decompress="True"> - </element> - <element name="reverse" file="SRR522874.sra_4.fastq.gz" decompress="True"> - </element> + <element 
name="forward" file="SRR522874.sra_2.fastq.gz" ftype="fastqsanger.gz" decompress="True"/> + <element name="reverse" file="SRR522874.sra_4.fastq.gz" ftype="fastqsanger.gz" decompress="True"/> </element> </output_collection> </test> <test expect_num_outputs="4"> - <param name="input_select" value="file"/> - <param name="file" value="SRR522874.sra"/> + <param name="input_select" value="sra_file"/> + <param name="sra_file" value="SRR522874.sra"/> <param name="split" value="--split-files"/> <param name="skip_technical" value="False"/> <output_collection name="output_collection_other" type="list" count="4"> @@ -156,16 +163,16 @@ <param name="input_select" value="file_list"/> <param name="file_list" value="list_sra"/> <param name="minlen" value="21"/> - <output_collection name="output_collection_other" type="list"> + <output_collection name="output_collection_other" type="list" count="1"> <element name="SRR522874__single" file="SRR522874.fastq.gz" ftype="fastqsanger.gz" decompress="True"/> </output_collection> <output_collection name="list_paired" type="list:paired" count="1"> <element name="SRR522874"> - <element name="forward" file="SRR522874_1.fastq.gz" decompress="True"/> - <element name="reverse" file="SRR522874_2.fastq.gz" decompress="True"/> + <element name="forward" file="SRR522874_1.fastq.gz" ftype="fastqsanger.gz" decompress="True"/> + <element name="reverse" file="SRR522874_2.fastq.gz" ftype="fastqsanger.gz" decompress="True"/> </element> </output_collection> - <output_collection name="output_collection" type="list"> + <output_collection name="output_collection" type="list" count="1"> <element name="SRR002702" file="SRR002702_2.fastq.gz" ftype="fastqsanger.gz" decompress="True"/> </output_collection> </test> @@ -174,8 +181,8 @@ <param name="file_list" value="sra_manifest.tabular" ftype="sra_manifest.tabular"/> <output_collection name="list_paired" type="list:paired" count="1"> <element name="SRR11953971"> - <element name="forward" file="SRR11953971_1.fastq.gz" 
decompress="True"/> - <element name="reverse" file="SRR11953971_2.fastq.gz" decompress="True"/> + <element name="forward" file="SRR11953971_1.fastq.gz" ftype="fastqsanger.gz" decompress="True"/> + <element name="reverse" file="SRR11953971_2.fastq.gz" ftype="fastqsanger.gz" decompress="True"/> </element> </output_collection> </test> @@ -183,51 +190,20 @@ <help><![CDATA[ **What it does?** -This tool extracts data (in fastq_ format) from the Short Read Archive (SRA) at the National Center for Biotechnology Information (NCBI). It is based on the fasterq-dump_ utility of the SRA Toolkit. - -**How to use it?** - -There are three ways in which you can download data: - - 1. Data for single accession - 2. Multiple datasets using a list of accessions - 3. Extract data from already uploaded SRA dataset - -Below we discuss each in detail. - ------- - -**Uploading data for a single accession** - -When you type a single accession number (e.g., `SRR1582967`) into **Accession** box and click **Execute** the tool will fetch data for you. +This tool extracts data (in fastq_ format) from the Short Read Archive (SRA) at the National Center for Biotechnology Information (NCBI). It is based on the fasterq-dump_ utility of the SRA Toolkit. The following applies: ------ - -**Uploading multiple datasets using a list of accessions** - -A more realistic scenario is when you want to upload a number of datasets at once. To do this you need a list of accession, where there is only one accession per line (see below for information on how to generate such a file). Once you have this file: + - if data is paired-ended (or mate-pair) the tool will generate a collection of file pairs, in which each element will be a pair of fastq_ files containing forward and reverse mates. + - if data is single ended, each element of the collection will be a single fastq_ dataset. - 1. Upload it into your history using Galaxy's upload tool - 2. 
Once the list of accessions is uploaded choose *List of SRA accessions, one per line* from **select input type** dropdown - 3. Choose uploaded file within the **sra accession list** field - 4. Click **Execute** ------ - -**Extract data from already uploaded SRA dataset** - -If a SRA dataset is present in the history, it can be converted into fastq dataset by setting **select input type** drop-down to *SRA archive in current history*. Just like in the case of extracting data for single accession number the following applies: - - - if data is paired-ended (or mate-pair) the tool will generate a single *interleaved* dataset, in which forward and reverse mates are alternating (see example below). - - if data is single ended, a standard fastq dataset will be produced +@HOW_TO_USE_IT@ ----- **Output** -In every case, fastq datasets produced will be saved in Galaxy's history as a collection_ - a single history element containing multiple datasets. -In fact, three collections will be produced: one containing paired-end data, another containing single-end data, and a third one which contains reads which could not be classified. -Some collections may be empty if the accessions provided in the list does not contain one of the type of data. +In every case, fastq datasets produced will be saved in Galaxy's history as a collection_ - a single history element containing multiple datasets. In fact, regardless of the experimental design, three collections will be produced: one containing paired-end data, another containing single-end data, and a third one which contains reads which could not be classified. +Some collections may be empty if the accessions provided in the list do not contain one of the type of data. .. class:: warningmark @@ -236,7 +212,7 @@ .. class:: warningmark By default, only biological reads are dumped and in case of PAIRED dataset only the spots which have both reads will be in the paired-end collection. 
The remaining single reads will be in the other collection. -To keep all reads, and maybe do not have the same number of reads in forward and reverse use the --split-files option in Advanced Options, Select how to split the spots. +To keep all reads, and potentially not have the same number of reads in forward and reverse use the --split-files option in Advanced Options, Select how to split the spots. @ACCESSION_LIST_HOWTO@ @@ -244,14 +220,12 @@ .. _fastq: https://en.wikipedia.org/wiki/FASTQ_format -.. _fastq-dump: https://trace.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=toolkit_doc&f=fastq-dump .. _fasterq-dump: https://github.com/ncbi/sra-tools/wiki/HowTo:-fasterq-dump .. _collection: https://galaxyproject.org/tutorials/collections/ -.. _link: https://trace.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=studies +.. _link: https://trace.ncbi.nlm.nih.gov/Traces/index.html?view=run_browser&display=reads @SRATOOLS_ATTRRIBUTION@ - ]]> </help> <expand macro="citation"/> - </tool> +</tool>