comparison fasterq_dump.xml @ 27:9a776b080193 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/sra-tools commit cbb1499906c801443d72bdf313d86f0182aca010
author iuc
date Sun, 22 Jan 2023 17:51:50 +0000
parents 83c7d564b128
children 4317d3cb6cba
comparison
equal deleted inserted replaced
26:83c7d564b128 27:9a776b080193
1 <tool id="fasterq_dump" name="Faster Download and Extract Reads in FASTQ" version="@VERSION@+galaxy1" profile="18.01"> 1 <tool id="fasterq_dump" name="Faster Download and Extract Reads in FASTQ" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2 <description>format from NCBI SRA</description> 2 <description>format from NCBI SRA</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="edam_ontology"/>
3 <expand macro="bio_tools"/> 7 <expand macro="bio_tools"/>
4 <macros>
5 <import>sra_macros.xml</import>
6 </macros>
7 <expand macro="requirements"/> 8 <expand macro="requirements"/>
8 <version_command>fasterq-dump --version</version_command> 9 <version_command>fasterq-dump --version | tr -d $'\n'</version_command>
9 <command detect_errors="exit_code"><![CDATA[ 10 <command detect_errors="exit_code"><![CDATA[
10 set -o | grep -q pipefail && set -o pipefail; 11 set -o | grep -q pipefail && set -o pipefail;
11 @COPY_CONFIGFILE@ 12 @COPY_CONFIGFILE@
13 @CONFIGURE_RETRY@
12 @SET_ACCESSIONS@ 14 @SET_ACCESSIONS@
13 #if $input.input_select == "file":
14 acc='${input.file.name}' &&
15 ln -s '${input.file}' "\$acc" &&
16 #end if
17 @CONFIGURE_RETRY@
18 while [ \$SRA_PREFETCH_ATTEMPT -le \$SRA_PREFETCH_RETRIES ] ; do 15 while [ \$SRA_PREFETCH_ATTEMPT -le \$SRA_PREFETCH_RETRIES ] ; do
19 fasterq-dump "\$acc" -e \${GALAXY_SLOTS:-1} 16 fasterq-dump "\$acc" -e \${GALAXY_SLOTS:-1}
17 --seq-defline '@\$sn/\$ri'
18 --qual-defline '+'
20 $adv.split 19 $adv.split
21 #if str( $adv.minlen ) != "": 20 #if str( $adv.minlen ) != "":
22 --min-read-len "$adv.minlen" 21 --min-read-len "$adv.minlen"
23 #end if 22 #end if
24 $adv.skip_technical 2>&1 | tee -a '$log'; 23 $adv.skip_technical 2>&1 | tee -a '$log';
31 fi ; 30 fi ;
32 done && 31 done &&
33 mkdir -p output && 32 mkdir -p output &&
34 mkdir -p outputOther && 33 mkdir -p outputOther &&
35 count="\$(ls *.fastq | wc -l)" && 34 count="\$(ls *.fastq | wc -l)" &&
36 echo "There are \$count fastq" && 35 echo "There are \$count fastq files" &&
37 data=(\$(ls *.fastq)) && 36 data=(\$(ls *.fastq)) &&
38 if [ "\$count" -eq 1 ]; then 37 if [ "\$count" -eq 1 ]; then
39 @COMPRESS@ "\${data[0]}" > output/"\${acc}"__single.fastqsanger.gz && 38 @COMPRESS@ "\${data[0]}" > output/"\${acc}"__single.fastqsanger.gz &&
40 rm "\${data[0]}"; 39 rm "\${data[0]}";
41 elif [ "$adv.split" = "--split-3" ]; then 40 elif [ "$adv.split" = "--split-3" ]; then
59 for file in \${data[*]}; do 58 for file in \${data[*]}; do
60 @COMPRESS@ "\$file" > outputOther/"\$file"sanger.gz && 59 @COMPRESS@ "\$file" > outputOther/"\$file"sanger.gz &&
61 rm "\$file"; 60 rm "\$file";
62 done; 61 done;
63 fi; 62 fi;
64 #if $input.input_select=="file_list": 63
65 ) ; done 64 #if $input.input_select != "sra_file":
66 65 ); done;
67 ;
68 #elif $input.input_select=="accession_number":
69 );
70 #end if 66 #end if
67 echo "Done with all accessions."
71 ]]> 68 ]]>
72 </command> 69 </command>
73 <expand macro="configfile_hack"/> 70 <expand macro="configfile_hack"/>
74 <inputs> 71 <inputs>
75 <expand macro="input_conditional"/> 72 <expand macro="input_conditional"/>
107 <test expect_num_outputs="4"> 104 <test expect_num_outputs="4">
108 <param name="input_select" value="accession_number"/> 105 <param name="input_select" value="accession_number"/>
109 <param name="accession" value="ERR086330"/> 106 <param name="accession" value="ERR086330"/>
110 <output_collection name="list_paired" type="list:paired" count="1"> 107 <output_collection name="list_paired" type="list:paired" count="1">
111 <element name="ERR086330"> 108 <element name="ERR086330">
112 <element name="forward" file="ERR086330_1.fastq.gz" decompress="True"> 109 <element name="forward" file="ERR086330_1.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
113 </element> 110 <element name="reverse" file="ERR086330_2.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
114 <element name="reverse" file="ERR086330_2.fastq.gz" decompress="True">
115 </element>
116 </element> 111 </element>
117 </output_collection> 112 </output_collection>
118 </test> 113 </test>
119 <test expect_num_outputs="4"> 114 <test expect_num_outputs="4">
120 <param name="input_select" value="accession_number"/> 115 <param name="input_select" value="accession_number"/>
125 <element name="SRR002702_1" file="SRR002702_1.fastq.gz" ftype="fastqsanger.gz" decompress="True"/> 120 <element name="SRR002702_1" file="SRR002702_1.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
126 <element name="SRR002702_2" file="SRR002702_2.fastq.gz" ftype="fastqsanger.gz" decompress="True"/> 121 <element name="SRR002702_2" file="SRR002702_2.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
127 </output_collection> 122 </output_collection>
128 </test> 123 </test>
129 <test expect_num_outputs="4"> 124 <test expect_num_outputs="4">
130 <param name="input_select" value="file"/> 125 <param name="input_select" value="accession_number"/>
131 <param name="file" value="SRR522874.sra"/> 126 <param name="accession" value="ERR086330, SRR11953971"/>
127 <output_collection name="list_paired" type="list:paired" count="2">
128 <element name="ERR086330">
129 <element name="forward" file="ERR086330_1.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
130 <element name="reverse" file="ERR086330_2.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
131 </element>
132 <element name="SRR11953971">
133 <element name="forward" file="SRR11953971_1.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
134 <element name="reverse" file="SRR11953971_2.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
135 </element>
136 </output_collection>
137 </test>
138 <test expect_num_outputs="4">
139 <param name="input_select" value="sra_file"/>
140 <param name="sra_file" value="SRR522874.sra"/>
132 <param name="split" value="--split-files"/> 141 <param name="split" value="--split-files"/>
133 <param name="skip_technical" value="True"/> 142 <param name="skip_technical" value="True"/>
134 <output_collection name="list_paired" type="list:paired" count="1"> 143 <output_collection name="list_paired" type="list:paired" count="1">
135 <element name="SRR522874.sra"> 144 <element name="SRR522874.sra">
136 <element name="forward" file="SRR522874.sra_2.fastq.gz" decompress="True"> 145 <element name="forward" file="SRR522874.sra_2.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
137 </element> 146 <element name="reverse" file="SRR522874.sra_4.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
138 <element name="reverse" file="SRR522874.sra_4.fastq.gz" decompress="True"> 147 </element>
139 </element> 148 </output_collection>
140 </element> 149 </test>
141 </output_collection> 150 <test expect_num_outputs="4">
142 </test> 151 <param name="input_select" value="sra_file"/>
143 <test expect_num_outputs="4"> 152 <param name="sra_file" value="SRR522874.sra"/>
144 <param name="input_select" value="file"/>
145 <param name="file" value="SRR522874.sra"/>
146 <param name="split" value="--split-files"/> 153 <param name="split" value="--split-files"/>
147 <param name="skip_technical" value="False"/> 154 <param name="skip_technical" value="False"/>
148 <output_collection name="output_collection_other" type="list" count="4"> 155 <output_collection name="output_collection_other" type="list" count="4">
149 <element name="SRR522874_1" file="SRR522874.sra_1.fastq.gz" ftype="fastqsanger.gz" decompress="True"/> 156 <element name="SRR522874_1" file="SRR522874.sra_1.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
150 <element name="SRR522874_2" file="SRR522874.sra_2.fastq.gz" ftype="fastqsanger.gz" decompress="True"/> 157 <element name="SRR522874_2" file="SRR522874.sra_2.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
154 </test> 161 </test>
155 <test expect_num_outputs="4"> 162 <test expect_num_outputs="4">
156 <param name="input_select" value="file_list"/> 163 <param name="input_select" value="file_list"/>
157 <param name="file_list" value="list_sra"/> 164 <param name="file_list" value="list_sra"/>
158 <param name="minlen" value="21"/> 165 <param name="minlen" value="21"/>
159 <output_collection name="output_collection_other" type="list"> 166 <output_collection name="output_collection_other" type="list" count="1">
160 <element name="SRR522874__single" file="SRR522874.fastq.gz" ftype="fastqsanger.gz" decompress="True"/> 167 <element name="SRR522874__single" file="SRR522874.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
161 </output_collection> 168 </output_collection>
162 <output_collection name="list_paired" type="list:paired" count="1"> 169 <output_collection name="list_paired" type="list:paired" count="1">
163 <element name="SRR522874"> 170 <element name="SRR522874">
164 <element name="forward" file="SRR522874_1.fastq.gz" decompress="True"/> 171 <element name="forward" file="SRR522874_1.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
165 <element name="reverse" file="SRR522874_2.fastq.gz" decompress="True"/> 172 <element name="reverse" file="SRR522874_2.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
166 </element> 173 </element>
167 </output_collection> 174 </output_collection>
168 <output_collection name="output_collection" type="list"> 175 <output_collection name="output_collection" type="list" count="1">
169 <element name="SRR002702" file="SRR002702_2.fastq.gz" ftype="fastqsanger.gz" decompress="True"/> 176 <element name="SRR002702" file="SRR002702_2.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
170 </output_collection> 177 </output_collection>
171 </test> 178 </test>
172 <test expect_num_outputs="4"> 179 <test expect_num_outputs="4">
173 <param name="input_select" value="file_list"/> 180 <param name="input_select" value="file_list"/>
174 <param name="file_list" value="sra_manifest.tabular" ftype="sra_manifest.tabular"/> 181 <param name="file_list" value="sra_manifest.tabular" ftype="sra_manifest.tabular"/>
175 <output_collection name="list_paired" type="list:paired" count="1"> 182 <output_collection name="list_paired" type="list:paired" count="1">
176 <element name="SRR11953971"> 183 <element name="SRR11953971">
177 <element name="forward" file="SRR11953971_1.fastq.gz" decompress="True"/> 184 <element name="forward" file="SRR11953971_1.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
178 <element name="reverse" file="SRR11953971_2.fastq.gz" decompress="True"/> 185 <element name="reverse" file="SRR11953971_2.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
179 </element> 186 </element>
180 </output_collection> 187 </output_collection>
181 </test> 188 </test>
182 </tests> 189 </tests>
183 <help><![CDATA[ 190 <help><![CDATA[
184 **What it does?** 191 **What it does?**
185 192
186 This tool extracts data (in fastq_ format) from the Short Read Archive (SRA) at the National Center for Biotechnology Information (NCBI). It is based on the fasterq-dump_ utility of the SRA Toolkit. 193 This tool extracts data (in fastq_ format) from the Short Read Archive (SRA) at the National Center for Biotechnology Information (NCBI). It is based on the fasterq-dump_ utility of the SRA Toolkit. The following applies:
187 194
188 **How to use it?** 195 - if data is paired-ended (or mate-pair) the tool will generate a collection of file pairs, in which each element will be a pair of fastq_ files containing forward and reverse mates.
189 196 - if data is single ended, each element of the collection will be a single fastq_ dataset.
190 There are three ways in which you can download data: 197
191 198
192 1. Data for single accession 199 @HOW_TO_USE_IT@
193 2. Multiple datasets using a list of accessions
194 3. Extract data from already uploaded SRA dataset
195
196 Below we discuss each in detail.
197
198 ------
199
200 **Uploading data for a single accession**
201
202 When you type a single accession number (e.g., `SRR1582967`) into **Accession** box and click **Execute** the tool will fetch data for you.
203 200
204 ----- 201 -----
205 202
206 **Uploading multiple datasets using a list of accessions** 203 **Output**
207 204
208 A more realistic scenario is when you want to upload a number of datasets at once. To do this you need a list of accession, where there is only one accession per line (see below for information on how to generate such a file). Once you have this file: 205 In every case, fastq datasets produced will be saved in Galaxy's history as a collection_ - a single history element containing multiple datasets. In fact, regardless of the experimental design, three collections will be produced: one containing paired-end data, another containing single-end data, and a third one which contains reads which could not be classified.
209 206 Some collections may be empty if the accessions provided in the list do not contain one of the types of data.
210 1. Upload it into your history using Galaxy's upload tool 207
211 2. Once the list of accessions is uploaded choose *List of SRA accessions, one per line* from **select input type** dropdown 208 .. class:: warningmark
212 3. Choose uploaded file within the **sra accession list** field 209
213 4. Click **Execute** 210 When you decide to dump technical reads (in Advanced Options Dump only biological reads is set to No), you will probably find your PAIRED data in the other data collection as it is impossible to determine if it was 2 biological reads or one biological and one technical.
211
212 .. class:: warningmark
213
214 By default, only biological reads are dumped and in case of PAIRED dataset only the spots which have both reads will be in the paired-end collection. The remaining single reads will be in the other collection.
215 To keep all reads, and potentially not have the same number of reads in forward and reverse, use the --split-files option in Advanced Options, Select how to split the spots.
216
217 @ACCESSION_LIST_HOWTO@
214 218
215 ----- 219 -----
216 220
217 **Extract data from already uploaded SRA dataset**
218
219 If an SRA dataset is present in the history, it can be converted into a fastq dataset by setting **select input type** drop-down to *SRA archive in current history*. Just like in the case of extracting data for single accession number the following applies:
220
221 - if data is paired-ended (or mate-pair) the tool will generate a single *interleaved* dataset, in which forward and reverse mates are alternating (see example below).
222 - if data is single ended, a standard fastq dataset will be produced
223
224 -----
225
226 **Output**
227
228 In every case, fastq datasets produced will be saved in Galaxy's history as a collection_ - a single history element containing multiple datasets.
229 In fact, three collections will be produced: one containing paired-end data, another containing single-end data, and a third one which contains reads which could not be classified.
230 Some collections may be empty if the accessions provided in the list do not contain one of the types of data.
231
232 .. class:: warningmark
233
234 When you decide to dump technical reads (in Advanced Options Dump only biological reads is set to No), you will probably find your PAIRED data in the other data collection as it is impossible to determine if it was 2 biological reads or one biological and one technical.
235
236 .. class:: warningmark
237
238 By default, only biological reads are dumped and in case of PAIRED dataset only the spots which have both reads will be in the paired-end collection. The remaining single reads will be in the other collection.
239 To keep all reads, and potentially not have the same number of reads in forward and reverse, use the --split-files option in Advanced Options, Select how to split the spots.
240
241 @ACCESSION_LIST_HOWTO@
242
243 -----
244
245 221
246 .. _fastq: https://en.wikipedia.org/wiki/FASTQ_format 222 .. _fastq: https://en.wikipedia.org/wiki/FASTQ_format
247 .. _fastq-dump: https://trace.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=toolkit_doc&f=fastq-dump
248 .. _fasterq-dump: https://github.com/ncbi/sra-tools/wiki/HowTo:-fasterq-dump 223 .. _fasterq-dump: https://github.com/ncbi/sra-tools/wiki/HowTo:-fasterq-dump
249 .. _collection: https://galaxyproject.org/tutorials/collections/ 224 .. _collection: https://galaxyproject.org/tutorials/collections/
250 .. _link: https://trace.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=studies 225 .. _link: https://trace.ncbi.nlm.nih.gov/Traces/index.html?view=run_browser&display=reads
251 226
252 @SRATOOLS_ATTRRIBUTION@ 227 @SRATOOLS_ATTRRIBUTION@
253
254 ]]> 228 ]]>
255 </help> 229 </help>
256 <expand macro="citation"/> 230 <expand macro="citation"/>
257 </tool> 231 </tool>