comparison rgFastQC.xml @ 19:9da02be9c6cc draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fastqc commit 2f544e337886709995a93d53f394919ce8e4673a
author iuc
date Fri, 10 May 2019 14:23:53 -0400
parents 3e1cdf5406db
children ddf5c37952ac
comparison
equal deleted inserted replaced
18:3e1cdf5406db 19:9da02be9c6cc
1 <tool id="fastqc" name="FastQC" version="0.72"> 1 <tool id="fastqc" name="FastQC" version="0.72">
2 <description>Read Quality reports</description> 2 <description>Read Quality reports</description>
3 <requirements> 3 <requirements>
4 <requirement type="package" version="0.11.8">fastqc</requirement> 4 <requirement type="package" version="0.11.7">fastqc</requirement>
5 </requirements> 5 </requirements>
6 <stdio> 6 <command detect_errors="exit_code"><![CDATA[
7 <exit_code range="1:" level="fatal" description="FastQC returned non zero exit code" />
8 <exit_code range=":-1" level="fatal" description="FastQC returned non zero exit code" />
9 <regex match="There is insufficient memory for the Java Runtime Environment"
10 source="stdout"
11 level="fatal_oom"
12 description="Out of memory error occurred" />
13 </stdio>
14 <command><![CDATA[
15 #import re 7 #import re
16 #set input_name = re.sub('[^\w\-\s]', '_', str($input_file.element_identifier)) 8 #set input_name = re.sub('[^\w\-\s]', '_', str($input_file.element_identifier))
17 9
18 #if $input_file.ext.endswith('.gz'): 10 #if $input_file.ext.endswith('.gz'):
19 #set input_file_sl = $input_name + '.gz' 11 #set input_file_sl = $input_name + '.gz'
33 25
34 ln -s '${input_file}' '${input_file_sl}' && 26 ln -s '${input_file}' '${input_file_sl}' &&
35 mkdir -p '${html_file.files_path}' && 27 mkdir -p '${html_file.files_path}' &&
36 fastqc 28 fastqc
37 --outdir '${html_file.files_path}' 29 --outdir '${html_file.files_path}'
30
38 #if $contaminants.dataset and str($contaminants) > '' 31 #if $contaminants.dataset and str($contaminants) > ''
39 --contaminants '${contaminants}' 32 --contaminants '${contaminants}'
40 #end if
41
42 #if $adapters.dataset and str($adapters) > ''
43 --adapters '${adapters}'
44 #end if 33 #end if
45 34
46 #if $limits.dataset and str($limits) > '' 35 #if $limits.dataset and str($limits) > ''
47 --limits '${limits}' 36 --limits '${limits}'
48 #end if 37 #end if
49 38
50 --quiet 39 --quiet
51 --extract 40 --extract
52 #if $min_length:
53 --min_length $min_length
54 #end if
55 $nogroup
56 --kmers $kmers
57 -f '${format}' 41 -f '${format}'
58 '${input_file_sl}' 42 '${input_file_sl}'
59 43
60 && cp '${html_file.files_path}'/*/fastqc_data.txt output.txt 44 && cp '${html_file.files_path}'/*/fastqc_data.txt output.txt
61 && cp '${html_file.files_path}'/*\.html output.html 45 && cp '${html_file.files_path}'/*\.html output.html
64 <inputs> 48 <inputs>
65 <param format="fastq,fastq.gz,fastq.bz2,bam,sam" name="input_file" type="data" 49 <param format="fastq,fastq.gz,fastq.bz2,bam,sam" name="input_file" type="data"
66 label="Short read data from your current history" /> 50 label="Short read data from your current history" />
67 <param name="contaminants" type="data" format="tabular" optional="true" label="Contaminant list" 51 <param name="contaminants" type="data" format="tabular" optional="true" label="Contaminant list"
68 help="tab delimited file with 2 columns: name and sequence. For example: Illumina Small RNA RT Primer CAAGCAGAAGACGGCATACGA" /> 52 help="tab delimited file with 2 columns: name and sequence. For example: Illumina Small RNA RT Primer CAAGCAGAAGACGGCATACGA" />
69 <param argument="--adapters" type="data" format="tabular" optional="true" label="Adapter list"
70 help="list of adapter sequences which will be explicitly searched against the library. tab delimited file with 2 columns: name and sequence." />
71 <param name="limits" type="data" format="txt" optional="true" label="Submodule and Limit specifying file" 53 <param name="limits" type="data" format="txt" optional="true" label="Submodule and Limit specifying file"
72 help="a file that specifies which submodules are to be executed (default=all) and also specifies the thresholds for each submodule's warning parameter" /> 54 help="a file that specifies which submodules are to be executed (default=all) and also specifies the thresholds for each submodule's warning parameter" />
73 <param argument="--nogroup" type="boolean" truevalue="--nogroup" falsevalue="" checked="False"
74 label="Disable grouping of bases for reads >50bp" help="Using this option will cause fastqc to crash and burn if you use it on really long reads, and your plots may end up a ridiculous size. You have been warned!"/>
75 <param argument="--min_length" type="integer" value="" optional="true"
76 label="Lower limit on the length of the sequence to be shown in the report"
77 help="As long as you set this to a value greater than or equal to your longest read length then this will be the sequence length used to create your read groups. This can be useful for making directly comparable statistics from datasets with somewhat variable read lengths."/>
78 <param argument="--kmers" type="integer" value="7" min="2" max="10"
79 label="length of Kmer to look for" help="note: the Kmer test is disabled and needs to be enabled using a custom Submodule and limits file"/>
80 </inputs> 55 </inputs>
81 <outputs> 56 <outputs>
82 <data format="html" name="html_file" from_work_dir="output.html" label="${tool.name} on ${on_string}: Webpage" /> 57 <data format="html" name="html_file" from_work_dir="output.html" label="${tool.name} on ${on_string}: Webpage" />
83 <data format="txt" name="text_file" from_work_dir="output.txt" label="${tool.name} on ${on_string}: RawData" /> 58 <data format="txt" name="text_file" from_work_dir="output.txt" label="${tool.name} on ${on_string}: RawData" />
84 </outputs> 59 </outputs>
85 <tests> 60 <tests>
86 <test> 61 <test>
87 <param name="input_file" value="1000trimmed.fastq" /> 62 <param name="input_file" value="1000gsample.fastq" />
88 <output name="html_file" file="fastqc_report.html" ftype="html" compare="sim_size" delta="5000"/> 63 <param name="contaminants" value="fastqc_contaminants.txt" ftype="tabular" />
89 <output name="text_file" file="fastqc_data.txt" ftype="txt"/> 64 <output name="html_file" file="fastqc_report.html" ftype="html" lines_diff="100"/>
65 <output name="text_file" file="fastqc_data.txt" ftype="txt" lines_diff="4"/>
90 </test> 66 </test>
91 <test> 67 <test>
92 <param name="input_file" value="1000trimmed.fastq" /> 68 <param name="input_file" value="1000gsample.fastq" />
93 <param name="contaminants" value="fastqc_contaminants.txt" ftype="tabular" /> 69 <param name="limits" value="fastqc_customlimits.txt" ftype="txt" />
94 <output name="html_file" file="fastqc_report_contaminants.html" ftype="html" compare="sim_size" delta="5000"/> 70 <output name="html_file" file="fastqc_report2.html" ftype="html" lines_diff="100"/>
95 <output name="text_file" file="fastqc_data_contaminants.txt" ftype="txt"/> 71 <output name="text_file" file="fastqc_data2.txt" ftype="txt" lines_diff="4"/>
96 </test> 72 </test>
97 <test> 73 <test>
98 <param name="input_file" value="1000trimmed.fastq" /> 74 <param name="input_file" value="1000gsample.fastq.gz" ftype="fastq.gz" />
99 <param name="adapters" value="fastqc_adapters.txt" ftype="tabular" /> 75 <param name="contaminants" value="fastqc_contaminants.txt" ftype="tabular" />
100 <output name="html_file" file="fastqc_report_adapters.html" ftype="html" compare="sim_size" delta="5000"/> 76 <output name="html_file" file="fastqc_report.html" ftype="html" lines_diff="100"/>
101 <output name="text_file" file="fastqc_data_adapters.txt" ftype="txt"/> 77 <output name="text_file" file="fastqc_data.txt" ftype="txt" lines_diff="4"/>
102 </test> 78 </test>
103 <test> 79 <test>
104 <param name="input_file" value="1000trimmed.fastq" /> 80 <param name="input_file" value="1000gsample.fastq.bz2" ftype="fastq.bz2" />
105 <param name="limits" value="fastqc_customlimits.txt" ftype="txt" /> 81 <param name="contaminants" value="fastqc_contaminants.txt" ftype="tabular" />
106 <output name="html_file" file="fastqc_report_customlimits.html" ftype="html" compare="sim_size" delta="5000"/> 82 <output name="html_file" file="fastqc_report.html" ftype="html" lines_diff="100"/>
107 <output name="text_file" file="fastqc_data_customlimits.txt" ftype="txt"/> 83 <output name="text_file" file="fastqc_data.txt" ftype="txt" lines_diff="4"/>
108 </test>
109 <test>
110 <param name="input_file" value="1000trimmed.fastq" ftype="fastq" />
111 <param name="kmers" value="3" />
112 <param name="limits" value="fastqc_customlimits.txt" ftype="txt" />
113 <output name="html_file" file="fastqc_report_kmer.html" ftype="html" compare="sim_size" delta="5000"/>
114 <output name="text_file" file="fastqc_data_kmer.txt" ftype="txt"/>
115 <assert_command>
116 <has_text text="--kmers 3"/>
117 </assert_command>
118 </test>
119 <test>
120 <param name="input_file" value="1000trimmed.fastq" />
121 <param name="min_length" value="108" />
122 <output name="html_file" file="fastqc_report_min_length.html" ftype="html" compare="sim_size" delta="5000"/>
123 <output name="text_file" file="fastqc_data_min_length.txt" ftype="txt"/>
124 </test>
125 <test>
126 <param name="input_file" value="1000trimmed.fastq" ftype="fastq" />
127 <param name="nogroup" value="--nogroup" />
128 <output name="html_file" file="fastqc_report_nogroup.html" ftype="html" compare="sim_size" delta="5000"/>
129 <output name="text_file" file="fastqc_data_nogroup.txt" ftype="txt"/>
130 <assert_command>
131 <has_text text="--nogroup"/>
132 </assert_command>
133 </test> 84 </test>
134 <test> 85 <test>
135 <param name="input_file" value="hisat_output_1.bam" ftype="bam" /> 86 <param name="input_file" value="hisat_output_1.bam" ftype="bam" />
136 <output name="html_file" file="fastqc_report_hisat.html" ftype="html" compare="sim_size" delta="5000"/> 87 <output name="html_file" file="fastqc_report_hisat.html" ftype="html" lines_diff="100"/>
137 <output name="text_file" file="fastqc_data_hisat.txt" ftype="txt"/> 88 <output name="text_file" file="fastqc_data_hisat.txt" ftype="txt" lines_diff="4"/>
138 </test> 89 </test>
139 </tests> 90 </tests>
140 <help><![CDATA[ 91 <help>
141 .. class:: infomark 92 .. class:: infomark
142 93
143 **Purpose** 94 **Purpose**
144 95
145 FastQC aims to provide a simple way to do some quality control checks on raw 96 FastQC aims to provide a simple way to do some quality control checks on raw
146 sequence data coming from high throughput sequencing pipelines. 97 sequence data coming from high throughput sequencing pipelines.
147 It provides a set of analyses which you can use to get a quick 98 It provides a modular set of analyses which you can use to give a quick
148 impression of whether your data has any problems of 99 impression of whether your data has any problems of
149 which you should be aware before doing any further analysis. 100 which you should be aware before doing any further analysis.
150 101
151 The main functions of FastQC are: 102 The main functions of FastQC are:
152 103
200 - Kmer Content 151 - Kmer Content
201 152
202 All except Basic Statistics and Overrepresented sequences are plots. 153 All except Basic Statistics and Overrepresented sequences are plots.
203 .. _FastQC: http://www.bioinformatics.babraham.ac.uk/projects/fastqc/ 154 .. _FastQC: http://www.bioinformatics.babraham.ac.uk/projects/fastqc/
204 .. _Picard-tools: https://broadinstitute.github.io/picard/ 155 .. _Picard-tools: https://broadinstitute.github.io/picard/
205 ]]></help> 156 </help>
206 <citations> 157 <citations>
207 <citation type="bibtex"> 158 <citation type="bibtex">
208 @unpublished{andrews_s, 159 @unpublished{andrews_s,
209 author = {Andrews, S.}, 160 author = {Andrews, S.},
210 keywords = {bioinformatics, ngs, qc}, 161 keywords = {bioinformatics, ngs, qc},