comparison mageck_count.xml @ 2:9527a3d6ebd2 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mageck commit 49e456dda49db1f52fc876f406a10273a408b1a2
author iuc
date Wed, 04 Apr 2018 11:03:29 -0400
parents 4d72d204dcfa
children d8f26ae1e909
comparison
equal deleted inserted replaced
1:4d72d204dcfa 2:9527a3d6ebd2
1 <?xml version="1.0"?> 1 <?xml version="1.0"?>
2 <tool id="mageck_count" name="MAGeCK count" version="@VERSION@.1" > 2 <tool id="mageck_count" name="MAGeCK count" version="@VERSION@.2" >
3 <description>- collect sgRNA read counts from read mapping files</description> 3 <description>- collect sgRNA read counts from read mapping files</description>
4 <macros> 4 <macros>
5 <import>mageck_macros.xml</import> 5 <import>mageck_macros.xml</import>
6 </macros> 6 </macros>
7 <expand macro="requirements"> 7 <expand macro="requirements">
9 </expand> 9 </expand>
10 <expand macro="version" /> 10 <expand macro="version" />
11 <command detect_errors="exit_code"><![CDATA[ 11 <command detect_errors="exit_code"><![CDATA[
12 12
13 #if str($reads.format_select) == "files": 13 #if str($reads.format_select) == "files":
14 14 #import re
15 #if $reads.sample.is_of_type('fastq.gz', 'fastqsanger.gz'): 15 #set $names = []
16 ln -s '${reads.sample}' 'input.gz' && 16 #for $i, $sample in enumerate($reads.sample):
17 #set $infile = 'input.gz' 17
18 #elif $reads.sample.is_of_type('fastq'): 18 #if $sample.is_of_type('fastq.gz', 'fastqsanger.gz'):
19 ln -s '${reads.sample}' 'input.fastq' && 19 ln -s '${sample}' input_${i}.gz &&
20 #set $infile = 'input.fastq' 20 #set $infile = 'input' + str($i) + '.gz'
21 #elif $reads.sample.is_of_type('bam'): 21 #elif $sample.is_of_type('fastq'):
22 ln -s '${reads.sample}' 'input.bam' && 22 ln -s '${sample}' input_${i}.fastq &&
23 #set $infile = 'input.bam' 23 #set $infile = 'input' + str($i) + 'fastq'
24 #end if 24 #elif $sample.is_of_type('bam'):
25 25 ln -s '${sample}' input_${i}.bam &&
26 #end if 26 #set $infile = 'input' + str($i) + 'bam'
27 #end if
28 #silent $names.append(re.sub('[^\w\-\s]', '_', str($sample.element_identifier)))
29 #end for
30
31 #end if
32
27 33
28 mageck count 34 mageck count
29 35
30 #if str($reads.format_select) == "files": 36 #if str($reads.format_select) == "files":
31 --fastq $infile 37
32 -l '$reads.sgrna_library_file' 38 -l '$reads.sgrna_library_file'
39
40 --fastq input_*
41
33 #if $reads.sample_label: 42 #if $reads.sample_label:
34 --sample-label '$reads.sample_label' 43 --sample-label '$reads.sample_label'
35 #else: 44 #else:
36 --sample-label '$reads.sample.element_identifier' 45 --sample-label ${ ','.join( $names ) }
37 #end if 46 #end if
47
38 #elif str($reads.format_select) == "table": 48 #elif str($reads.format_select) == "table":
39 -k '$reads.counts' 49 -k '$reads.counts'
40 #if '$sgrna_library_file': 50 #if '$sgrna_library_file':
41 -l '$sgrna_library_file' 51 -l '$sgrna_library_file'
42 #end if 52 #end if
53 63
54 #if $adv.trim5: 64 #if $adv.trim5:
55 --trim-5 $adv.trim5 65 --trim-5 $adv.trim5
56 #end if 66 #end if
57 67
58 --norm-method $adv.norm_method 68 #if $adv.norm_method:
69 --norm-method $adv.norm_method
70 #end if
59 71
60 #if $adv.control_sgrna: 72 #if $adv.control_sgrna:
61 --control-sgrna $adv.control_sgrna 73 --control-sgrna $adv.control_sgrna
62 #end if 74 #end if
63 75
77 89
78 #if $out.pdfreportOpt: 90 #if $out.pdfreportOpt:
79 && 91 &&
80 gs -dBATCH -dNOPAUSE -q -dPDFSETTINGS=/prepress -sDEVICE=pdfwrite -sOutputFile=merged.pdf *.pdf 92 gs -dBATCH -dNOPAUSE -q -dPDFSETTINGS=/prepress -sDEVICE=pdfwrite -sOutputFile=merged.pdf *.pdf
81 #end if 93 #end if
94
82 ]]></command> 95 ]]></command>
83 <inputs> 96 <inputs>
84 <conditional name="reads"> 97 <conditional name="reads">
85 <param name="format_select" type="select" label="Reads Files or Count Table?" help="You can choose to input either separate files of reads (one per sample) or a single count table"> 98 <param name="format_select" type="select" label="Reads Files or Count Table?" help="You can choose to input either separate files of reads (one per sample) or a single count table">
86 <option value="files">Separate Reads files</option> 99 <option value="files">Separate Reads files</option>
87 <option value="table">Single Count table</option> 100 <option value="table">Single Count table</option>
88 </param> 101 </param>
89 <when value="files"> 102 <when value="files">
90 <param name="sample" argument="--fastq" type="data" format="fastq,fastq.gz,bam" multiple="false" label="Sample reads" help="The input reads must be in FASTQ, FASTQ.GZ or BAM format and all files must be in the same format." /> 103 <param name="sample" argument="--fastq" type="data" format="fastq,fastq.gz,bam" multiple="true" label="Sample reads" help="The input reads must be in FASTQ, FASTQ.GZ or BAM format and all files must be in the same format." />
91 <param name="sgrna_library_file" type="data" argument="--list-seq" format="txt,tabular,tsv,csv" label="sgRNA library file" help="A library file must be provided with three columns containing the sgRNA ID, sequence, and gene it is targeting, see Help below for more information." /> 104 <param name="sgrna_library_file" type="data" argument="--list-seq" format="txt,tabular,tsv,csv" label="sgRNA library file" help="A library file must be provided with three columns containing the sgRNA ID, sequence, and gene it is targeting, see Help below for more information." />
92 <param name="sample_label" argument="--sample-label" type="text" optional="true" value="" label="Specify sample label" help="By default, the input filename will be used as the sample label. Optionally you can specify a different sample label to use."/> 105 <param name="sample_label" argument="--sample-label" type="text" optional="true" value="" label="Specify sample labels" help="By default, the input filenames will be used as the sample labels. Optionally you can specify different sample labels to use, which must be separated by commas (,). The number of labels must equal the number of samples provided to the --fastq option.">
106 <validator type="regex" message="Please only use letters, numbers or underscores in sample labels, and separate labels by commas">^[\w,]+$</validator>
107 </param>
93 </when> 108 </when>
94 <when value="table"> 109 <when value="table">
95 <param name="counts" argument="-k" type="data" format="tabular" optional="true" label="Counts Table" help="Alternatively, a tab-separated file of read counts can be used as input. See Help below for format" /> 110 <param name="counts" argument="-k" type="data" format="tabular" optional="true" label="Counts Table" help="Alternatively, a tab-separated file of read counts can be used as input. See Help below for format" />
96 <param name="sgrna_library_file" type="data" argument="--list-seq" format="txt,tabular,tsv,csv" optional="True" label="sgRNA library file" help="Optionally, a library file can be provided with three columns containing the sgRNA ID, sequence, and gene it is targeting, see Help below for more information." /> 111 <param name="sgrna_library_file" type="data" argument="--list-seq" format="txt,tabular,tsv,csv" optional="True" label="sgRNA library file" help="Optionally, a library file can be provided with three columns containing the sgRNA ID, sequence, and gene it is targeting, see Help below for more information." />
97 </when> 112 </when>
98 </conditional> 113 </conditional>
99 114
100 <section name="out" title="Output Options"> 115 <section name="out" title="Output Options">
101 <param name="countsummaryOpt" type="boolean" truevalue="True" falsevalue="" checked="false" optional="true" label="Output summary statistics" help="Output summary statistics of the fastq files. Default: No" /> 116 <param name="countsummaryOpt" type="boolean" truevalue="True" falsevalue="" checked="false" optional="true" label="Output Count Summary file" help="Output summary statistics of the fastq files. Default: No" />
102 <param name="pdfreportOpt" argument="--pdf-report" type="boolean" truevalue="--pdf-report" falsevalue="" checked="false" optional="true" label="Output PDF report" help="Generate pdf report of the input file. Default: No" /> 117 <param name="normcountsOpt" type="boolean" truevalue="True" falsevalue="" checked="false" optional="true" label="Output Normalized Counts file" help="Default: No" />
103 <param name="unmappedOpt" argument="--unmapped-to-file" type="boolean" truevalue="--unmapped-to-file" falsevalue="" checked="false" optional="true" label="Output unmapped reads" help="Save unmapped reads to file. Default: No" /> 118 <param name="pdfreportOpt" argument="--pdf-report" type="boolean" truevalue="--pdf-report" falsevalue="" checked="false" optional="true" label="Output plots" help="Generate PDF of the plots. Default: No" />
104 <param name="rscriptOpt" type="boolean" truevalue="True" falsevalue="" checked="false" optional="true" label="Output R script" help="Output the R script used to generate the plots in the pdf report. Default: No" /> 119 <param name="unmappedOpt" argument="--unmapped-to-file" type="boolean" truevalue="--unmapped-to-file" falsevalue="" checked="false" optional="true" label="Output Unmapped reads" help="Save unmapped reads to file. Default: No" />
105 <param name="logOpt" type="boolean" truevalue="True" falsevalue="" checked="false" label="Output Log file" help="This file includes the logging information, it will list some basic statistics of the dataset at the end" /> 120 <param name="rfilesOpt" type="boolean" truevalue="True" falsevalue="" checked="false" optional="true" label="Output R files" help="Output the .R and .Rnw files used to generate the plots in the PDF report. The median-normalized read counts file will also be output as it is required to regenerate the plots. Default: No" />
121 <param name="logOpt" type="boolean" truevalue="True" falsevalue="" checked="false" label="Output Log file" help="This file includes the logging information, it will list some basic statistics of the dataset at the end. Default: No" />
106 </section> 122 </section>
107 123
108 <section name="adv" title="Advanced Options"> 124 <section name="adv" title="Advanced Options">
109 <param name="gmt_file" argument="--gmt-file" type="data" format="tabular" optional="true" value="" label="Pathway file for QC" help="TThe pathway file used for QC, in GMT format. By default it will use the GMT file provided by MAGeCK" /> 125 <param name="gmt_file" argument="--gmt-file" type="data" format="tabular" optional="true" value="" label="Pathway file for QC" help="The pathway file used for QC, in GMT format. By default it will use the GMT file provided by MAGeCK" />
110 <param name="trim5" argument="--trim-5" type="integer" min="0" optional="true" label="5' Trim length" help="Length of trimming the 5' of the reads. Default: 0" /> 126 <param name="trim5" argument="--trim-5" type="integer" min="0" optional="true" label="5' Trim length" help="Length of trimming the 5' of the reads. Default: 0" />
111 <param name="norm_method" argument="--norm-method" type="select" label="Method for normalization" help="Methods include: None (no normalization), Median (median normalization), Total (normalization by total read counts), Control (normalization by control sgRNAs specified by the --control-sgrna option). Default: Median" > 127 <param name="norm_method" argument="--norm-method" type="select" optional="true" label="Method for normalization" help="Methods include: None (no normalization), Median (median normalization), Total (normalization by total read counts), Control (normalization by control sgRNAs specified by the --control-sgrna option). Default: Median" >
128 <option value="median" selected="True">Median</option>
112 <option value="none">None</option> 129 <option value="none">None</option>
113 <option value="median" selected="True">Median</option>
114 <option value="total">Total</option> 130 <option value="total">Total</option>
115 <option value="control">Control</option> 131 <option value="control">Control</option>
116 </param> 132 </param>
117 <param name="control_sgrna" argument="--control-sgrna" type="data" format="tabular" optional="true" label="Control sgRNAs file" help="A file of control sgRNA IDs for normalization and for generating the null distribution of RRA" /> 133 <param name="control_sgrna" argument="--control-sgrna" type="data" format="tabular" optional="true" label="Control sgRNAs file" help="A file of control sgRNA IDs for normalization and for generating the null distribution of RRA" />
118 <param name="sgrna_len" argument="--sgrna-len" type="integer" min="0" optional="true" label="Length of the sgRNA" help="The program will automatically determine the sgRNA length from the library file, so only use this if you turn on the --unmapped-to-file option. Default: autodetected" /> 134 <param name="sgrna_len" argument="--sgrna-len" type="integer" min="0" optional="true" label="Length of the sgRNA" help="The program will automatically determine the sgRNA length from the library file, so only use this if you turn on the --unmapped-to-file option. Default: autodetected" />
121 <param name="test_run" argument="--test-run" type="boolean" truevalue="--test-run" falsevalue="" checked="false" optional="true" label="Test running" help="If this option is on, MAGeCK will only process the first 1M records for each file" /> 137 <param name="test_run" argument="--test-run" type="boolean" truevalue="--test-run" falsevalue="" checked="false" optional="true" label="Test running" help="If this option is on, MAGeCK will only process the first 1M records for each file" />
122 </section> 138 </section>
123 </inputs> 139 </inputs>
124 140
125 <outputs> 141 <outputs>
126 <data name="counts" format="tabular" from_work_dir="*.count.txt" label="${tool.name} on ${on_string}: sgRNA Counts" /> 142 <data name="counts" format="tabular" from_work_dir="output.count.txt" label="${tool.name} on ${on_string}: sgRNA Counts" />
127 <data name="countsummary" format="tabular" from_work_dir="*.countsummary.txt" label="${tool.name} on ${on_string}: sgRNA Count Summary" > 143 <data name="countsummary" format="tabular" from_work_dir="output.countsummary.txt" label="${tool.name} on ${on_string}: sgRNA Count Summary" >
128 <filter>out['countsummaryOpt'] is True</filter> 144 <filter>out['countsummaryOpt'] is True</filter>
129 </data> 145 </data>
130 <data name="pdfreport" format="pdf" from_work_dir="merged.pdf" label="${tool.name} on ${on_string}: PDF Report" > 146 <data name="normcounts" format="tabular" from_work_dir="output.count_normalized.txt" label="${tool.name} on ${on_string}: Normalized counts" >
147 <filter>out['normcountsOpt'] is True or out['rfilesOpt'] is True</filter>
148 </data>
149 <data name="unmapped" format="tabular" from_work_dir="output.unmapped.txt" label="${tool.name} on ${on_string}: Unmapped" >
150 <filter>out['unmappedOpt'] is True</filter>
151 </data>
152 <data name="pdfreport" format="pdf" from_work_dir="merged.pdf" label="${tool.name} on ${on_string}: PDF Report" >
131 <filter>out['pdfreportOpt'] is True</filter> 153 <filter>out['pdfreportOpt'] is True</filter>
132 </data>
133 <data name="unmapped" format="tabular" from_work_dir="*.unmapped.txt" label="${tool.name} on ${on_string}: Unmapped" >
134 <filter>out['unmappedOpt'] is True</filter>
135 </data> 154 </data>
136 <data name="log" format="txt" from_work_dir="output.log" label="${tool.name} on ${on_string}: Log" > 155 <data name="log" format="txt" from_work_dir="output.log" label="${tool.name} on ${on_string}: Log" >
137 <filter>out['logOpt'] is True</filter> 156 <filter>out['logOpt'] is True</filter>
157 </data>
158 <data name="rscript" format="txt" from_work_dir="output_countsummary.R" label="${tool.name} on ${on_string}: R file" >
159 <filter>out['rfilesOpt'] is True</filter>
160 </data>
161 <data name="rnwfile" format="txt" from_work_dir="output_countsummary.Rnw" label="${tool.name} on ${on_string}: Rnw file" >
162 <filter>out['rfilesOpt'] is True</filter>
138 </data> 163 </data>
139 </outputs> 164 </outputs>
140 165
141 <tests> 166 <tests>
142 <!-- Ensure fastq.gz input works --> 167 <!-- Ensure fastq.gz input works -->
143 <test expect_num_outputs="1"> 168 <test expect_num_outputs="1">
144 <param name="sgrna_library_file" value="demo/demo2/library.txt" ftype="tabular" /> 169 <param name="sgrna_library_file" value="demo/demo2/library.txt" ftype="tabular" />
145 <param name="format_select" value="files" /> 170 <param name="format_select" value="files" />
146 <param name="sample" value="test1.fastq.gz" ftype="fastq.gz"/> 171 <param name="sample" value="test1.fastq.gz" ftype="fastq.gz"/>
147 <output name="counts" file="out.count.fastq.txt"/> 172 <output name="counts" file="out.count.txt"/>
148 </test> 173 </test>
149 <!-- Ensure fastq input works --> 174 <!-- Ensure multiple fastq.gz input works with report -->
175 <test expect_num_outputs="2">
176 <param name="sgrna_library_file" value="demo/demo2/library.txt" ftype="tabular" />
177 <param name="format_select" value="files" />
178 <param name="sample" value="test1.fastq.gz,test2.fastq.gz" ftype="fastq.gz"/>
179 <param name="pdfreportOpt" value="True" />
180 <output name="counts" file="out.count_multi.txt"/>
181 <output name="pdfreport" file="out.countsummary_multi.pdf" compare="sim_size" />
182 </test>
183 <!-- Ensure fastq input works -->
150 <test expect_num_outputs="1"> 184 <test expect_num_outputs="1">
151 <param name="sgrna_library_file" value="demo/demo2/library.txt" ftype="tabular" /> 185 <param name="sgrna_library_file" value="demo/demo2/library.txt" ftype="tabular" />
152 <param name="format_select" value="files" /> 186 <param name="format_select" value="files" />
153 <param name="sample" value="demo/demo2/test1.fastq" ftype="fastq"/> 187 <param name="sample" value="demo/demo2/test1.fastq" ftype="fastq"/>
154 <param name="sample_label" value="test1.fastq.gz" /> 188 <param name="sample_label" value="test1_fastq_gz" />
155 <output name="counts" file="out.count.fastq.txt"/> 189 <output name="counts" file="out.count.txt"/>
156 </test> 190 </test>
157 <!-- Ensure BAM input works --> 191 <!-- Ensure BAM input works -->
158 <test expect_num_outputs="1"> 192 <test expect_num_outputs="1">
159 <param name="sgrna_library_file" value="demo/demo2/library.txt" ftype="tabular" /> 193 <param name="sgrna_library_file" value="demo/demo2/library.txt" ftype="tabular" />
160 <param name="format_select" value="files" /> 194 <param name="format_select" value="files" />
161 <param name="sample" value="test1.bam" ftype="bam"/> 195 <param name="sample" value="test1.bam" ftype="bam"/>
162 <output name="counts" file="out.count.bam.txt"/> 196 <output name="counts" file="out.count.bam.txt"/>
163 </test> 197 </test>
164 <!-- Ensure optional outputs work --> 198 <!-- Ensure optional outputs work -->
165 <test expect_num_outputs="5"> 199 <test expect_num_outputs="8">
166 <param name="sgrna_library_file" value="demo/demo2/library.txt" ftype="tabular" /> 200 <param name="sgrna_library_file" value="demo/demo2/library.txt" ftype="tabular" />
167 <param name="format_select" value="files" /> 201 <param name="format_select" value="files" />
168 <param name="sample" value="test1.fastq.gz" ftype="fastq.gz"/> 202 <param name="sample" value="test1.fastq.gz" ftype="fastq.gz"/>
169 <param name="countsummaryOpt" value="True" /> 203 <param name="countsummaryOpt" value="True" />
170 <param name="unmappedOpt" value="True" /> 204 <param name="unmappedOpt" value="True" />
171 <param name="pdfreportOpt" value="True" /> 205 <param name="pdfreportOpt" value="True" />
172 <param name="rscriptOpt" value="True" /> 206 <param name="rfilesOpt" value="True" />
173 <param name="logOpt" value="True" /> 207 <param name="logOpt" value="True" />
174 <output name="counts" file="out.count.fastq.txt"/> 208 <output name="counts" file="out.count.fastq.txt"/>
175 <output name="countsummary" file="out.countsummary.txt" compare="sim_size"/> 209 <output name="countsummary" file="out.countsummary.txt" compare="sim_size"/>
210 <output name="normcounts" file="output.count_normalized.txt"/>
176 <output name="log" file="out.count.log.txt" compare="sim_size"/> 211 <output name="log" file="out.count.log.txt" compare="sim_size"/>
177 <output name="unmapped" file="out.count.unmapped.txt" /> 212 <output name="unmapped" file="out.count.unmapped.txt" />
178 <output name="pdfreport" file="out.countsummary.pdf" compare="sim_size" /> 213 <output name="pdfreport" file="out.countsummary.pdf" compare="sim_size" />
214 <output name="rscript" file="out.count.R" />
215 <output name="rnwfile" file="output_countsummary.Rnw" />
179 </test> 216 </test>
180 </tests> 217 </tests>
181 218
182 <help><![CDATA[ 219 <help><![CDATA[
183 .. class:: infomark 220 .. class:: infomark
188 225
189 ----- 226 -----
190 227
191 **Inputs** 228 **Inputs**
192 229
193 **Read file(s)** 230 **Read files**
194 231
195 **MAGeCK count** accepts one or more FASTQ.GZ, FASTQ or BAM files as input. 232 **MAGeCK count** accepts one or more FASTQ.GZ, FASTQ or BAM files as input.
196 233
197 Since version 0.5.5, MAGeCK count module supports collecting read counts from BAM files. This will allow you to use a third-party aligner to map reads to the library with mismatches, providing more usable reads for the analysis. However, it is still recommended to directly use the fastq file in the count module (which does not allow any mismatches), because: 234 Since version 0.5.5, MAGeCK count module supports collecting read counts from BAM files. This will allow you to use a third-party aligner to map reads to the library with mismatches, providing more usable reads for the analysis. However, it is still recommended to directly use the fastq file in the count module (which does not allow any mismatches), because:
198 235
222 259
223 ----- 260 -----
224 261
225 **Outputs** 262 **Outputs**
226 263
264 This tool outputs
265
266 * an sgRNA Counts table
267
268 Optionally, under **Output Options** you can choose to output
269
270 * a Count Summary file
271 * a PDF report
272 * a Normalized Counts table
273 * an Unmapped reads file
274 * the .R and .Rnw files used to generate the plots and PDF
275 * a Log file of the analysis
276
227 **sgRNA Count file** 277 **sgRNA Count file**
228 278
229 An example of the sgRNA count output file is shown below. This file can be used with **MAGeCK test**. 279 An example of the sgRNA count output file is shown below. This file can be used with **MAGeCK test**.
230 280
231 Example: 281 Example:
232 282
233 ============== ======== ================ 283 ============== ======== =========== ===========
234 **sgRNA** **Gene** **Sample Label** 284 **sgRNA** **Gene** **Sample1** **Sample2**
235 -------------- -------- ---------------- 285 -------------- -------- ----------- -----------
236 A1CF_m52595977 A1CF 213 286 A1CF_m52595977 A1CF 213 199
237 A1CF_m52596017 A1CF 294 287 A1CF_m52596017 A1CF 294 164
238 A1CF_m52596056 A1CF 421 288 A1CF_m52596056 A1CF 421 378
239 A1CF_m52603842 A1CF 274 289 A1CF_m52603842 A1CF 274 281
240 A1CF_m52603847 A1CF 0 290 A1CF_m52603847 A1CF 0 0
241 ============== ======== ================ 291 ============== ======== =========== ===========
242 292
243 293
244 **Count Summary** 294 **Count Summary**
245 295
246 MAGeCK can produce a **Count Summary** file containing statistics of the input file (the statistics of fastq file are also in the PDF report). An example count summary file is shown below. 296 MAGeCK can produce a **Count Summary** file containing statistics of the input files (the statistics of fastq files are also in the PDF report). An example count summary file is shown below.
247 297
248 Example: 298 Example:
249 299
250 ========== ===== ===== ====== ========== =========== ========== ========= ======== ============ ======================= ========================== ============ 300 ========== ===== ===== ====== ========== =========== ========== ========= ======== ============ ======================= ========================== ============
251 File Label Reads Mapped Percentage TotalsgRNAs Zerocounts GiniIndex NegSelQC NegSelQCPval NegSelQCPvalPermutation NegSelQCPvalPermutationFDR NegSelQCGene 301 File Label Reads Mapped Percentage TotalsgRNAs Zerocounts GiniIndex NegSelQC NegSelQCPval NegSelQCPvalPermutation NegSelQCPvalPermutationFDR NegSelQCGene