comparison mageck_count.xml @ 1:4d72d204dcfa draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mageck commit 2eea865fe331694058922292e5681b96f4f0b4c7
author iuc
date Sat, 17 Feb 2018 10:41:26 -0500
parents b80c0e046539
children 9527a3d6ebd2
comparison
equal deleted inserted replaced
0:b80c0e046539 1:4d72d204dcfa
1 <?xml version="1.0"?> 1 <?xml version="1.0"?>
2 <tool id="mageck_count" name="MAGeCK count" version="@VERSION@" > 2 <tool id="mageck_count" name="MAGeCK count" version="@VERSION@.1" >
3 <description>- collect sgRNA read counts from read mapping files</description> 3 <description>- collect sgRNA read counts from read mapping files</description>
4 <macros> 4 <macros>
5 <import>mageck_macros.xml</import> 5 <import>mageck_macros.xml</import>
6 </macros> 6 </macros>
7 <expand macro="requirements"> 7 <expand macro="requirements">
28 mageck count 28 mageck count
29 29
30 #if str($reads.format_select) == "files": 30 #if str($reads.format_select) == "files":
31 --fastq $infile 31 --fastq $infile
32 -l '$reads.sgrna_library_file' 32 -l '$reads.sgrna_library_file'
33 #if $reads.sample_label: 33 #if $reads.sample_label:
34 --sample-label '$reads.sample_label' 34 --sample-label '$reads.sample_label'
35 #end if 35 #else:
36 36 --sample-label '$reads.sample.element_identifier'
37 #end if
37 #elif str($reads.format_select) == "table": 38 #elif str($reads.format_select) == "table":
38 -k '$reads.counts' 39 -k '$reads.counts'
39 #if '$sgrna_library_file': 40 #if '$sgrna_library_file':
40 -l '$sgrna_library_file' 41 -l '$sgrna_library_file'
41 #end if 42 #end if
42
43 #end if 43 #end if
44 44
45 -n output 45 -n output
46 46
47 #if $out.pdfreportOpt: 47 #if $out.pdfreportOpt:
52 $out.unmappedOpt 52 $out.unmappedOpt
53 53
54 #if $adv.trim5: 54 #if $adv.trim5:
55 --trim-5 $adv.trim5 55 --trim-5 $adv.trim5
56 #end if 56 #end if
57
57 --norm-method $adv.norm_method 58 --norm-method $adv.norm_method
59
58 #if $adv.control_sgrna: 60 #if $adv.control_sgrna:
59 --control-sgrna $adv.control_sgrna 61 --control-sgrna $adv.control_sgrna
60 #end if 62 #end if
61 --sgrna-len $adv.sgrna_len 63
64 #if $adv.sgrna_len:
65 --sgrna-len $adv.sgrna_len
66 #end if
67
62 $adv.count_n 68 $adv.count_n
69
63 $adv.reverse_complement 70 $adv.reverse_complement
71
64 $adv.test_run 72 $adv.test_run
65 73
66 #if $adv.gmt_file: 74 #if $adv.gmt_file:
67 --gmt-file '$adv.gmt_file' 75 --gmt-file '$adv.gmt_file'
68 #end if 76 #end if
79 <option value="table">Single Count table</option> 87 <option value="table">Single Count table</option>
80 </param> 88 </param>
81 <when value="files"> 89 <when value="files">
82 <param name="sample" argument="--fastq" type="data" format="fastq,fastq.gz,bam" multiple="false" label="Sample reads" help="The input reads must be in FASTQ, FASTQ.GZ or BAM format and all files must be in the same format." /> 90 <param name="sample" argument="--fastq" type="data" format="fastq,fastq.gz,bam" multiple="false" label="Sample reads" help="The input reads must be in FASTQ, FASTQ.GZ or BAM format and all files must be in the same format." />
83 <param name="sgrna_library_file" type="data" argument="--list-seq" format="txt,tabular,tsv,csv" label="sgRNA library file" help="A library file must be provided with three columns containing the sgRNA ID, sequence, and gene it is targeting, see Help below for more information." /> 91 <param name="sgrna_library_file" type="data" argument="--list-seq" format="txt,tabular,tsv,csv" label="sgRNA library file" help="A library file must be provided with three columns containing the sgRNA ID, sequence, and gene it is targeting, see Help below for more information." />
84 <param name="sample_label" argument="--sample-label" type="text" optional="true" value="" label="Sample label" help="Optionally, you can specify a sample label to use in the output file header."/> 92 <param name="sample_label" argument="--sample-label" type="text" optional="true" value="" label="Specify sample label" help="By default, the input filename will be used as the sample label. Optionally you can specify a different sample label to use."/>
85 </when> 93 </when>
86 <when value="table"> 94 <when value="table">
87 <param name="counts" argument="-k" type="data" format="tabular" optional="true" label="Counts Table" help="Alternatively, a tab-separated file of read counts can be used as input. See Help below for format" /> 95 <param name="counts" argument="-k" type="data" format="tabular" optional="true" label="Counts Table" help="Alternatively, a tab-separated file of read counts can be used as input. See Help below for format" />
88 <param name="sgrna_library_file" type="data" argument="--list-seq" format="txt,tabular,tsv,csv" optional="True" label="sgRNA library file" help="Optionally, a library file can be provided with three columns containing the sgRNA ID, sequence, and gene it is targeting, see Help below for more information." /> 96 <param name="sgrna_library_file" type="data" argument="--list-seq" format="txt,tabular,tsv,csv" optional="True" label="sgRNA library file" help="Optionally, a library file can be provided with three columns containing the sgRNA ID, sequence, and gene it is targeting, see Help below for more information." />
89 </when> 97 </when>
92 <section name="out" title="Output Options"> 100 <section name="out" title="Output Options">
93 <param name="countsummaryOpt" type="boolean" truevalue="True" falsevalue="" checked="false" optional="true" label="Output summary statistics" help="Output summary statistics of the fastq files. Default: No" /> 101 <param name="countsummaryOpt" type="boolean" truevalue="True" falsevalue="" checked="false" optional="true" label="Output summary statistics" help="Output summary statistics of the fastq files. Default: No" />
94 <param name="pdfreportOpt" argument="--pdf-report" type="boolean" truevalue="--pdf-report" falsevalue="" checked="false" optional="true" label="Output PDF report" help="Generate pdf report of the input file. Default: No" /> 102 <param name="pdfreportOpt" argument="--pdf-report" type="boolean" truevalue="--pdf-report" falsevalue="" checked="false" optional="true" label="Output PDF report" help="Generate pdf report of the input file. Default: No" />
95 <param name="unmappedOpt" argument="--unmapped-to-file" type="boolean" truevalue="--unmapped-to-file" falsevalue="" checked="false" optional="true" label="Output unmapped reads" help="Save unmapped reads to file. Default: No" /> 103 <param name="unmappedOpt" argument="--unmapped-to-file" type="boolean" truevalue="--unmapped-to-file" falsevalue="" checked="false" optional="true" label="Output unmapped reads" help="Save unmapped reads to file. Default: No" />
96 <param name="rscriptOpt" type="boolean" truevalue="True" falsevalue="" checked="false" optional="true" label="Output R script" help="Output the R script used to generate the plots in the pdf report. Default: No" /> 104 <param name="rscriptOpt" type="boolean" truevalue="True" falsevalue="" checked="false" optional="true" label="Output R script" help="Output the R script used to generate the plots in the pdf report. Default: No" />
97 <param name="logOpt" type="boolean" truevalue="True" falsevalue="" checked="false" label="Output logfile" help="This file includes the logging information, it will list some basic statistics of the dataset at the end" /> 105 <param name="logOpt" type="boolean" truevalue="True" falsevalue="" checked="false" label="Output Log file" help="This file includes the logging information, it will list some basic statistics of the dataset at the end" />
98 </section> 106 </section>
99 107
100 <section name="adv" title="Advanced Options"> 108 <section name="adv" title="Advanced Options">
101 <param name="gmt_file" argument="--gmt-file" type="data" format="tabular" optional="true" value="" label="Pathway file for QC" help="The pathway file used for QC, in GMT format. By default it will use the GMT file provided by MAGeCK" /> 109 <param name="gmt_file" argument="--gmt-file" type="data" format="tabular" optional="true" value="" label="Pathway file for QC" help="The pathway file used for QC, in GMT format. By default it will use the GMT file provided by MAGeCK" />
102 <param name="trim5" argument="--trim-5" type="integer" min="0" optional="true" label="5' Trim length" help="Length of trimming the 5' of the reads. Default 0" /> 110 <param name="trim5" argument="--trim-5" type="integer" min="0" optional="true" label="5' Trim length" help="Length of trimming the 5' of the reads. Default: 0" />
103 <param name="norm_method" argument="--norm-method" type="select" label="Method for normalization" help="Methods include: None (no normalization), Median (median normalization), Total (normalization by total read counts), Control (normalization by control sgRNAs specified by the --control-sgrna option). Default: Median" > 111 <param name="norm_method" argument="--norm-method" type="select" label="Method for normalization" help="Methods include: None (no normalization), Median (median normalization), Total (normalization by total read counts), Control (normalization by control sgRNAs specified by the --control-sgrna option). Default: Median" >
104 <option value="none">None</option> 112 <option value="none">None</option>
105 <option value="median" selected="True">Median</option> 113 <option value="median" selected="True">Median</option>
106 <option value="total">Total</option> 114 <option value="total">Total</option>
107 <option value="control">Control</option> 115 <option value="control">Control</option>
108 </param> 116 </param>
109 <param name="control_sgrna" argument="--control-sgrna" type="data" format="tabular" optional="true" label="Control sgRNAs file" help="A file of control sgRNA IDs for normalization and for generating the null distribution of RRA" /> 117 <param name="control_sgrna" argument="--control-sgrna" type="data" format="tabular" optional="true" label="Control sgRNAs file" help="A file of control sgRNA IDs for normalization and for generating the null distribution of RRA" />
110 <param name="sgrna_len" argument="--sgrna-len" type="integer" min="0" value="20" optional="true" label="Length of the sgRNA" help="The program will automatically determine the sgRNA length from the library file, so only use this if you turn on the --unmapped-to-file option. Default: 20" /> 118 <param name="sgrna_len" argument="--sgrna-len" type="integer" min="0" optional="true" label="Length of the sgRNA" help="The program will automatically determine the sgRNA length from the library file, so only use this if you turn on the --unmapped-to-file option. Default: autodetected" />
111 <param name="count_n" argument="--count-n" type="boolean" truevalue="--count-n" falsevalue="" checked="false" optional="true" label="Count sgRNAs with Ns" help="By default, sgRNAs containing Ns will be discarded" /> 119 <param name="count_n" argument="--count-n" type="boolean" truevalue="--count-n" falsevalue="" checked="false" optional="true" label="Count sgRNAs with Ns" help="By default, sgRNAs containing Ns will be discarded" />
112 <param name="reverse_complement" argument="--reverse-complement" type="boolean" truevalue="--reverse-complement" falsevalue="" checked="false" optional="true" label="Reverse complement the sequences in library for read mapping" /> 120 <param name="reverse_complement" argument="--reverse-complement" type="boolean" truevalue="--reverse-complement" falsevalue="" checked="false" optional="true" label="Reverse complement" help="Reverse complement the sequences in library for read mapping" />
113 <param name="test_run" argument="--test-run" type="boolean" truevalue="--test-run" falsevalue="" checked="false" optional="true" label="Test running" help="If this option is on, MAGeCK will only process the first 1M records for each file" /> 121 <param name="test_run" argument="--test-run" type="boolean" truevalue="--test-run" falsevalue="" checked="false" optional="true" label="Test running" help="If this option is on, MAGeCK will only process the first 1M records for each file" />
114 </section> 122 </section>
115 </inputs> 123 </inputs>
116 124
117 <outputs> 125 <outputs>
123 <filter>out['pdfreportOpt'] is True</filter> 131 <filter>out['pdfreportOpt'] is True</filter>
124 </data> 132 </data>
125 <data name="unmapped" format="tabular" from_work_dir="*.unmapped.txt" label="${tool.name} on ${on_string}: Unmapped" > 133 <data name="unmapped" format="tabular" from_work_dir="*.unmapped.txt" label="${tool.name} on ${on_string}: Unmapped" >
126 <filter>out['unmappedOpt'] is True</filter> 134 <filter>out['unmappedOpt'] is True</filter>
127 </data> 135 </data>
128 <data name="log" format="txt" from_work_dir="*.log" label="${tool.name} on ${on_string}: Log" > 136 <data name="log" format="txt" from_work_dir="output.log" label="${tool.name} on ${on_string}: Log" >
129 <filter>out['logOpt'] is True</filter> 137 <filter>out['logOpt'] is True</filter>
130 </data> 138 </data>
131 </outputs> 139 </outputs>
132 140
133 <tests> 141 <tests>
134 <!-- Ensure fastq works -->
135 <test expect_num_outputs="1">
136 <param name="sgrna_library_file" value="demo/demo2/library.txt" ftype="tabular" />
137 <param name="format_select" value="files" />
138 <param name="sample" value="demo/demo2/test1.fastq" ftype="fastq"/>
139 <output name="counts" file="out.count.fastq.txt"/>
140 </test>
141 <!-- Ensure fastq.gz input works --> 142 <!-- Ensure fastq.gz input works -->
142 <test expect_num_outputs="1"> 143 <test expect_num_outputs="1">
143 <param name="sgrna_library_file" value="demo/demo2/library.txt" ftype="tabular" /> 144 <param name="sgrna_library_file" value="demo/demo2/library.txt" ftype="tabular" />
144 <param name="format_select" value="files" /> 145 <param name="format_select" value="files" />
145 <param name="sample" value="test1.fastq.gz" ftype="fastq.gz"/> 146 <param name="sample" value="test1.fastq.gz" ftype="fastq.gz"/>
147 <output name="counts" file="out.count.fastq.txt"/>
148 </test>
149 <!-- Ensure fastq input works with a user-specified sample label -->
150 <test expect_num_outputs="1">
151 <param name="sgrna_library_file" value="demo/demo2/library.txt" ftype="tabular" />
152 <param name="format_select" value="files" />
153 <param name="sample" value="demo/demo2/test1.fastq" ftype="fastq"/>
154 <param name="sample_label" value="test1.fastq.gz" />
146 <output name="counts" file="out.count.fastq.txt"/> 155 <output name="counts" file="out.count.fastq.txt"/>
147 </test> 156 </test>
148 <!-- Ensure BAM input works --> 157 <!-- Ensure BAM input works -->
149 <test expect_num_outputs="1"> 158 <test expect_num_outputs="1">
150 <param name="sgrna_library_file" value="demo/demo2/library.txt" ftype="tabular" /> 159 <param name="sgrna_library_file" value="demo/demo2/library.txt" ftype="tabular" />
179 188
180 ----- 189 -----
181 190
182 **Inputs** 191 **Inputs**
183 192
184 By default, MAGeCK count command will automatically determine the trimming length of the fastq file. 193 **Read file(s)**
194
195 **MAGeCK count** accepts one or more FASTQ.GZ, FASTQ or BAM files as input.
196
197 Since version 0.5.5, the MAGeCK count module supports collecting read counts from BAM files. This will allow you to use a third-party aligner to map reads to the library with mismatches, providing more usable reads for the analysis. However, it is still recommended to use the FASTQ file directly in the count module (which does not allow any mismatches), because:
198
199 * Some mismatches in the sgRNAs may cause unwanted behavior (no on-target cleavage, or additional off-target cleavage);
200 * In most cases the read counts are enough if we allow no mismatches;
201 * The mapping procedure is more complicated; for example, you need to know the exact length of 3' adapter sequence.
202
203 It is also possible to input a Count Table to normalize counts and get statistics.
185 204
186 **sgRNA library file** 205 **sgRNA library file**
187 206
188 When starting from FASTQ, FASTQ.GZ or BAM files, MAGeCK needs to know the sgRNA sequences and targeting genes. Such information is provided in the sgRNA library file and can be specified in the tool form above. The sgRNA library file can be provided in .tsv or .csv format. There are three columns in the library file: the sgRNA ID, the sequence, and the gene it is targeting. 207 When starting from FASTQ, FASTQ.GZ or BAM files, MAGeCK needs to know the sgRNA sequences and targeting genes. Such information is provided in the sgRNA library file and can be specified in the tool form above. The sgRNA library file can be provided in .tsv or .csv format. There are three columns in the library file: the sgRNA ID, the sequence, and the gene it is targeting.
189 208