Mercurial > repos > iuc > mageck_count
comparison mageck_count.xml @ 1:4d72d204dcfa draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mageck commit 2eea865fe331694058922292e5681b96f4f0b4c7
author | iuc |
---|---|
date | Sat, 17 Feb 2018 10:41:26 -0500 |
parents | b80c0e046539 |
children | 9527a3d6ebd2 |
comparison
equal
deleted
inserted
replaced
0:b80c0e046539 | 1:4d72d204dcfa |
---|---|
1 <?xml version="1.0"?> | 1 <?xml version="1.0"?> |
2 <tool id="mageck_count" name="MAGeCK count" version="@VERSION@" > | 2 <tool id="mageck_count" name="MAGeCK count" version="@VERSION@.1" > |
3 <description>- collect sgRNA read counts from read mapping files</description> | 3 <description>- collect sgRNA read counts from read mapping files</description> |
4 <macros> | 4 <macros> |
5 <import>mageck_macros.xml</import> | 5 <import>mageck_macros.xml</import> |
6 </macros> | 6 </macros> |
7 <expand macro="requirements"> | 7 <expand macro="requirements"> |
28 mageck count | 28 mageck count |
29 | 29 |
30 #if str($reads.format_select) == "files": | 30 #if str($reads.format_select) == "files": |
31 --fastq $infile | 31 --fastq $infile |
32 -l '$reads.sgrna_library_file' | 32 -l '$reads.sgrna_library_file' |
33 #if $reads.sample_label: | 33 #if $reads.sample_label: |
34 --sample-label '$reads.sample_label' | 34 --sample-label '$reads.sample_label' |
35 #end if | 35 #else: |
36 | 36 --sample-label '$reads.sample.element_identifier' |
37 #end if | |
37 #elif str($reads.format_select) == "table": | 38 #elif str($reads.format_select) == "table": |
38 -k '$reads.counts' | 39 -k '$reads.counts' |
39 #if '$sgrna_library_file': | 40 #if '$sgrna_library_file': |
40 -l '$sgrna_library_file' | 41 -l '$sgrna_library_file' |
41 #end if | 42 #end if |
42 | |
43 #end if | 43 #end if |
44 | 44 |
45 -n output | 45 -n output |
46 | 46 |
47 #if $out.pdfreportOpt: | 47 #if $out.pdfreportOpt: |
52 $out.unmappedOpt | 52 $out.unmappedOpt |
53 | 53 |
54 #if $adv.trim5: | 54 #if $adv.trim5: |
55 --trim-5 $adv.trim5 | 55 --trim-5 $adv.trim5 |
56 #end if | 56 #end if |
57 | |
57 --norm-method $adv.norm_method | 58 --norm-method $adv.norm_method |
59 | |
58 #if $adv.control_sgrna: | 60 #if $adv.control_sgrna: |
59 --control-sgrna $adv.control_sgrna | 61 --control-sgrna $adv.control_sgrna |
60 #end if | 62 #end if |
61 --sgrna-len $adv.sgrna_len | 63 |
64 #if $adv.sgrna_len: | |
65 --sgrna-len $adv.sgrna_len | |
66 #end if | |
67 | |
62 $adv.count_n | 68 $adv.count_n |
69 | |
63 $adv.reverse_complement | 70 $adv.reverse_complement |
71 | |
64 $adv.test_run | 72 $adv.test_run |
65 | 73 |
66 #if $adv.gmt_file: | 74 #if $adv.gmt_file: |
67 --gmt-file '$adv.gmt_file' | 75 --gmt-file '$adv.gmt_file' |
68 #end if | 76 #end if |
79 <option value="table">Single Count table</option> | 87 <option value="table">Single Count table</option> |
80 </param> | 88 </param> |
81 <when value="files"> | 89 <when value="files"> |
82 <param name="sample" argument="--fastq" type="data" format="fastq,fastq.gz,bam" multiple="false" label="Sample reads" help="The input reads must be in FASTQ, FASTQ.GZ or BAM format and all files must be in the same format." /> | 90 <param name="sample" argument="--fastq" type="data" format="fastq,fastq.gz,bam" multiple="false" label="Sample reads" help="The input reads must be in FASTQ, FASTQ.GZ or BAM format and all files must be in the same format." /> |
83 <param name="sgrna_library_file" type="data" argument="--list-seq" format="txt,tabular,tsv,csv" label="sgRNA library file" help="A library file must be provided with three columns containing the sgRNA ID, sequence, and gene it is targeting, see Help below for more information." /> | 91 <param name="sgrna_library_file" type="data" argument="--list-seq" format="txt,tabular,tsv,csv" label="sgRNA library file" help="A library file must be provided with three columns containing the sgRNA ID, sequence, and gene it is targeting, see Help below for more information." /> |
84 <param name="sample_label" argument="--sample-label" type="text" optional="true" value="" label="Sample label" help="Optionally, you can specify a sample label to use in the output file header."/> | 92 <param name="sample_label" argument="--sample-label" type="text" optional="true" value="" label="Specify sample label" help="By default, the input filename will be used as the sample label. Optionally you can specify a different sample label to use."/> |
85 </when> | 93 </when> |
86 <when value="table"> | 94 <when value="table"> |
87 <param name="counts" argument="-k" type="data" format="tabular" optional="true" label="Counts Table" help="Alternatively, a tab-separated file of read counts can be used as input. See Help below for format" /> | 95 <param name="counts" argument="-k" type="data" format="tabular" optional="true" label="Counts Table" help="Alternatively, a tab-separated file of read counts can be used as input. See Help below for format" /> |
88 <param name="sgrna_library_file" type="data" argument="--list-seq" format="txt,tabular,tsv,csv" optional="True" label="sgRNA library file" help="Optionally, a library file can be provided with three columns containing the sgRNA ID, sequence, and gene it is targeting, see Help below for more information." /> | 96 <param name="sgrna_library_file" type="data" argument="--list-seq" format="txt,tabular,tsv,csv" optional="True" label="sgRNA library file" help="Optionally, a library file can be provided with three columns containing the sgRNA ID, sequence, and gene it is targeting, see Help below for more information." /> |
89 </when> | 97 </when> |
92 <section name="out" title="Output Options"> | 100 <section name="out" title="Output Options"> |
93 <param name="countsummaryOpt" type="boolean" truevalue="True" falsevalue="" checked="false" optional="true" label="Output summary statistics" help="Output summary statistics of the fastq files. Default: No" /> | 101 <param name="countsummaryOpt" type="boolean" truevalue="True" falsevalue="" checked="false" optional="true" label="Output summary statistics" help="Output summary statistics of the fastq files. Default: No" /> |
94 <param name="pdfreportOpt" argument="--pdf-report" type="boolean" truevalue="--pdf-report" falsevalue="" checked="false" optional="true" label="Output PDF report" help="Generate pdf report of the input file. Default: No" /> | 102 <param name="pdfreportOpt" argument="--pdf-report" type="boolean" truevalue="--pdf-report" falsevalue="" checked="false" optional="true" label="Output PDF report" help="Generate pdf report of the input file. Default: No" /> |
95 <param name="unmappedOpt" argument="--unmapped-to-file" type="boolean" truevalue="--unmapped-to-file" falsevalue="" checked="false" optional="true" label="Output unmapped reads" help="Save unmapped reads to file. Default: No" /> | 103 <param name="unmappedOpt" argument="--unmapped-to-file" type="boolean" truevalue="--unmapped-to-file" falsevalue="" checked="false" optional="true" label="Output unmapped reads" help="Save unmapped reads to file. Default: No" /> |
96 <param name="rscriptOpt" type="boolean" truevalue="True" falsevalue="" checked="false" optional="true" label="Output R script" help="Output the R script used to generate the plots in the pdf report. Default: No" /> | 104 <param name="rscriptOpt" type="boolean" truevalue="True" falsevalue="" checked="false" optional="true" label="Output R script" help="Output the R script used to generate the plots in the pdf report. Default: No" /> |
97 <param name="logOpt" type="boolean" truevalue="True" falsevalue="" checked="false" label="Output logfile" help="This file includes the logging information, it will list some basic statistics of the dataset at the end" /> | 105 <param name="logOpt" type="boolean" truevalue="True" falsevalue="" checked="false" label="Output Log file" help="This file includes the logging information, it will list some basic statistics of the dataset at the end" /> |
98 </section> | 106 </section> |
99 | 107 |
100 <section name="adv" title="Advanced Options"> | 108 <section name="adv" title="Advanced Options"> |
101 <param name="gmt_file" argument="--gmt-file" type="data" format="tabular" optional="true" value="" label="Pathway file for QC" help="The pathway file used for QC, in GMT format. By default it will use the GMT file provided by MAGeCK" /> | 109 <param name="gmt_file" argument="--gmt-file" type="data" format="tabular" optional="true" value="" label="Pathway file for QC" help="The pathway file used for QC, in GMT format. By default it will use the GMT file provided by MAGeCK" /> |
102 <param name="trim5" argument="--trim-5" type="integer" min="0" optional="true" label="5' Trim length" help="Length of trimming the 5' of the reads. Default 0" /> | 110 <param name="trim5" argument="--trim-5" type="integer" min="0" optional="true" label="5' Trim length" help="Length of trimming the 5' of the reads. Default: 0" /> |
103 <param name="norm_method" argument="--norm-method" type="select" label="Method for normalization" help="Methods include: None (no normalization), Median (median normalization), Total (normalization by total read counts), Control (normalization by control sgRNAs specified by the --control-sgrna option). Default: Median" > | 111 <param name="norm_method" argument="--norm-method" type="select" label="Method for normalization" help="Methods include: None (no normalization), Median (median normalization), Total (normalization by total read counts), Control (normalization by control sgRNAs specified by the --control-sgrna option). Default: Median" > |
104 <option value="none">None</option> | 112 <option value="none">None</option> |
105 <option value="median" selected="True">Median</option> | 113 <option value="median" selected="True">Median</option> |
106 <option value="total">Total</option> | 114 <option value="total">Total</option> |
107 <option value="control">Control</option> | 115 <option value="control">Control</option> |
108 </param> | 116 </param> |
109 <param name="control_sgrna" argument="--control-sgrna" type="data" format="tabular" optional="true" label="Control sgRNAs file" help="A file of control sgRNA IDs for normalization and for generating the null distribution of RRA" /> | 117 <param name="control_sgrna" argument="--control-sgrna" type="data" format="tabular" optional="true" label="Control sgRNAs file" help="A file of control sgRNA IDs for normalization and for generating the null distribution of RRA" /> |
110 <param name="sgrna_len" argument="--sgrna-len" type="integer" min="0" value="20" optional="true" label="Length of the sgRNA" help="The program will automatically determine the sgRNA length from the library file, so only use this if you turn on the --unmapped-to-file option. Default: 20" /> | 118 <param name="sgrna_len" argument="--sgrna-len" type="integer" min="0" optional="true" label="Length of the sgRNA" help="The program will automatically determine the sgRNA length from the library file, so only use this if you turn on the --unmapped-to-file option. Default: autodetected" /> |
111 <param name="count_n" argument="--count-n" type="boolean" truevalue="--count-n" falsevalue="" checked="false" optional="true" label="Count sgRNAs with Ns" help="By default, sgRNAs containing Ns will be discarded" /> | 119 <param name="count_n" argument="--count-n" type="boolean" truevalue="--count-n" falsevalue="" checked="false" optional="true" label="Count sgRNAs with Ns" help="By default, sgRNAs containing Ns will be discarded" /> |
112 <param name="reverse_complement" argument="--reverse-complement" type="boolean" truevalue="--reverse-complement" falsevalue="" checked="false" optional="true" label="Reverse complement the sequences in library for read mapping" /> | 120 <param name="reverse_complement" argument="--reverse-complement" type="boolean" truevalue="--reverse-complement" falsevalue="" checked="false" optional="true" label="Reverse complement" help="Reverse complement the sequences in library for read mapping" /> |
113 <param name="test_run" argument="--test-run" type="boolean" truevalue="--test-run" falsevalue="" checked="false" optional="true" label="Test running" help="If this option is on, MAGeCK will only process the first 1M records for each file" /> | 121 <param name="test_run" argument="--test-run" type="boolean" truevalue="--test-run" falsevalue="" checked="false" optional="true" label="Test running" help="If this option is on, MAGeCK will only process the first 1M records for each file" /> |
114 </section> | 122 </section> |
115 </inputs> | 123 </inputs> |
116 | 124 |
117 <outputs> | 125 <outputs> |
123 <filter>out['pdfreportOpt'] is True</filter> | 131 <filter>out['pdfreportOpt'] is True</filter> |
124 </data> | 132 </data> |
125 <data name="unmapped" format="tabular" from_work_dir="*.unmapped.txt" label="${tool.name} on ${on_string}: Unmapped" > | 133 <data name="unmapped" format="tabular" from_work_dir="*.unmapped.txt" label="${tool.name} on ${on_string}: Unmapped" > |
126 <filter>out['unmappedOpt'] is True</filter> | 134 <filter>out['unmappedOpt'] is True</filter> |
127 </data> | 135 </data> |
128 <data name="log" format="txt" from_work_dir="*.log" label="${tool.name} on ${on_string}: Log" > | 136 <data name="log" format="txt" from_work_dir="output.log" label="${tool.name} on ${on_string}: Log" > |
129 <filter>out['logOpt'] is True</filter> | 137 <filter>out['logOpt'] is True</filter> |
130 </data> | 138 </data> |
131 </outputs> | 139 </outputs> |
132 | 140 |
133 <tests> | 141 <tests> |
134 <!-- Ensure fastq works --> | |
135 <test expect_num_outputs="1"> | |
136 <param name="sgrna_library_file" value="demo/demo2/library.txt" ftype="tabular" /> | |
137 <param name="format_select" value="files" /> | |
138 <param name="sample" value="demo/demo2/test1.fastq" ftype="fastq"/> | |
139 <output name="counts" file="out.count.fastq.txt"/> | |
140 </test> | |
141 <!-- Ensure fastq.gz input works --> | 142 <!-- Ensure fastq.gz input works --> |
142 <test expect_num_outputs="1"> | 143 <test expect_num_outputs="1"> |
143 <param name="sgrna_library_file" value="demo/demo2/library.txt" ftype="tabular" /> | 144 <param name="sgrna_library_file" value="demo/demo2/library.txt" ftype="tabular" /> |
144 <param name="format_select" value="files" /> | 145 <param name="format_select" value="files" /> |
145 <param name="sample" value="test1.fastq.gz" ftype="fastq.gz"/> | 146 <param name="sample" value="test1.fastq.gz" ftype="fastq.gz"/> |
147 <output name="counts" file="out.count.fastq.txt"/> | |
148 </test> | |
149 <!-- Ensure fastq input works --> | |
150 <test expect_num_outputs="1"> | |
151 <param name="sgrna_library_file" value="demo/demo2/library.txt" ftype="tabular" /> | |
152 <param name="format_select" value="files" /> | |
153 <param name="sample" value="demo/demo2/test1.fastq" ftype="fastq"/> | |
154 <param name="sample_label" value="test1.fastq.gz" /> | |
146 <output name="counts" file="out.count.fastq.txt"/> | 155 <output name="counts" file="out.count.fastq.txt"/> |
147 </test> | 156 </test> |
148 <!-- Ensure BAM input works --> | 157 <!-- Ensure BAM input works --> |
149 <test expect_num_outputs="1"> | 158 <test expect_num_outputs="1"> |
150 <param name="sgrna_library_file" value="demo/demo2/library.txt" ftype="tabular" /> | 159 <param name="sgrna_library_file" value="demo/demo2/library.txt" ftype="tabular" /> |
179 | 188 |
180 ----- | 189 ----- |
181 | 190 |
182 **Inputs** | 191 **Inputs** |
183 | 192 |
184 By default, MAGeCK count command will automatically determine the trimming length of the fastq file. | 193 **Read file(s)** |
194 | |
195 **MAGeCK count** accepts one or more FASTQ.GZ, FASTQ or BAM files as input. | |
196 | |
197 Since version 0.5.5, the MAGeCK count module supports collecting read counts from BAM files. This allows you to use a third-party aligner to map reads to the library with mismatches, providing more usable reads for the analysis. However, it is still recommended to use the FASTQ files directly in the count module (which does not allow any mismatches), because: | |
198 | |
199 * sgRNAs with mismatches may behave in unwanted ways (no on-target cleavage, or additional off-target cleavage); | |
200 * In most cases the read counts are enough if we allow no mismatches; | |
201 * The mapping procedure is more complicated; for example, you need to know the exact length of 3' adapter sequence. | |
202 | |
203 It is also possible to input a Count Table to normalize counts and get statistics. | |
185 | 204 |
186 **sgRNA library file** | 205 **sgRNA library file** |
187 | 206 |
188 When starting from FASTQ, FASTQ.GZ or BAM files, MAGeCK needs to know the sgRNA sequences and targeting genes. Such information is provided in the sgRNA library file and can be specified in the tool form above. The sgRNA library file can be provided in .tsv or .csv format. There are three columns in the library file: the sgRNA ID, the sequence, and the gene it is targeting. | 207 When starting from FASTQ, FASTQ.GZ or BAM files, MAGeCK needs to know the sgRNA sequences and targeting genes. Such information is provided in the sgRNA library file and can be specified in the tool form above. The sgRNA library file can be provided in .tsv or .csv format. There are three columns in the library file: the sgRNA ID, the sequence, and the gene it is targeting. |
189 | 208 |