Mercurial > repos > iuc > mageck_count

--- a/mageck_count.xml	Wed Feb 14 06:42:18 2018 -0500
+++ b/mageck_count.xml	Sat Feb 17 10:41:26 2018 -0500
@@ -1,5 +1,5 @@
 <?xml version="1.0"?>
-<tool id="mageck_count" name="MAGeCK count" version="@VERSION@" >
+<tool id="mageck_count" name="MAGeCK count" version="@VERSION@.1" >
     <description>- collect sgRNA read counts from read mapping files</description>
     <macros>
         <import>mageck_macros.xml</import>
@@ -30,16 +30,16 @@
 #if str($reads.format_select) == "files":
     --fastq $infile
     -l '$reads.sgrna_library_file'
-#if $reads.sample_label:
-    --sample-label '$reads.sample_label'
-#end if
-
+    #if $reads.sample_label:
+        --sample-label '$reads.sample_label'
+    #else:
+        --sample-label '$reads.sample.element_identifier'
+    #end if
 #elif str($reads.format_select) == "table":
     -k '$reads.counts'
     #if '$sgrna_library_file':
         -l '$sgrna_library_file'
     #end if
-
 #end if

 -n output
@@ -54,13 +54,21 @@
 #if $adv.trim5:
    --trim-5 $adv.trim5
 #end if
+
 --norm-method $adv.norm_method
+
 #if $adv.control_sgrna:
     --control-sgrna $adv.control_sgrna
 #end if
---sgrna-len $adv.sgrna_len
+
+#if $adv.sgrna_len:
+    --sgrna-len $adv.sgrna_len
+#end if
+
 $adv.count_n
+
 $adv.reverse_complement
+
 $adv.test_run

 #if $adv.gmt_file:
@@ -81,7 +89,7 @@
             <when value="files">
                 <param name="sample" argument="--fastq" type="data" format="fastq,fastq.gz,bam" multiple="false" label="Sample reads" help="The input reads must be in FASTQ, FASTQ.GZ or BAM format and all files must be in the same format." />
                 <param name="sgrna_library_file" type="data" argument="--list-seq" format="txt,tabular,tsv,csv" label="sgRNA library file" help="A library file must be provided with three columns containing the sgRNA ID, sequence, and gene it is targeting, see Help below for more information." />
-                <param name="sample_label" argument="--sample-label" type="text" optional="true" value="" label="Sample label" help="Optionally, you can specify a sample label to use in the output file header."/>
+                <param name="sample_label" argument="--sample-label" type="text" optional="true" value="" label="Specify sample label" help="By default, the input filename will be used as the sample label. Optionally you can specify a different sample label to use."/>
             </when>
             <when value="table">
                 <param name="counts" argument="-k"  type="data" format="tabular" optional="true" label="Counts Table" help="Alternatively, a tab-separated file of read counts can be used as input. See Help below for format" />
@@ -94,12 +102,12 @@
             <param name="pdfreportOpt" argument="--pdf-report" type="boolean" truevalue="--pdf-report" falsevalue="" checked="false" optional="true" label="Output PDF report" help="Generate pdf report of the input file. Default: No" />
             <param name="unmappedOpt" argument="--unmapped-to-file" type="boolean" truevalue="--unmapped-to-file" falsevalue="" checked="false" optional="true" label="Output unmapped reads" help="Save unmapped reads to file. Default: No" />
             <param name="rscriptOpt" type="boolean" truevalue="True" falsevalue="" checked="false" optional="true" label="Output R script" help="Output the R script used to generate the plots in the pdf report. Default: No" />
-            <param name="logOpt" type="boolean" truevalue="True" falsevalue="" checked="false" label="Output logfile" help="This file includes the logging information, it will list some basic statistics of the dataset at the end" />
+            <param name="logOpt" type="boolean" truevalue="True" falsevalue="" checked="false" label="Output Log file" help="This file includes the logging information, it will list some basic statistics of the dataset at the end" />
         </section>

         <section name="adv" title="Advanced Options">
             <param name="gmt_file" argument="--gmt-file" type="data" format="tabular" optional="true" value="" label="Pathway file for QC" help="TThe pathway file used for QC, in GMT format. By default it will use the GMT file provided by MAGeCK" />
-            <param name="trim5" argument="--trim-5" type="integer" min="0" optional="true" label="5' Trim length" help="Length of trimming the 5' of the reads. Default 0" />
+            <param name="trim5" argument="--trim-5" type="integer" min="0" optional="true" label="5' Trim length" help="Length of trimming the 5' of the reads. Default: 0" />
             <param name="norm_method" argument="--norm-method" type="select" label="Method for normalization" help="Methods include: None (no normalization), Median (median normalization), Total (normalization by total read counts), Control (normalization by control sgRNAs specified by the --control-sgrna option). Default: Median" >
                 <option value="none">None</option>
                 <option value="median" selected="True">Median</option>
@@ -107,9 +115,9 @@
                 <option value="control">Control</option>
             </param>
             <param name="control_sgrna" argument="--control-sgrna" type="data" format="tabular" optional="true" label="Control sgRNAs file" help="A file of control sgRNA IDs for normalization and for generating the null distribution of RRA" />
-            <param name="sgrna_len" argument="--sgrna-len" type="integer" min="0" value="20" optional="true" label="Length of the sgRNA" help="The program will automatically determine the sgRNA length from the library file, so only use this if you turn on the --unmapped-to-file option. Default: 20" />
+            <param name="sgrna_len" argument="--sgrna-len" type="integer" min="0" optional="true" label="Length of the sgRNA" help="The program will automatically determine the sgRNA length from the library file, so only use this if you turn on the --unmapped-to-file option. Default: autodetected" />
             <param name="count_n" argument="--count-n" type="boolean" truevalue="--count-n" falsevalue="" checked="false" optional="true" label="Count sgRNAs with Ns" help="By default, sgRNAs containing Ns will be discarded" />
-            <param name="reverse_complement" argument="--reverse-complement" type="boolean" truevalue="--reverse-complement" falsevalue="" checked="false" optional="true" label="Reverse complement the sequences in library for read mapping" />
+            <param name="reverse_complement" argument="--reverse-complement" type="boolean" truevalue="--reverse-complement" falsevalue="" checked="false" optional="true" label="Reverse complement" help="Reverse complement the sequences in library for read mapping" />
             <param name="test_run" argument="--test-run" type="boolean" truevalue="--test-run" falsevalue="" checked="false" optional="true" label="Test running" help="If this option is on, MAGeCK will only process the first 1M records for each file" />
         </section>
     </inputs>
@@ -125,19 +133,12 @@
         <data name="unmapped" format="tabular" from_work_dir="*.unmapped.txt" label="${tool.name} on ${on_string}: Unmapped" >
             <filter>out['unmappedOpt'] is True</filter>
         </data>
-        <data name="log" format="txt" from_work_dir="*.log" label="${tool.name} on ${on_string}: Log" >
+        <data name="log" format="txt" from_work_dir="output.log" label="${tool.name} on ${on_string}: Log" >
             <filter>out['logOpt'] is True</filter>
         </data>
     </outputs>

     <tests>
-        <!-- Ensure fastq works -->
-        <test expect_num_outputs="1">
-            <param name="sgrna_library_file" value="demo/demo2/library.txt" ftype="tabular" />
-            <param name="format_select" value="files" />
-            <param name="sample" value="demo/demo2/test1.fastq" ftype="fastq"/>
-            <output name="counts" file="out.count.fastq.txt"/>
-        </test>
         <!-- Ensure fastq.gz input works -->
         <test expect_num_outputs="1">
             <param name="sgrna_library_file" value="demo/demo2/library.txt" ftype="tabular" />
@@ -145,6 +146,14 @@
             <param name="sample" value="test1.fastq.gz" ftype="fastq.gz"/>
             <output name="counts" file="out.count.fastq.txt"/>
         </test>
+        <!-- Ensure fastq input works -->
+        <test expect_num_outputs="1">
+            <param name="sgrna_library_file" value="demo/demo2/library.txt" ftype="tabular" />
+            <param name="format_select" value="files" />
+            <param name="sample" value="demo/demo2/test1.fastq" ftype="fastq"/>
+            <param name="sample_label" value="test1.fastq.gz" />
+            <output name="counts" file="out.count.fastq.txt"/>
+        </test>
         <!-- Ensure BAM input works -->
         <test expect_num_outputs="1">
             <param name="sgrna_library_file" value="demo/demo2/library.txt" ftype="tabular" />
@@ -181,7 +190,17 @@

 **Inputs**

-By default, MAGeCK count command will automatically determine the trimming length of the fastq file.
+**Read file(s)**
+
+**MAGeCK count** accepts one or more FASTQ.GZ, FASTQ or BAM files as input.
+
+Since version 0.5.5, the MAGeCK count module supports collecting read counts from BAM files. This allows you to use a third-party aligner to map reads to the library with mismatches, providing more usable reads for the analysis. However, it is still recommended to use the FASTQ files directly in the count module (which does not allow any mismatches), because:
+
+* Some sgRNAs with mismatches may behave in unwanted ways (no on-target cleavage, or additional off-target cleavage);
+* In most cases, the read counts are sufficient even when no mismatches are allowed;
+* The mapping procedure is more complicated; for example, you need to know the exact length of the 3' adapter sequence.
+
+It is also possible to input a Count Table to normalize counts and get statistics.

 **sgRNA library file**
--- a/test-data/out.count.bam.txt	Wed Feb 14 06:42:18 2018 -0500
+++ b/test-data/out.count.bam.txt	Sat Feb 17 10:41:26 2018 -0500
@@ -1,4 +1,4 @@
-sgRNA	Gene	sample1
+sgRNA	Gene	test1.bam
 s_10007	CCNA1	0
 s_10008	CCNA1	0
 s_10027	CCNC	0
--- a/test-data/out.count.fastq.txt	Wed Feb 14 06:42:18 2018 -0500
+++ b/test-data/out.count.fastq.txt	Sat Feb 17 10:41:26 2018 -0500
@@ -1,4 +1,4 @@
-sgRNA	Gene	sample1
+sgRNA	Gene	test1.fastq.gz
 s_47512	RNF111	1
 s_24835	HCFC1R1	1
 s_14784	CYP4B1	4