Mercurial > repos > iuc > mageck_count

diff mageck_count.xml @ 2:9527a3d6ebd2 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mageck commit 49e456dda49db1f52fc876f406a10273a408b1a2
author: iuc
date: Wed, 04 Apr 2018 11:03:29 -0400
parents: 4d72d204dcfa
children: d8f26ae1e909
--- a/mageck_count.xml	Sat Feb 17 10:41:26 2018 -0500
+++ b/mageck_count.xml	Wed Apr 04 11:03:29 2018 -0400
@@ -1,5 +1,5 @@
 <?xml version="1.0"?>
-<tool id="mageck_count" name="MAGeCK count" version="@VERSION@.1" >
+<tool id="mageck_count" name="MAGeCK count" version="@VERSION@.2" >
     <description>- collect sgRNA read counts from read mapping files</description>
     <macros>
         <import>mageck_macros.xml</import>
@@ -11,30 +11,40 @@
     <command detect_errors="exit_code"><![CDATA[
 
 #if str($reads.format_select) == "files":
+    ## Symlink every input to a name with an extension mageck recognises; $files and $names are filled in the same order
+    #import re
+    #set $names = []
+    #set $files = []
+    #for $i, $sample in enumerate($reads.sample):
 
-    #if $reads.sample.is_of_type('fastq.gz', 'fastqsanger.gz'):
-        ln -s '${reads.sample}' 'input.gz' &&                                                         
-        #set $infile = 'input.gz'
-    #elif $reads.sample.is_of_type('fastq'):
-        ln -s '${reads.sample}' 'input.fastq' &&
-        #set $infile = 'input.fastq'
-    #elif $reads.sample.is_of_type('bam'):
-        ln -s '${reads.sample}' 'input.bam' &&                                            
-        #set $infile = 'input.bam'
-    #end if
+        #if $sample.is_of_type('fastq.gz', 'fastqsanger.gz'):
+            ln -s '${sample}' input_${i}.gz &&
+            #silent $files.append('input_' + str($i) + '.gz')
+        #elif $sample.is_of_type('fastq'):
+            ln -s '${sample}' input_${i}.fastq &&
+            #silent $files.append('input_' + str($i) + '.fastq')
+        #elif $sample.is_of_type('bam'):
+            ln -s '${sample}' input_${i}.bam &&
+            #silent $files.append('input_' + str($i) + '.bam')
+        #end if
+        #silent $names.append(re.sub('[^\w\-\s]', '_', str($sample.element_identifier)))
+    #end for
 
 #end if
 
 mageck count
 
 #if str($reads.format_select) == "files":
-    --fastq $infile
+
     -l '$reads.sgrna_library_file'
+
+    --fastq ${' '.join($files)}
     #if $reads.sample_label:
         --sample-label '$reads.sample_label'
     #else:
-        --sample-label '$reads.sample.element_identifier'
+        --sample-label '${ ','.join( $names ) }'
     #end if
+
 #elif str($reads.format_select) == "table":
     -k '$reads.counts'
     #if '$sgrna_library_file':
@@ -55,7 +65,9 @@
    --trim-5 $adv.trim5
 #end if
 
---norm-method $adv.norm_method
+#if $adv.norm_method:
+    --norm-method $adv.norm_method
+#end if
 
 #if $adv.control_sgrna:
     --control-sgrna $adv.control_sgrna
@@ -79,6 +91,7 @@
   &&
   gs -dBATCH -dNOPAUSE -q -dPDFSETTINGS=/prepress -sDEVICE=pdfwrite -sOutputFile=merged.pdf *.pdf
 #end if
+
     ]]></command>
     <inputs>
         <conditional name="reads">
@@ -87,9 +100,11 @@
                 <option value="table">Single Count table</option>
             </param>
             <when value="files">
-                <param name="sample" argument="--fastq" type="data" format="fastq,fastq.gz,bam" multiple="false" label="Sample reads" help="The input reads must be in FASTQ, FASTQ.GZ or BAM format and all files must be in the same format." />
+                <param name="sample" argument="--fastq" type="data" format="fastq,fastq.gz,bam" multiple="true" label="Sample reads" help="The input reads must be in FASTQ, FASTQ.GZ or BAM format and all files must be in the same format." />
                 <param name="sgrna_library_file" type="data" argument="--list-seq" format="txt,tabular,tsv,csv" label="sgRNA library file" help="A library file must be provided with three columns containing the sgRNA ID, sequence, and gene it is targeting, see Help below for more information." />
-                <param name="sample_label" argument="--sample-label" type="text" optional="true" value="" label="Specify sample label" help="By default, the input filename will be used as the sample label. Optionally you can specify a different sample label to use."/>
+                <param name="sample_label" argument="--sample-label" type="text" optional="true" value="" label="Specify sample labels" help="By default, the input filenames will be used as the sample labels. Optionally you can specify different sample labels to use, separated by commas (,). The number of labels must equal the number of samples provided to the --fastq option.">
+                    <validator type="regex" message="Please only use letters, numbers or underscores in sample labels, and separate labels by commas">^[\w,]+$</validator>
+                </param>
             </when>
             <when value="table">
                 <param name="counts" argument="-k"  type="data" format="tabular" optional="true" label="Counts Table" help="Alternatively, a tab-separated file of read counts can be used as input. See Help below for format" />
@@ -98,19 +113,20 @@
         </conditional>
 
         <section name="out" title="Output Options">
-            <param name="countsummaryOpt" type="boolean" truevalue="True" falsevalue="" checked="false" optional="true" label="Output summary statistics" help="Output summary statistics of the fastq files. Default: No" />
-            <param name="pdfreportOpt" argument="--pdf-report" type="boolean" truevalue="--pdf-report" falsevalue="" checked="false" optional="true" label="Output PDF report" help="Generate pdf report of the input file. Default: No" />
-            <param name="unmappedOpt" argument="--unmapped-to-file" type="boolean" truevalue="--unmapped-to-file" falsevalue="" checked="false" optional="true" label="Output unmapped reads" help="Save unmapped reads to file. Default: No" />
-            <param name="rscriptOpt" type="boolean" truevalue="True" falsevalue="" checked="false" optional="true" label="Output R script" help="Output the R script used to generate the plots in the pdf report. Default: No" />
-            <param name="logOpt" type="boolean" truevalue="True" falsevalue="" checked="false" label="Output Log file" help="This file includes the logging information, it will list some basic statistics of the dataset at the end" />
+            <param name="countsummaryOpt" type="boolean" truevalue="True" falsevalue="" checked="false" optional="true" label="Output Count Summary file" help="Output summary statistics of the fastq files. Default: No" />
+            <param name="normcountsOpt" type="boolean" truevalue="True" falsevalue="" checked="false" optional="true" label="Output Normalized Counts file" help="Default: No" />
+            <param name="pdfreportOpt" argument="--pdf-report" type="boolean" truevalue="--pdf-report" falsevalue="" checked="false" optional="true"  label="Output plots" help="Generate PDF of the plots. Default: No" />
+            <param name="unmappedOpt" argument="--unmapped-to-file" type="boolean" truevalue="--unmapped-to-file" falsevalue="" checked="false" optional="true" label="Output Unmapped reads" help="Save unmapped reads to file. Default: No" />
+            <param name="rfilesOpt" type="boolean" truevalue="True" falsevalue="" checked="false" optional="true" label="Output R files" help="Output the .R and .Rnw files used to generate the plots in the PDF report. The median-normalized read counts file will also be output as it is required to regenerate the plots. Default: No" />
+            <param name="logOpt" type="boolean" truevalue="True" falsevalue="" checked="false" label="Output Log file" help="This file includes the logging information, it will list some basic statistics of the dataset at the end. Default: No" />
         </section>
 
         <section name="adv" title="Advanced Options">
-            <param name="gmt_file" argument="--gmt-file" type="data" format="tabular" optional="true" value="" label="Pathway file for QC" help="TThe pathway file used for QC, in GMT format. By default it will use the GMT file provided by MAGeCK" />
+            <param name="gmt_file" argument="--gmt-file" type="data" format="tabular" optional="true" value="" label="Pathway file for QC" help="The pathway file used for QC, in GMT format. By default it will use the GMT file provided by MAGeCK" />
             <param name="trim5" argument="--trim-5" type="integer" min="0" optional="true" label="5' Trim length" help="Length of trimming the 5' of the reads. Default: 0" />
-            <param name="norm_method" argument="--norm-method" type="select" label="Method for normalization" help="Methods include: None (no normalization), Median (median normalization), Total (normalization by total read counts), Control (normalization by control sgRNAs specified by the --control-sgrna option). Default: Median" >
+            <param name="norm_method" argument="--norm-method" type="select" optional="true" label="Method for normalization" help="Methods include: None (no normalization), Median (median normalization), Total (normalization by total read counts), Control (normalization by control sgRNAs specified by the --control-sgrna option). Default: Median" >
+                <option value="median" selected="True">Median</option>
                 <option value="none">None</option>
-                <option value="median" selected="True">Median</option>
                 <option value="total">Total</option>
                 <option value="control">Control</option>
             </param>
@@ -123,19 +139,28 @@
     </inputs>
 
     <outputs>
-        <data name="counts" format="tabular" from_work_dir="*.count.txt" label="${tool.name} on ${on_string}: sgRNA Counts" />
-        <data name="countsummary" format="tabular" from_work_dir="*.countsummary.txt" label="${tool.name} on ${on_string}: sgRNA Count Summary" >
+        <data name="counts" format="tabular" from_work_dir="output.count.txt" label="${tool.name} on ${on_string}: sgRNA Counts" />
+        <data name="countsummary" format="tabular" from_work_dir="output.countsummary.txt" label="${tool.name} on ${on_string}: sgRNA Count Summary" >
             <filter>out['countsummaryOpt'] is True</filter>
         </data>
-        <data name="pdfreport" format="pdf" from_work_dir="merged.pdf" label="${tool.name} on ${on_string}: PDF Report"  >
-            <filter>out['pdfreportOpt'] is True</filter>
+        <data name="normcounts" format="tabular" from_work_dir="output.count_normalized.txt" label="${tool.name} on ${on_string}: Normalized counts" >
+            <filter>out['normcountsOpt'] is True or out['rfilesOpt'] is True</filter>
         </data>
-        <data name="unmapped" format="tabular" from_work_dir="*.unmapped.txt" label="${tool.name} on ${on_string}: Unmapped" >
+        <data name="unmapped" format="tabular" from_work_dir="output.unmapped.txt" label="${tool.name} on ${on_string}: Unmapped" >
             <filter>out['unmappedOpt'] is True</filter>
         </data>
+        <data name="pdfreport" format="pdf" from_work_dir="merged.pdf" label="${tool.name} on ${on_string}: PDF Report" >
+            <filter>out['pdfreportOpt'] is True</filter>
+        </data>
         <data name="log" format="txt" from_work_dir="output.log" label="${tool.name} on ${on_string}: Log" >
             <filter>out['logOpt'] is True</filter>
         </data>
+        <data name="rscript" format="txt" from_work_dir="output_countsummary.R" label="${tool.name} on ${on_string}: R file"  >
+            <filter>out['rfilesOpt'] is True</filter>
+        </data>
+        <data name="rnwfile" format="txt" from_work_dir="output_countsummary.Rnw" label="${tool.name} on ${on_string}: Rnw file"  >
+            <filter>out['rfilesOpt'] is True</filter>
+        </data>
     </outputs>
 
     <tests>
@@ -144,15 +169,24 @@
             <param name="sgrna_library_file" value="demo/demo2/library.txt" ftype="tabular" />
             <param name="format_select" value="files" />
             <param name="sample" value="test1.fastq.gz" ftype="fastq.gz"/>
-            <output name="counts" file="out.count.fastq.txt"/>
+            <output name="counts" file="out.count.txt"/>
         </test>
-        <!-- Ensure fastq input works -->
+        <!-- Ensure multiple fastq.gz input works with report -->
+        <test expect_num_outputs="2">
+            <param name="sgrna_library_file" value="demo/demo2/library.txt" ftype="tabular" />
+            <param name="format_select" value="files" />
+            <param name="sample" value="test1.fastq.gz,test2.fastq.gz" ftype="fastq.gz"/>
+            <param name="pdfreportOpt" value="True" />
+            <output name="counts" file="out.count_multi.txt"/>
+            <output name="pdfreport" file="out.countsummary_multi.pdf" compare="sim_size" />
+        </test>
+         <!-- Ensure fastq input works -->
         <test expect_num_outputs="1">
             <param name="sgrna_library_file" value="demo/demo2/library.txt" ftype="tabular" />
             <param name="format_select" value="files" />
             <param name="sample" value="demo/demo2/test1.fastq" ftype="fastq"/>
-            <param name="sample_label" value="test1.fastq.gz" />
-            <output name="counts" file="out.count.fastq.txt"/>
+            <param name="sample_label" value="test1_fastq_gz" />
+            <output name="counts" file="out.count.txt"/>
         </test>
         <!-- Ensure BAM input works -->
         <test expect_num_outputs="1">
@@ -162,20 +196,23 @@
             <output name="counts" file="out.count.bam.txt"/>
         </test>
         <!-- Ensure optional outputs work -->
-        <test expect_num_outputs="5">
+        <test expect_num_outputs="8">
             <param name="sgrna_library_file" value="demo/demo2/library.txt" ftype="tabular" />
             <param name="format_select" value="files" />
             <param name="sample" value="test1.fastq.gz" ftype="fastq.gz"/>
             <param name="countsummaryOpt" value="True" />
             <param name="unmappedOpt" value="True" />
             <param name="pdfreportOpt" value="True" />
-            <param name="rscriptOpt" value="True" />
+            <param name="rfilesOpt" value="True" />
             <param name="logOpt" value="True" />
             <output name="counts" file="out.count.fastq.txt"/>
             <output name="countsummary" file="out.countsummary.txt" compare="sim_size"/>
+            <output name="normcounts" file="output.count_normalized.txt"/>
             <output name="log" file="out.count.log.txt" compare="sim_size"/>
             <output name="unmapped" file="out.count.unmapped.txt" />
             <output name="pdfreport" file="out.countsummary.pdf" compare="sim_size" />
+            <output name="rscript" file="out.count.R" />
+            <output name="rnwfile" file="output_countsummary.Rnw" />
         </test>
     </tests>
 
@@ -190,7 +227,7 @@
 
 **Inputs**
 
-**Read file(s)**
+**Read files**
 
 **MAGeCK count** accepts one or more FASTQ.GZ, FASTQ or BAM files as input.
 
@@ -224,26 +261,39 @@
 
 **Outputs**
 
+This tool outputs
+
+    * an sgRNA Counts table
+
+Optionally, under **Output Options** you can choose to output
+
+    * a Count Summary file
+    * a PDF report
+    * a Normalized Counts table
+    * an Unmapped reads file
+    * the .R and .Rnw files used to generate the plots and PDF
+    * a Log file of the analysis
+
 **sgRNA Count file**
 
 An example of the sgRNA count output file is shown below. This file can be used with **MAGeCK test**.
 
 Example:
 
-    ==============  ========    ================
-    **sgRNA**       **Gene**    **Sample Label**
-    --------------  --------    ----------------
-    A1CF_m52595977  A1CF        213             
-    A1CF_m52596017  A1CF        294             
-    A1CF_m52596056  A1CF        421             
-    A1CF_m52603842  A1CF        274             
-    A1CF_m52603847  A1CF        0               
-    ==============  ========    ================
+    ==============  ========    =========== ===========
+    **sgRNA**       **Gene**    **Sample1** **Sample2**
+    --------------  --------    ----------- -----------
+    A1CF_m52595977  A1CF        213         199    
+    A1CF_m52596017  A1CF        294         164    
+    A1CF_m52596056  A1CF        421         378    
+    A1CF_m52603842  A1CF        274         281    
+    A1CF_m52603847  A1CF        0           0    
+    ==============  ========    =========== ===========
 
 
 **Count Summary**
 
-MAGeCK can produce a **Count Summary** file containing statistics of the input file (the statistics of fastq file are also in the PDF report). An example count summary file is shown below.
+MAGeCK can produce a **Count Summary** file containing statistics of the input files (the statistics of fastq files are also in the PDF report). An example count summary file is shown below.
 
 Example:
author	iuc
date	Wed, 04 Apr 2018 11:03:29 -0400
parents	4d72d204dcfa
children	d8f26ae1e909