changeset 4:63030102d46e draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/trinity commit bc4ea54f0deec4ddec8e6cf79fd547491e165686
author iuc
date Mon, 28 Aug 2017 16:53:59 -0400
parents 24d072085816
children 96be11bb913d
files analyze_diff_expr.xml test-data/count/kallisto/abundance.tsv test-data/count/kallisto/abundance.tsv.genes test-data/count/kallisto/abundance_B.tsv test-data/count/kallisto/abundance_B.tsv.genes
diffstat 5 files changed, 97 insertions(+), 30 deletions(-) [+]
line wrap: on
line diff
--- a/analyze_diff_expr.xml	Fri Mar 31 11:37:35 2017 -0400
+++ b/analyze_diff_expr.xml	Mon Aug 28 16:53:59 2017 -0400
@@ -1,4 +1,4 @@
-<tool id="trinity_analyze_diff_expr" name="Extract and cluster differentially expressed transcripts" version="@WRAPPER_VERSION@.0">
+<tool id="trinity_analyze_diff_expr" name="Extract and cluster differentially expressed transcripts" version="@WRAPPER_VERSION@.2">
     <description>from a Trinity assembly</description>
     <macros>
         <import>macros.xml</import>
@@ -6,19 +6,29 @@
     <expand macro="requirements">
         <requirement type="package" version="2.6.0">bioconductor-qvalue</requirement>
         <requirement type="package" version="1.26.0">bioconductor-goseq</requirement>
-        <requirement type="package" version="3.4.0">bioconductor-go.db</requirement>
+        <requirement type="package" version="2.0.6">r-cluster</requirement>
     </expand>
     <command detect_errors="aggressive"><![CDATA[
     ## DE results input files must be in the working directory and have suffix .DE_results
     #import re
     #for $input in $DE_results
-        ln -s "${input}" "${re.sub('[^\w\-_.]', '_', input.element_identifier)}.DE_results"
+        #if re.search('\.DE_results$',input.element_identifier)
+            ## General case: the element name already ends with the literal suffix .DE_results (dot escaped so it is not a regex wildcard), as for files generated by run_de_analysis.pl; link it under its sanitized name unchanged
+            ln -s "${input}" "${re.sub('[^\w\-_.]', '_', input.element_identifier)}"
+        #else
+            ## Particular case: the element name lacks the .DE_results suffix, so append it to the sanitized name to satisfy the suffix requirement stated above
+            ln -s "${input}" "${re.sub('[^\w\-_.]', '_', input.element_identifier)}.DE_results"
+        #end if
         &&
     #end for
-    #for $DE_matrix in $DE_matrices
-        ln -s "${DE_matrix}" "${re.sub('[^\w\-_.]', '_', DE_matrix.element_identifier)}.count_matrix"
-        &&
-    #end for
+    #if str( $additional_params.GO_enrichment.examine_GO_enrichment ) == "yes":
+        ## Only when GO enrichment is requested: DE matrix input files must be in the working directory and have the same name as the DE results input files, with suffix .count_matrix in place of suffix .DE_results
+        #for $DE_matrix in $additional_params.GO_enrichment.DE_matrices
+            ## Link each matrix under its sanitized element name; no suffix is appended here, so the element name must already end with .count_matrix (general case: files generated by run_de_analysis.pl)
+            ln -s "${DE_matrix}" "${re.sub('[^\w\-_.]', '_', DE_matrix.element_identifier)}"
+            &&
+        #end for
+    #end if
 
     analyze_diff_expr.pl
         --matrix "${matrix}"
@@ -48,7 +58,6 @@
         <param format="tabular" name="matrix" argument="--matrix" type="data" label="Expression matrix" help="Raw counts matrix produced by 'Build expression matrix for a de novo assembly of RNA-Seq data by Trinity' tool"/>
         <param format="tabular" name="samples" argument="--samples" type="data" label="Sample description" help="File describing samples and replicates"/>
         <param format="tabular" name="DE_results" type="data_collection" collection_type="list" label="Differential expression results" help="Generated by 'Differential expression analysis using a Trinity assembly' tool"/>
-        <param format="tabular" name="DE_matrices" type="data_collection" collection_type="list" label="Differential expression count matrices" help="Generated by 'Differential expression analysis using a Trinity assembly' tool"/>
         <param name="p" type="float" argument="-P" value="0.001" label="p-value cutoff for FDR"/>
         <param name="c" type="float" argument="-C" value="2" label="min abs(log2(a/b)) fold change" help="Default: 2 (meaning 2^(2) or 4-fold"/>
         <section name="additional_params" title="Additional Options" expanded="False">
@@ -63,6 +72,7 @@
                 <when value="no">
                 </when>
                 <when value="yes">
+                    <param format="tabular" name="DE_matrices" type="data_collection" collection_type="list" label="Differential expression count matrices" help="Generated by 'Differential expression analysis using a Trinity assembly' tool. If not, be careful that the file names are identical to the file names of differential expression results, with extension '.count_matrix' instead of '.DE_results'."/>
                     <param format="tabular" name="GO_annots" argument="--GO_annots" type="data" label="Extracted GO assignments file" help="Generated by the Trinotate script extract_GO_assignments_from_Trinotate_xls.pl. Must have 2 columns: feature_id GO:000001,GO:00002,..."/>
                     <param format="tabular" name="gene_lengths" argument="--gene_lengths" type="data" label="Gene length file" help="Must have 2 columns: feature_id length"/>
                 </when>
@@ -71,7 +81,7 @@
     </inputs>
     <outputs>
         <collection name="extracted_DE_genes" type="list" label="${tool.name} on ${on_string}: extracted differentially expressed genes">
-            <discover_datasets pattern="(?P&lt;name&gt;.+)\.subset$" ext="tabular" />
+            <discover_datasets pattern="(?P&lt;name&gt;.+\.subset)$" ext="tabular" />
         </collection>
         <collection name="summary_files" type="list" label="${tool.name} on ${on_string}: summary files">
             <data format="tabular" name="results_matrix" from_work_dir="results.matrix"/>
@@ -83,34 +93,29 @@
         </collection>
         <data format="RData" name="rdata" label="${tool.name} on ${on_string}: RData file" from_work_dir="results.matrix.RData"/>
         <collection name="GOseq_enrichment" type="list" label="${tool.name} on ${on_string}: GOseq enriched and depleted categories">
+            <filter>additional_params['GO_enrichment']['examine_GO_enrichment'] == 'yes'</filter>
             <discover_datasets pattern="(?P&lt;name&gt;.+\.subset\.GOseq\.(enriched|depleted))$" ext="tabular" />
         </collection>
     </outputs>
     <tests>
+        <!-- Test without GO enrichment analysis -->
         <test>
             <param name="matrix" value="count/qcheck/matrix.counts.matrix"/>
             <param name="samples" value="count/samples.txt"/>
             <param name="DE_results">
                 <collection type="list">
-                    <element name="input.matrix.wt_37_vs_wt_GSNO.DESeq2" value="count/exp_diff/input.matrix.wt_37_vs_wt_GSNO.DESeq2.DE_results" ftype="tabular" />
-                    <element name="input.matrix.wt_37_vs_wt_ph8.DESeq2" value="count/exp_diff/input.matrix.wt_37_vs_wt_ph8.DESeq2.DE_results" ftype="tabular" />
-                    <element name="input.matrix.wt_GSNO_vs_wt_ph8.DESeq2" value="count/exp_diff/input.matrix.wt_GSNO_vs_wt_ph8.DESeq2.DE_results" ftype="tabular" />
-              </collection>
-            </param>
-            <param name="DE_matrices">
-                <collection type="list">
-                    <element name="input.matrix.wt_37_vs_wt_GSNO.DESeq2" value="count/exp_diff/input.matrix.wt_37_vs_wt_GSNO.DESeq2.count_matrix" ftype="tabular" />
-                    <element name="input.matrix.wt_37_vs_wt_ph8.DESeq2" value="count/exp_diff/input.matrix.wt_37_vs_wt_ph8.DESeq2.count_matrix" ftype="tabular" />
-                    <element name="input.matrix.wt_GSNO_vs_wt_ph8.DESeq2" value="count/exp_diff/input.matrix.wt_GSNO_vs_wt_ph8.DESeq2.count_matrix" ftype="tabular" />
+                    <element name="input.matrix.wt_37_vs_wt_GSNO.DESeq2.DE_results" value="count/exp_diff/input.matrix.wt_37_vs_wt_GSNO.DESeq2.DE_results" ftype="tabular" />
+                    <element name="input.matrix.wt_37_vs_wt_ph8.DESeq2.DE_results" value="count/exp_diff/input.matrix.wt_37_vs_wt_ph8.DESeq2.DE_results" ftype="tabular" />
+                    <element name="input.matrix.wt_GSNO_vs_wt_ph8.DESeq2.DE_results" value="count/exp_diff/input.matrix.wt_GSNO_vs_wt_ph8.DESeq2.DE_results" ftype="tabular" />
               </collection>
             </param>
             <output_collection name="extracted_DE_genes">
-                <element name="input.matrix.wt_37_vs_wt_GSNO.DESeq2.DE_results.P0.001_C2.0.wt_37-UP" compare="sim_size" file="count/analyze_diff_expr/input.matrix.wt_37_vs_wt_GSNO.DESeq2.DE_results.P0.001_C2.wt_37-UP.subset"/>
-                <element name="input.matrix.wt_37_vs_wt_GSNO.DESeq2.DE_results.P0.001_C2.0.wt_GSNO-UP" compare="sim_size" file="count/analyze_diff_expr/input.matrix.wt_37_vs_wt_GSNO.DESeq2.DE_results.P0.001_C2.wt_GSNO-UP.subset"/>
-                <element name="input.matrix.wt_37_vs_wt_ph8.DESeq2.DE_results.P0.001_C2.0.wt_37-UP" compare="sim_size" file="count/analyze_diff_expr/input.matrix.wt_37_vs_wt_ph8.DESeq2.DE_results.P0.001_C2.wt_37-UP.subset"/>
-                <element name="input.matrix.wt_37_vs_wt_ph8.DESeq2.DE_results.P0.001_C2.0.wt_ph8-UP" compare="sim_size" file="count/analyze_diff_expr/input.matrix.wt_37_vs_wt_ph8.DESeq2.DE_results.P0.001_C2.wt_ph8-UP.subset"/>
-                <element name="input.matrix.wt_GSNO_vs_wt_ph8.DESeq2.DE_results.P0.001_C2.0.wt_GSNO-UP" compare="sim_size" file="count/analyze_diff_expr/input.matrix.wt_GSNO_vs_wt_ph8.DESeq2.DE_results.P0.001_C2.wt_GSNO-UP.subset"/>
-                <element name="input.matrix.wt_GSNO_vs_wt_ph8.DESeq2.DE_results.P0.001_C2.0.wt_ph8-UP" compare="sim_size" file="count/analyze_diff_expr/input.matrix.wt_GSNO_vs_wt_ph8.DESeq2.DE_results.P0.001_C2.wt_ph8-UP.subset"/>
+                <element name="input.matrix.wt_37_vs_wt_GSNO.DESeq2.DE_results.P0.001_C2.0.wt_37-UP.subset" compare="sim_size" file="count/analyze_diff_expr/input.matrix.wt_37_vs_wt_GSNO.DESeq2.DE_results.P0.001_C2.wt_37-UP.subset"/>
+                <element name="input.matrix.wt_37_vs_wt_GSNO.DESeq2.DE_results.P0.001_C2.0.wt_GSNO-UP.subset" compare="sim_size" file="count/analyze_diff_expr/input.matrix.wt_37_vs_wt_GSNO.DESeq2.DE_results.P0.001_C2.wt_GSNO-UP.subset"/>
+                <element name="input.matrix.wt_37_vs_wt_ph8.DESeq2.DE_results.P0.001_C2.0.wt_37-UP.subset" compare="sim_size" file="count/analyze_diff_expr/input.matrix.wt_37_vs_wt_ph8.DESeq2.DE_results.P0.001_C2.wt_37-UP.subset"/>
+                <element name="input.matrix.wt_37_vs_wt_ph8.DESeq2.DE_results.P0.001_C2.0.wt_ph8-UP.subset" compare="sim_size" file="count/analyze_diff_expr/input.matrix.wt_37_vs_wt_ph8.DESeq2.DE_results.P0.001_C2.wt_ph8-UP.subset"/>
+                <element name="input.matrix.wt_GSNO_vs_wt_ph8.DESeq2.DE_results.P0.001_C2.0.wt_GSNO-UP.subset" compare="sim_size" file="count/analyze_diff_expr/input.matrix.wt_GSNO_vs_wt_ph8.DESeq2.DE_results.P0.001_C2.wt_GSNO-UP.subset"/>
+                <element name="input.matrix.wt_GSNO_vs_wt_ph8.DESeq2.DE_results.P0.001_C2.0.wt_ph8-UP.subset" compare="sim_size" file="count/analyze_diff_expr/input.matrix.wt_GSNO_vs_wt_ph8.DESeq2.DE_results.P0.001_C2.wt_ph8-UP.subset"/>
             </output_collection>
             <output_collection name="summary_files">
                 <element name="results_matrix" compare="sim_size" file="count/analyze_diff_expr/results.matrix"/>
@@ -122,6 +127,7 @@
             </output_collection>
             <output name="rdata" compare="sim_size" file="count/analyze_diff_expr/results.matrix.RData"/>
         </test>
+        <!-- Test without GO enrichment analysis, test name of DE_results input files different from those generated by tool run_de_analysis.pl -->
         <test>
             <param name="matrix" value="count/qcheck/matrix.counts.matrix"/>
             <param name="samples" value="count/samples.txt"/>
@@ -130,18 +136,47 @@
                     <element name="input.matrix.wt_37_vs_wt_GSNO.DESeq2" value="count/exp_diff/input.matrix.wt_37_vs_wt_GSNO.DESeq2.DE_results" ftype="tabular" />
                     <element name="input.matrix.wt_37_vs_wt_ph8.DESeq2" value="count/exp_diff/input.matrix.wt_37_vs_wt_ph8.DESeq2.DE_results" ftype="tabular" />
                     <element name="input.matrix.wt_GSNO_vs_wt_ph8.DESeq2" value="count/exp_diff/input.matrix.wt_GSNO_vs_wt_ph8.DESeq2.DE_results" ftype="tabular" />
-                </collection>
+              </collection>
             </param>
-            <param name="DE_matrices">
+            <output_collection name="extracted_DE_genes">
+                <element name="input.matrix.wt_37_vs_wt_GSNO.DESeq2.DE_results.P0.001_C2.0.wt_37-UP.subset" compare="sim_size" file="count/analyze_diff_expr/input.matrix.wt_37_vs_wt_GSNO.DESeq2.DE_results.P0.001_C2.wt_37-UP.subset"/>
+                <element name="input.matrix.wt_37_vs_wt_GSNO.DESeq2.DE_results.P0.001_C2.0.wt_GSNO-UP.subset" compare="sim_size" file="count/analyze_diff_expr/input.matrix.wt_37_vs_wt_GSNO.DESeq2.DE_results.P0.001_C2.wt_GSNO-UP.subset"/>
+                <element name="input.matrix.wt_37_vs_wt_ph8.DESeq2.DE_results.P0.001_C2.0.wt_37-UP.subset" compare="sim_size" file="count/analyze_diff_expr/input.matrix.wt_37_vs_wt_ph8.DESeq2.DE_results.P0.001_C2.wt_37-UP.subset"/>
+                <element name="input.matrix.wt_37_vs_wt_ph8.DESeq2.DE_results.P0.001_C2.0.wt_ph8-UP.subset" compare="sim_size" file="count/analyze_diff_expr/input.matrix.wt_37_vs_wt_ph8.DESeq2.DE_results.P0.001_C2.wt_ph8-UP.subset"/>
+                <element name="input.matrix.wt_GSNO_vs_wt_ph8.DESeq2.DE_results.P0.001_C2.0.wt_GSNO-UP.subset" compare="sim_size" file="count/analyze_diff_expr/input.matrix.wt_GSNO_vs_wt_ph8.DESeq2.DE_results.P0.001_C2.wt_GSNO-UP.subset"/>
+                <element name="input.matrix.wt_GSNO_vs_wt_ph8.DESeq2.DE_results.P0.001_C2.0.wt_ph8-UP.subset" compare="sim_size" file="count/analyze_diff_expr/input.matrix.wt_GSNO_vs_wt_ph8.DESeq2.DE_results.P0.001_C2.wt_ph8-UP.subset"/>
+            </output_collection>
+            <output_collection name="summary_files">
+                <element name="results_matrix" compare="sim_size" file="count/analyze_diff_expr/results.matrix"/>
+                <element name="results_matrix_log2_centered" compare="sim_size" file="count/analyze_diff_expr/results.matrix.log2.centered.dat"/>
+                <element name="results_matrix_log2_centered_heatmap" delta="100" compare="sim_size" file="count/analyze_diff_expr/results.matrix.log2.centered.genes_vs_samples_heatmap.pdf"/>
+                <element name="results_matrix_log2" compare="sim_size" file="count/analyze_diff_expr/results.matrix.log2.dat"/>
+                <element name="results_matrix_log2_sample_cor" compare="sim_size" file="count/analyze_diff_expr/results.matrix.log2.sample_cor.dat"/>
+                <element name="results_matrix_log2_sample_cor_matrix" delta="100" compare="sim_size" file="count/analyze_diff_expr/results.matrix.log2.sample_cor_matrix.pdf"/>
+            </output_collection>
+            <output name="rdata" compare="sim_size" file="count/analyze_diff_expr/results.matrix.RData"/>
+        </test>
+        <!-- Test with GO enrichment analysis -->
+        <test>
+            <param name="matrix" value="count/qcheck/matrix.counts.matrix"/>
+            <param name="samples" value="count/samples.txt"/>
+            <param name="DE_results">
                 <collection type="list">
-                    <element name="input.matrix.wt_37_vs_wt_GSNO.DESeq2" value="count/exp_diff/input.matrix.wt_37_vs_wt_GSNO.DESeq2.count_matrix" ftype="tabular" />
-                    <element name="input.matrix.wt_37_vs_wt_ph8.DESeq2" value="count/exp_diff/input.matrix.wt_37_vs_wt_ph8.DESeq2.count_matrix" ftype="tabular" />
-                    <element name="input.matrix.wt_GSNO_vs_wt_ph8.DESeq2" value="count/exp_diff/input.matrix.wt_GSNO_vs_wt_ph8.DESeq2.count_matrix" ftype="tabular" />
+                    <element name="input.matrix.wt_37_vs_wt_GSNO.DESeq2.DE_results" value="count/exp_diff/input.matrix.wt_37_vs_wt_GSNO.DESeq2.DE_results" ftype="tabular" />
+                    <element name="input.matrix.wt_37_vs_wt_ph8.DESeq2.DE_results" value="count/exp_diff/input.matrix.wt_37_vs_wt_ph8.DESeq2.DE_results" ftype="tabular" />
+                    <element name="input.matrix.wt_GSNO_vs_wt_ph8.DESeq2.DE_results" value="count/exp_diff/input.matrix.wt_GSNO_vs_wt_ph8.DESeq2.DE_results" ftype="tabular" />
                 </collection>
             </param>
             <section name="additional_params">
                 <conditional name="GO_enrichment">
                     <param name="examine_GO_enrichment" value="yes"/>
+                    <param name="DE_matrices">
+                        <collection type="list">
+                            <element name="input.matrix.wt_37_vs_wt_GSNO.DESeq2.count_matrix" value="count/exp_diff/input.matrix.wt_37_vs_wt_GSNO.DESeq2.count_matrix" ftype="tabular" />
+                            <element name="input.matrix.wt_37_vs_wt_ph8.DESeq2.count_matrix" value="count/exp_diff/input.matrix.wt_37_vs_wt_ph8.DESeq2.count_matrix" ftype="tabular" />
+                            <element name="input.matrix.wt_GSNO_vs_wt_ph8.DESeq2.count_matrix" value="count/exp_diff/input.matrix.wt_GSNO_vs_wt_ph8.DESeq2.count_matrix" ftype="tabular" />
+                        </collection>
+                    </param>
                     <param name="GO_annots" value="count/trinotate/go_annotations.txt"/>
                     <param name="gene_lengths" value="count/trinotate/genes.lengths.txt"/>
                 </conditional>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/count/kallisto/abundance.tsv	Mon Aug 28 16:53:59 2017 -0400
@@ -0,0 +1,8 @@
+target_id	length	eff_length	est_counts	tpm
+TRINITY_DN1_c0_g1_i1	380	133.091	13	114025
+TRINITY_DN0_c0_g1_i1	229	31.75	7	257370
+TRINITY_DN2_c0_g1_i1	279	55.7143	1	20952.6
+TRINITY_DN2_c1_g1_i1	541	279.667	15	62611.6
+TRINITY_DN2_c2_g1_i1	240	42.75	4	109227
+TRINITY_DN2_c3_g1_i1	202	19	0	0
+TRINITY_DN3_c0_g1_i1	216	18.75	7	435814
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/count/kallisto/abundance.tsv.genes	Mon Aug 28 16:53:59 2017 -0400
@@ -0,0 +1,8 @@
+target_id	length	eff_length	est_counts	tpm
+TRINITY_DN3_c0_g1	216.00	18.75	7.00	435814.00
+TRINITY_DN2_c2_g1	240.00	42.75	4.00	109227.00
+TRINITY_DN2_c3_g1	202.00	19.00	0.00	0.00
+TRINITY_DN0_c0_g1	229.00	31.75	7.00	257370.00
+TRINITY_DN2_c0_g1	279.00	55.71	1.00	20952.60
+TRINITY_DN1_c0_g1	380.00	133.09	13.00	114025.00
+TRINITY_DN2_c1_g1	541.00	279.67	15.00	62611.60
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/count/kallisto/abundance_B.tsv	Mon Aug 28 16:53:59 2017 -0400
@@ -0,0 +1,8 @@
+target_id	length	eff_length	est_counts	tpm
+TRINITY_DN1_c0_g1_i1	229	31.75	7	257370
+TRINITY_DN0_c0_g1_i1	380	133.091	13	114025
+TRINITY_DN2_c0_g1_i1	279	55.7143	1	20952.6
+TRINITY_DN2_c2_g1_i1	541	279.667	15	62611.6
+TRINITY_DN2_c1_g1_i1	240	42.75	4	109227
+TRINITY_DN2_c3_g1_i1	202	19	0	0
+TRINITY_DN3_c0_g1_i1	216	18.75	7	435814
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/count/kallisto/abundance_B.tsv.genes	Mon Aug 28 16:53:59 2017 -0400
@@ -0,0 +1,8 @@
+target_id	length	eff_length	est_counts	tpm
+TRINITY_DN3_c0_g1	216.00	18.75	7.00	435814.00
+TRINITY_DN2_c1_g1	240.00	42.75	4.00	109227.00
+TRINITY_DN2_c3_g1	202.00	19.00	0.00	0.00
+TRINITY_DN0_c0_g1	380.00	133.09	13.00	114025.00
+TRINITY_DN2_c0_g1	279.00	55.71	1.00	20952.60
+TRINITY_DN1_c0_g1	229.00	31.75	7.00	257370.00
+TRINITY_DN2_c2_g1	541.00	279.67	15.00	62611.60