Mercurial > repos > rnateam > graphclust_postprocessing_no_align

diff glob_report_no_align.xml @ 0:0a48b2db75e7 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/GraphClust/CollectResultsNoAlign commit 2a6fd70c1bcec36ffdf0bba2ec82489b39cfc84e
author: rnateam
date: Sat, 27 Oct 2018 13:49:00 -0400
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/glob_report_no_align.xml	Sat Oct 27 13:49:00 2018 -0400
@@ -0,0 +1,171 @@
+<tool id="graphclust_glob_report_no_align" name="Graphclust glob_report collect clusters" version="0.5" >
+  <requirements> <!-- Package dependencies of this tool; every entry is pinned to an exact version. -->
+    <requirement type="package" version="0.6.0">graphclust-wrappers</requirement>
+    <requirement type="package" version="0.5">perl-array-utils</requirement>
+    <requirement type="package" version="0.18.1">scikit-learn</requirement>
+    <requirement type="package" version="1.8.10">locarna</requirement>
+    <requirement type="package" version="2.1">rnaz</requirement>
+    <requirement type="package" version="1.1.2">infernal</requirement>
+    <requirement type="package" version="2.2.10">viennarna</requirement>
+    <requirement type="package" version="1.3.30">graphicsmagick</requirement>
+    <requirement type="package" version="0.6.1">rscape</requirement>
+    <requirement type="package" version="6.0">unzip</requirement>
+  </requirements>
+  <command detect_errors="exit_code">
+    <![CDATA[
+        unzip '$FASTA' &> /dev/null &&
+        mkdir ./CMSEARCH &&
+        mkdir ./MODEL &&
+        #import re
+        #for $cms_res in $cmsearch_results:
+            #set $safename_cm = re.sub('[^\w\-_\.]', '_', $cms_res.element_identifier)
+            ln -f -s '$cms_res' './CMSEARCH/$safename_cm' &&
+        #end for
+        #for $mods in $model_tree_files:
+            #set $safename_tr = re.sub('[^\w\-_\.]', '_', $mods.element_identifier)
+            ln -f -s '$mods' './MODEL/$safename_tr' &&
+        #end for
+        ## The arguments of glob_res.pl are positional, so the order of the lines below must be kept.
+        'glob_res.pl'
+                $merge_cluster_ol
+                $merge_overlap
+                $min_cluster_size
+                $cm_min_bitscore
+                $cm_max_eval
+                1 ## cm_bitscore_sig
+                $partition_type ''
+                $cut_type
+                0 ## zero means do not align
+        #if $iteration_num.iteration_num_selector:
+          $iteration_num.CI
+          '$iteration_num.final_partition_soft'
+          '$iteration_num.final_partition_used_cmsearch'
+          '$iteration_num.combined_cm'
+          ## Inputs declared inside the iteration_num conditional are referenced through it and quoted as paths.
+        #end if
+        ## Evaluation always runs after glob_res.pl has finished successfully.
+        &&
+        python '$__tool_directory__/evaluation.py' FASTA/ RESULTS/
+        ## addCdhitseqs.py only runs when the optional CD-HIT dataset was supplied.
+        #if $cdhit:
+        &&
+          python '$__tool_directory__/addCdhitseqs.py' '$cdhit'
+        #end if
+]]>
+  </command>
+  <inputs>
+    <param type="data" name="FASTA" format="zip" help="FASTA.zip from pre-processing step"/>
+    <param type="data" name="cmsearch_results" format="tabular" multiple="True"
+      help="Tabular cmsearch results of the candidate clusters from the cmsearch step"/>
+    <param type="data" name="model_tree_files" format="txt" multiple="True" label="model-tree-stk"
+      help="model.tree.stk files from pgma_graphclust candidate clustering step"/>
+    <param name="partition_type" type="boolean" checked="True" truevalue="0" falsevalue="1" label="Hard partition"
+      help="Whether to do hard partitioning (no overlap) or soft mode (cluster elements may overlap)"/>
+    <param name="cut_type" type="boolean" checked="True" truevalue="0" falsevalue="1" label="Use CM score for cutoff" help="otherwise use E-value"/>
+    <param name="cm_min_bitscore" type="integer" value="20" size="5" label="cm_min_bitscore" help=""/>
+    <param name="cm_max_eval" type="float" value="0.001" size="5" label="cm_max_E-val" help=""/>
+    <param type="data" name="cdhit" format="txt" optional="true" label="CD-HIT output"
+      help="Optional CD-HIT pre-clustering output to be combined into the final clustering output"/>
+    <param name="merge_cluster_ol" type="float" value="0.66" size="5" label="merge_cluster_overlap"
+      help="Overlapping ratio criteria to merge overlapping clusters or keep separate clusters (soft partitioning)"/>
+    <param name="merge_overlap" type="float" value="0.51" size="5" label="merge_fraction_overlap"
+      help="Overlapping ratio criteria to merge overlapping sequence fractions from same input sequence"/>
+    <param name="min_cluster_size" type="integer" value="3" size="5" label="minimum cluster size"
+      help="Minimum number of elements that can form a cluster. Higher values discard small clusters and may produce larger merged clusters"/>
+    <!-- <param name="cm_bitscore_sig" type="integer" value="1" size="5" label="cm_bitscore_sig" help=""/> -->
+    <!-- cm_bitscore_sig is not exposed to the user; the command section passes a fixed value of 1 for it. -->
+    <conditional name="iteration_num"> <!-- The extra inputs below exist only when "Multiple iterations" is switched on. -->
+      <param name="iteration_num_selector" type="boolean" checked="false" label="Multiple iterations" help="for single iteration- NO, for multiple-YES"/>
+      <when value="true">
+        <param name="CI" type="integer" value="2" size="5" label="Number of current iteration"/>
+        <param type="data" name="final_partition_soft" format="txt" />
+        <param type="data" name="final_partition_used_cmsearch" format="txt" />
+        <param type="data" name="combined_cm" format="txt" />
+      </when>
+      <when value="false" ></when>
+    </conditional>
+
+  </inputs>
+  <outputs> <!-- Single datasets are picked up by fixed path from the job working directory; collections are discovered by file name pattern. -->
+    <data name="final_stats" format="txt" from_work_dir="RESULTS/cluster.final.stats" label="cluster.final.stats" />
+    <data name="tableForEval" format="tabular" from_work_dir="RESULTS/fullTab.tabular" label="tableForEval" />
+    <data name="final_soft" format="txt" from_work_dir="RESULTS/partitions/final_partition.soft" label="soft_part" />
+    <data name="final_used_cmsearch" format="txt" from_work_dir="RESULTS/partitions/final_partition.used_cmsearch" label="final_partition_used_cmsearch" />
+    <data name="evaluation" format="txt" from_work_dir="RESULTS/evaluation.txt" label="evaluation_of_clusters" />
+    <data name="combined_cm_out" format="txt" from_work_dir="combined_cm_out" label="combined_cmsearch_output" />
+    <collection name="clusters" type="list" label="CLUSTERS-cmsearch">
+      <discover_datasets format="txt" pattern="(?P&lt;name&gt;^.*\.all$)" directory="RESULTS" />
+    </collection>
+    <collection name="allFastaSorted" type="list" label="cluster-sequences-sorted"> <!-- Both dots of the ".sorted.fa" suffix are escaped so only that literal suffix matches. -->
+      <discover_datasets format="fasta" pattern="(?P&lt;name&gt;^.*\.sorted\.fa$)" directory="RESULTS" />
+    </collection>
+    <!-- Every file found in RESULTS/partitions becomes one element of the collection below. -->
+    <collection name="partitions" type="list" label="Partitions">
+      <discover_datasets pattern="(?P&lt;name&gt;^.*$)" directory="RESULTS/partitions" />
+    </collection>
+    <data name="RESULTS_zip" format="zip" from_work_dir="RESULTS.zip" label="RESULTS.zip" />
+  </outputs>
+  <tests>
+    <test> <!-- Single-iteration run (iteration_num_selector is false) without the optional CD-HIT input. -->
+      <param name="FASTA" value="FASTA.zip" ftype="searchgui_archive"/> <!-- NOTE(review): ftype is searchgui_archive although the FASTA input declares format="zip"; confirm this is intended. -->
+      <param name="cmsearch_results" value="1.1.tree,1.2.tree"/>
+      <param name="model_tree_files" value="1.1.model.tree.fa,1.2.model.tree.fa"/>
+      <param name="partition_type" value="0"/> <!-- "0" is the truevalue of this boolean, i.e. hard partition. -->
+      <param name="cut_type" value="0"/> <!-- "0" is the truevalue of this boolean, i.e. use the CM score for the cutoff. -->
+      <conditional name="iteration_num">
+        <param name="iteration_num_selector" value="false"/>
+      </conditional>
+      <param name="merge_cluster_ol" value="0.66"/>
+      <param name="merge_overlap" value="0.51"/>
+      <param name="min_cluster_size" value="3"/>
+      <param name="cm_min_bitscore" value="20"/>
+      <param name="cm_max_eval" value="0.001"/>
+      <!-- <param name="cm_bitscore_sig" value="0"/> -->
+      <output name="final_stats" file="RESULTS/cluster.final.stats" />
+      <output name="combined_cm_out" file="combined_cm_out"/>
+      <output name="evaluation" file="evaluation1.txt"/>
+      <output_collection name="clusters" type="list">
+        <element name="1.cluster.all" file="RESULTS/1.cluster.all" compare="contains"/>
+        <element name="2.cluster.all" file="RESULTS/2.cluster.all" compare="contains"/>
+        <!-- Only these two cluster files are checked in the "clusters" collection. -->
+      </output_collection>
+      <output_collection name="partitions">
+        <element name="final_overlap.map" file="RESULTS/partitions/final_overlap.map" compare="contains">
+          <assert_contents>
+            <has_text text="1.1  1.1 " />
+            <has_text text="1.2  1.2" />
+          </assert_contents>
+        </element>
+        <element name="final_overlap.matrix" file="RESULTS/partitions/final_overlap.matrix" compare="contains">
+          <assert_contents>
+            <has_text text="MODEL CLASS 0 0" />
+            <!--has_text text="1.2" />
+            <has_text text="1.1" /-->
+          </assert_contents>
+        </element>
+        <element name="final_partition.hard.best" file="RESULTS/partitions/final_partition.hard.best" />
+        <element name="final_partition.hard.merged" file="RESULTS/partitions/final_partition.hard.merged" />
+        <element name="final_partition.soft" file="RESULTS/partitions/final_partition.soft" />
+        <element name="final_partition.used_cmsearch" file="RESULTS/partitions/final_partition.used_cmsearch" compare="contains"/>
+      </output_collection>
+      <!-- The archive is compared by size only (sim_size) with a tolerance given by delta. -->
+      <output name="RESULTS_zip" file="RESULTS.zip" ftype="zip" compare="sim_size" delta="20000"/>
+
+    </test>
+  </tests>
+  <help>
+    <![CDATA[
+
+**What it does**
+
+Post-processing. Redundant clusters are merged and instances that belong to multiple clusters
+are assigned unambiguously. For every pair of clusters, the relative overlap (i.e. the fraction of
+instances that occur in both clusters) is computed and clusters are merged if the overlap exceeds the merge_cluster_ol threshold (default 0.66).
+Cluster members are finally ranked by their CM bitscore.
+
+    ]]>
+  </help>
+  <citations> <!-- DOI reference cited for this tool. -->
+    <citation type="doi">10.5281/zenodo.597695</citation>
+  </citations>
+</tool>
author	rnateam
date	Sat, 27 Oct 2018 13:49:00 -0400
parents
children