view glob_report.xml @ 17:f93c868203cc draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/GraphClust/CollectResults commit 4406735e44aba20859c252be39f4e99df28c7a92
author rnateam
date Sat, 27 Oct 2018 13:23:06 -0400
parents 79df97a1bc0f
children
line wrap: on
line source

<tool id="glob_report" name="cluster_collection_report" version="0.5" >
  <!-- Tool dependencies; every package is pinned to an exact version. -->
  <requirements>
    <requirement type="package" version="0.6.0">graphclust-wrappers</requirement>
    <requirement type="package" version="0.5">perl-array-utils</requirement>
    <requirement type="package" version="0.18.1">scikit-learn</requirement>
    <requirement type="package" version="1.8.10">locarna</requirement>
    <requirement type="package" version="2.1">rnaz</requirement>
    <requirement type="package" version="1.1.2">infernal</requirement>
    <requirement type="package" version="2.2.10">viennarna</requirement>
    <requirement type="package" version="1.3.30">graphicsmagick</requirement>
    <requirement type="package" version="0.6.1">rscape</requirement>
    <requirement type="package" version="6.0">unzip</requirement>
  </requirements>
  <!--
    Command template (Cheetah). The steps are chained so that a failing step
    aborts the job (detect_errors="exit_code"):
      1. unzip the FASTA archive into the job working directory;
      2. symlink each cmsearch result and each model tree file into ./CMSEARCH
         and ./MODEL, using the element identifier with every character outside
         [A-Za-z0-9_.-] replaced by "_";
      3. call glob_res.pl with positional arguments (their order matters);
         iteration arguments and advanced alignment arguments are appended only
         when the matching conditional is enabled;
      4. run evaluation.py on FASTA/ and RESULTS/;
      5. run addCdhitseqs.py when the optional cdhit dataset is supplied.
    Dataset paths are single-quoted so that unusual characters in a path cannot
    split the argument. Parameters that live inside the iteration_num
    conditional are referenced with their fully qualified name.
    NOTE(review): the empty '' after $partition_type looks like a placeholder
    positional argument expected by glob_res.pl; confirm against the script.
  -->
  <command detect_errors="exit_code"><![CDATA[
        unzip '$FASTA' &> /dev/null &&
        mkdir ./CMSEARCH &&
        mkdir ./MODEL &&
        #import re
        #for $cms_res in $cmsearch_results:
            #set $safename_cm = re.sub('[^\w\-_\.]', '_', $cms_res.element_identifier)
            ln -f -s '$cms_res' './CMSEARCH/$safename_cm' &&
        #end for
        #for $mods in $model_tree_files:
            #set $safename_tr = re.sub('[^\w\-_\.]', '_', $mods.element_identifier)
            ln -f -s '$mods' './MODEL/$safename_tr' &&
        #end for

        'glob_res.pl'
            $merge_cluster_ol
            $merge_overlap
            $min_cluster_size
            $cm_min_bitscore
            $cm_max_eval
            $cm_bitscore_sig
            $partition_type ''
            $cut_type
            $results_top_num
        #if $iteration_num.iteration_num_selector:
            $iteration_num.CI
            '$iteration_num.final_partition_soft'
            '$iteration_num.final_partition_used_cmsearch'
            '$iteration_num.combined_cm'
        #end if

        #if str($advanced_opts.advanced_opts_selector) == "show":
            #if str($advanced_opts.param_type.param_type_selector) == "gclust":
                $advanced_opts.param_type.p
                $advanced_opts.param_type.max_diff_am
                $advanced_opts.param_type.max_diff
                $advanced_opts.param_type.tau
                $advanced_opts.param_type.struct_weight
                $advanced_opts.param_type.indel_opening
                $advanced_opts.param_type.indel
                $advanced_opts.param_type.alifold_consensus_dp
            #end if
        #end if

        &&
        python '$__tool_directory__/evaluation.py' FASTA/ RESULTS/

        #if $cdhit:
            &&
            python '$__tool_directory__/addCdhitseqs.py' '$cdhit'
        #end if
  ]]></command>
  <!--
    Tool form. The top-level parameters are handed to glob_res.pl positionally
    by the command template. The two conditionals (iteration_num and
    advanced_opts) only contribute arguments when they are switched on.
  -->
  <inputs>
    <param type="data" name="FASTA" format="zip" />
    <param type="data" name="cmsearch_results" format="tabular" multiple="True"/>
    <param type="data" name="model_tree_files" format="txt" multiple="True"/>
    <!-- Both switches emit "0" when checked and "1" when unchecked. -->
    <param name="partition_type" type="boolean" checked="True" truevalue="0" falsevalue="1" label="Hard partition"/>
    <param name="cut_type" type="boolean" checked="True" truevalue="0" falsevalue="1" label="Use CM score for cutoff" help="otherwise use E-value"/>
    <param type="data" name="cdhit" format="txt" optional="true"/>

    <!-- Extra inputs that are only requested when several iterations are run. -->
    <conditional name="iteration_num">
      <param name="iteration_num_selector" type="boolean" checked="false" label="Multiple iterations" help="for single iteration- NO, for multiple-YES"/>
      <when value="true">
        <param name="CI" type="integer" value="2" size="5" label="Number of current iteration "/>
        <param type="data" name="final_partition_soft" format="txt" />
        <param type="data" name="final_partition_used_cmsearch" format="txt" />
        <param type="data" name="combined_cm" format="txt" />
      </when>
      <when value="false" ></when>
    </conditional>

    <param name="merge_cluster_ol" type="float" value="0.66" size="5" label="merge_cluster_ol" help=""/>
    <param name="merge_overlap" type="float" value="0.51" size="5" label="merge_overlap" help=""/>
    <param name="min_cluster_size" type="integer" value="3" size="5" label="min_cluster_size" help=""/>
    <param name="cm_min_bitscore" type="integer" value="20" size="5" label="cm_min_bitscore" help=""/>
    <param name="cm_max_eval" type="float" value="0.001" size="5" label="cm_max_eval" help=""/>
    <param name="cm_bitscore_sig" type="integer" value="1" size="5" label="cm_bitscore_sig" help=""/>
    <param name="results_top_num" type="integer" value="5" size="5" label="results_top_num" help=""/>

    <!-- Alignment parameters; they reach the command line only for "show" + "gclust". -->
    <conditional name="advanced_opts">
      <param name="advanced_opts_selector" type="select" label="Advanced Options">
        <option value="hide" selected="True">Hide</option>
        <option value="show">Show</option>
      </param>
      <when value="hide"></when>
      <when value="show">
        <conditional name="param_type">
          <param name="param_type_selector" type="select" label="Choose the type of parameters">
            <option value="locarna">LocARNA defaults</option>
            <option value="gclust" selected="True">GraphClust defaults (changeable)</option>
          </param>
          <when value="gclust">
            <param name="p" type="float" value="0.001" size="5" label="minimal probability" help="-p"/>
            <param name="max_diff_am" type="integer" value="50" size="5" label=" maximal difference for sizes of matched arcs" help="--max-diff-am"/>
            <param argument="tau" type="integer" value="50" min="0" max="200" label="Sequence contribution at structure match in percent"/>
            <param name="max_diff" type="integer" value="100" size="5" label="maximal difference for alignment traces" help="--max-diff"/>
            <param name="struct_weight" argument="struct-weight"
                   label="Structure weight" type="integer"
                   value="180" min="0" max="800" />
            <param name="indel_opening" argument="indel-opening"
                   label="Indel opening score" type="integer"
                   value="-400" max="0" min="-1500" />
            <param argument="indel" label="Indel score" type="integer"
                   value="-200" min="-1000" max="0" />
            <param name="alifold_consensus_dp"
                   type="boolean" checked="True"
                   truevalue="--alifold-consensus-dp" falsevalue=" "
                   label="Compute consensus dot plot by alifold" />
          </when>
          <when value="locarna">
          </when>
        </conditional>
      </when>
    </conditional>
  </inputs>
  <!--
    Outputs. Single datasets are picked up from fixed paths in the job working
    directory (from_work_dir). Collections are filled by filename discovery;
    the dots in the file extensions are escaped so that they match a literal
    "." instead of any character.
  -->
  <outputs>
    <data name="final_stats" format="txt" from_work_dir="RESULTS/cluster.final.stats" label="cluster.final.stats" />
    <data name="tableForEval" format="tabular" from_work_dir="RESULTS/fullTab.tabular" label="tableForEval" />
    <data name="final_soft" format="txt" from_work_dir="RESULTS/partitions/final_partition.soft" label="soft_part" />
    <data name="final_used_cmsearch" format="txt" from_work_dir="RESULTS/partitions/final_partition.used_cmsearch" label="final_partition_used_cmsearch" />
    <data name="evaluation" format="txt" from_work_dir="RESULTS/evaluation.txt" label="evaluation_of_clusters" />
    <data name="combined_cm_out" format="txt" from_work_dir="combined_cm_out" label="combined_cmsearch_output" />
    <!-- Files discovered below RESULTS/. -->
    <collection name="clusters" type="list" label="CLUSTERS">
      <discover_datasets format="txt" pattern="(?P&lt;name&gt;^.*\.all$)" directory="RESULTS" />
    </collection>
    <collection name="allFasta" type="list" label="sequences.fa">
      <discover_datasets format="fasta" pattern="(?P&lt;name&gt;^.*\.all\.fa$)" directory="RESULTS" />
    </collection>
    <collection name="partitions" type="list" label="Partitions">
      <discover_datasets pattern="(?P&lt;name&gt;^.*$)" directory="RESULTS/partitions" />
    </collection>
    <!-- No directory attribute on these discover_datasets elements. -->
    <collection name="topSecondaryStruct" type="list" label="Top $results_top_num alirna.ps">
      <discover_datasets format="png" pattern="(?P&lt;name&gt;^.*\.alirna\.png$)" />
    </collection>
    <collection name="topDot" type="list" label="Top $results_top_num aln.ps">
      <discover_datasets format="png" pattern="(?P&lt;name&gt;^.*\.aln\.png$)" />
    </collection>
    <collection name="rscapePlot" type="list" label="R-scape Plot">
      <discover_datasets format="pdf" pattern="(?P&lt;name&gt;^.*\.pdf$)" />
    </collection>
    <data name="RESULTS_zip" format="zip" from_work_dir="RESULTS.zip" label="RESULTS.zip" />
  </outputs>
  <!--
    Single functional test: one iteration (iteration_num_selector = false) with
    the advanced options left at their defaults.
  -->
  <tests>
    <test>
      <!-- NOTE(review): the FASTA input declares format="zip" but the fixture is
           uploaded as searchgui_archive; confirm that this is still intended. -->
      <param name="FASTA" value="FASTA.zip" ftype="searchgui_archive"/>
      <param name="cmsearch_results" value="1.1.tree,1.2.tree"/>
      <param name="model_tree_files" value="1.1.model.tree.fa,1.2.model.tree.fa"/>
      <param name="partition_type" value="0"/>
      <param name="cut_type" value="0"/>
      <conditional name="iteration_num">
        <param name="iteration_num_selector" value="false"/>
      </conditional>
      <param name="merge_cluster_ol" value="0.66"/>
      <param name="merge_overlap" value="0.51"/>
      <param name="min_cluster_size" value="3"/>
      <param name="cm_min_bitscore" value="20"/>
      <param name="cm_max_eval" value="0.001"/>
      <param name="cm_bitscore_sig" value="0"/>
      <param name="results_top_num" value="5"/>

      <!-- Plain datasets, compared with the default comparison. -->
      <output name="final_stats" file="RESULTS/cluster.final.stats"/>
      <output name="combined_cm_out" file="combined_cm_out"/>
      <output name="evaluation" file="evaluation1.txt"/>

      <output_collection name="clusters" type="list">
        <element name="1.cluster.all"
                 file="RESULTS/1.cluster.all"
                 compare="contains"/>
        <element name="2.cluster.all"
                 file="RESULTS/2.cluster.all"
                 compare="contains"/>
      </output_collection>

      <output_collection name="partitions">
        <element name="final_overlap.map"
                 file="RESULTS/partitions/final_overlap.map"
                 compare="contains">
          <assert_contents>
            <has_text text="1.1  1.1 "/>
            <has_text text="1.2  1.2"/>
          </assert_contents>
        </element>
        <element name="final_overlap.matrix"
                 file="RESULTS/partitions/final_overlap.matrix"
                 compare="contains">
          <assert_contents>
            <has_text text="MODEL CLASS 0 0"/>
            <!-- disabled assertions:
                 has_text text="1.2"
                 has_text text="1.1" -->
          </assert_contents>
        </element>
        <element name="final_partition.hard.best"
                 file="RESULTS/partitions/final_partition.hard.best"/>
        <element name="final_partition.hard.merged"
                 file="RESULTS/partitions/final_partition.hard.merged"/>
        <element name="final_partition.soft"
                 file="RESULTS/partitions/final_partition.soft"/>
        <element name="final_partition.used_cmsearch"
                 file="RESULTS/partitions/final_partition.used_cmsearch"
                 compare="contains"/>
      </output_collection>

      <!-- Image and PDF outputs are compared by size only (sim_size). -->
      <output_collection name="topSecondaryStruct" type="list">
        <element name="1.cluster.top5.alirna.png"
                 file="1.cluster.top5.alirna.png"
                 ftype="png" compare="sim_size"/>
        <element name="2.cluster.top5.alirna.png"
                 file="2.cluster.top5.alirna.png"
                 ftype="png" compare="sim_size"/>
      </output_collection>
      <output_collection name="topDot" type="list">
        <element name="1.cluster.top5.aln.png"
                 file="1.cluster.top5.aln.png"
                 ftype="png" compare="sim_size"/>
        <element name="2.cluster.top5.aln.png"
                 file="2.cluster.top5.aln.png"
                 ftype="png" compare="sim_size"/>
      </output_collection>
      <output_collection name="rscapePlot" type="list">
        <element name="1.cluster.top5.result.aln_1.R2R.sto.pdf"
                 file="1.cluster.top5.result.aln_1.R2R.sto.pdf"
                 ftype="pdf" compare="sim_size"/>
        <element name="2.cluster.top5.result.aln_1.R2R.sto.pdf"
                 file="2.cluster.top5.result.aln_1.R2R.sto.pdf"
                 ftype="pdf" compare="sim_size"/>
      </output_collection>

      <output name="RESULTS_zip" file="RESULTS.zip" ftype="zip" compare="sim_size" delta="20000"/>
    </test>
  </tests>
  <help>
    <![CDATA[

**What it does**

Post-processing step of the GraphClust pipeline. Redundant clusters are merged, and instances that
belong to multiple clusters are assigned unambiguously. For every pair of clusters, the relative
overlap (i.e. the fraction of instances that occur in both clusters) is computed, and clusters are
merged if the overlap exceeds 50%. Finally, cluster members are ranked by their CM bitscore.

    ]]>
  </help>
  <!-- Reference to cite for this tool, given as a DOI. -->
  <citations>
    <citation type="doi">10.5281/zenodo.597695</citation>
  </citations>
</tool>