Mercurial > repos > rnateam > graphclust_postprocessing
changeset 5:4310ac018d05 draft
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 80c721dcfe02a2b8baf8e2c64b76cbcd71b23d86
author | rnateam |
---|---|
date | Sat, 21 Jan 2017 17:39:21 -0500 |
parents | 4a9754d476fe |
children | 869a6e807d76 |
files | evaluation.py glob_report.xml test-data/RESULTS.zip |
diffstat | 3 files changed, 182 insertions(+), 119 deletions(-) [+] |
line wrap: on
line diff
--- a/evaluation.py Fri Jan 13 16:59:29 2017 -0500
+++ b/evaluation.py Sat Jan 21 17:39:21 2017 -0500
@@ -2,6 +2,7 @@
 from os import system
 import re
 from sklearn import metrics
+from shutil import make_archive
 
 def sh(script):
     system("bash -c '%s'" % script)
@@ -67,3 +68,6 @@
 
 with open("RESULTS/evaluation.txt", "w") as fOut:
     fOut.write(toWrite)
+
+
+make_archive('RESULTS', 'zip', root_dir='RESULTS')
--- a/glob_report.xml Fri Jan 13 16:59:29 2017 -0500 +++ b/glob_report.xml Sat Jan 21 17:39:21 2017 -0500 @@ -1,19 +1,19 @@ <tool id="glob_report" name="Report_Results" version="0.1"> - <requirements> - <requirement type="package" version="0.1">graphclust-wrappers</requirement> - <requirement type="package" version='0.5'>perl-array-utils</requirement> - <requirement type="package" version='0.18.1'>scikit-learn</requirement> - <requirement type="package" version='1.8.10'>locarna</requirement> - <requirement type="package" version='2.1'>rnaz</requirement> - <requirement type="package" version="1.1">infernal</requirement> - <requirement type="package" version='2.2.10'>viennarna</requirement> - <requirement type="package" version='1.3.23'>graphicsmagick</requirement> - </requirements> - <stdio> - <exit_code range="1:" /> - </stdio> - <command> - <![CDATA[ + <requirements> + <requirement type="package" version="0.1.7">graphclust-wrappers</requirement> + <requirement type="package" version='0.5'>perl-array-utils</requirement> + <requirement type="package" version='0.18.1'>scikit-learn</requirement> + <requirement type="package" version='1.8.10'>locarna</requirement> + <requirement type="package" version='2.1'>rnaz</requirement> + <requirement type="package" version="1.1">infernal</requirement> + <requirement type="package" version='2.2.10'>viennarna</requirement> + <requirement type="package" version='1.3.23'>graphicsmagick</requirement> + </requirements> + <stdio> + <exit_code range="1:" /> + </stdio> + <command> + <![CDATA[ unzip $FASTA &> /dev/null && #set $inputFiles = "" @@ -29,7 +29,7 @@ #set $inputFilesTrees += str($mods)+',' #end for #set $inputFilesTrees = $inputFilesTrees[:-1] - glob_res.pl + 'glob_res.pl' '$inputFiles' $merge_cluster_ol $merge_overlap @@ -47,110 +47,169 @@ $final_partition_used_cmsearch #end if + #if str($advanced_opts.advanced_opts_selector) == "show": + #if str($advanced_opts.param_type.param_type_selector) == "gclust": + 
$advanced_opts.param_type.p + $advanced_opts.param_type.max_diff_am + $advanced_opts.param_type.max_diff + $advanced_opts.param_type.tau + $advanced_opts.param_type.struct_weight + $advanced_opts.param_type.indel_opening + $advanced_opts.param_type.indel + $advanced_opts.param_type.alifold_consensus_dp + #end if + #end if + && python '$__tool_directory__/evaluation.py' - #if $cdhit: - && - python '$__tool_directory__/addCdhitseqs.py' '$cdhit' - #end if + #if $cdhit: + && + python '$__tool_directory__/addCdhitseqs.py' '$cdhit' + #end if ]]> - </command> - <inputs> - <param type="data" name="FASTA" format="zip" /> - <param type="data" name="cmsearch_results" format="tabular" multiple="True"/> - <param type="data" name="model_tree_files" format="txt" multiple="True"/> - <param name="partition_type" type="boolean" checked="True" truevalue="0" falsevalue="1" label="Hard partition"/> - <param name="cut_type" type="boolean" checked="True" truevalue="0" falsevalue="1" label="Use CM score for cutoff" help="otherwise use E-value"/> - <param type="data" name="cdhit" format="txt" optional="true"/> - <conditional name="iteration_num"> - <param name="iteration_num_selector" type="boolean" checked="no" label="Multiple iterations" help="for single iteration- NO, for multiple-YES"/> - <when value="true"> - <param name="CI" type="integer" value="2" size="5" label="Number of current iteration "/> - <param type="data" name="final_partition_soft" format="txt" /> - <param type="data" name="final_partition_used_cmsearch" format="txt" /> - </when> - <when value="false" ></when> - </conditional> - <param name="merge_cluster_ol" type="float" value="0.66" size="5" label="merge_cluster_ol" help=""/> - <param name="merge_overlap" type="float" value="0.51" size="5" label="merge_overlap" help=""/> - <param name="min_cluster_size" type="integer" value="3" size="5" label="min_cluster_size" help=""/> - <param name="cm_min_bitscore" type="integer" value="20" size="5" label="cm_min_bitscore" 
help=""/> - <param name="cm_max_eval" type="float" value="0.001" size="5" label="cm_max_eval" help=""/> - <param name="cm_bitscore_sig" type="integer" value="1" size="5" label="cm_bitscore_sig" help=""/> - <param name="results_top_num" type="integer" value="5" size="5" label="results_top_num" help=""/> - </inputs> - <outputs> - <data name="final_stats" format="txt" from_work_dir="RESULTS/cluster.final.stats" label="cluster.final.stats" /> - <data name="tableForEval" format="tabular" from_work_dir="RESULTS/fullTab.tabular" label="tableForEval" /> - <data name="final_soft" format="txt" from_work_dir="RESULTS/partitions/final_partition.soft" label="soft_part" /> - <data name="final_used_cmsearch" format="txt" from_work_dir="RESULTS/partitions/final_partition.used_cmsearch" label="final_partition_used_cmsearch" /> - <data name="evaluation" format="txt" from_work_dir="RESULTS/evaluation.txt" label="evaluation_of_clusters" /> - <collection name="clusters" type="list" label="CLUSTERS"> - <discover_datasets pattern="(?P<name>^.*\.all$)" directory="RESULTS" /> - </collection> - <collection name="partitions" type="list" label="Partitions"> - <discover_datasets pattern="(?P<name>^.*$)" directory="RESULTS/partitions" /> - </collection> - <collection name="topSecondaryStruct" type="list" label="Top $results_top_num alirna.ps"> - <discover_datasets format="png" pattern="(?P<name>^.*\.alirna.png$)" /> - </collection> - <collection name="topDot" type="list" label="Top $results_top_num aln.ps"> - <discover_datasets format="png" pattern="(?P<name>^.*\.aln.png$)" /> - </collection> - </outputs> - <tests> - <test> - <param name="FASTA" value="FASTA.zip" ftype="searchgui_archive"/> - <param name="cmsearch_results" value="1.tabular,2.tabular"/> - <param name="model_tree_files" value="1.1.model.tree.fa,1.2.model.tree.fa"/> - <param name="partition_type" value="0"/> - <param name="cut_type" value="0"/> - <conditional name="iteration_num"> - <param name="iteration_num_selector" 
value="false"/> - </conditional> - <param name="merge_cluster_ol" value="0.66"/> - <param name="merge_overlap" value="0.51"/> - <param name="min_cluster_size" value="3"/> - <param name="cm_min_bitscore" value="20"/> - <param name="cm_max_eval" value="0.001"/> - <param name="cm_bitscore_sig" value="1"/> - <output name="final_stats" file="RESULTS/cluster.final.stats" /> - <output_collection name="clusters" type="list"> - <element name="1.cluster.all" file="RESULTS/1.cluster.all" compare="contains"/> - <element name="2.cluster.all" file="RESULTS/2.cluster.all" compare="contains"/> - </output_collection> - <output_collection name="partitions"> - <element name="final_overlap.map" file="RESULTS/partitions/final_overlap.map" compare="contains"> - <assert_contents> - <has_text text="1.1 1.1" /> - <has_text text="1.2 1.2" /> - </assert_contents> - </element> - <element name="final_overlap.matrix" file="RESULTS/partitions/final_overlap.matrix" compare="contains"> - <assert_contents> - <has_text text="MODEL CLASS 0 0" /> - <has_text text="1.2" /> - <has_text text="1.1" /> - </assert_contents> - </element> - <element name="final_partition.hard.best" file="RESULTS/partitions/final_partition.hard.best" /> - <element name="final_partition.hard.merged" file="RESULTS/partitions/final_partition.hard.merged" /> - <element name="final_partition.soft" file="RESULTS/partitions/final_partition.soft" /> - <element name="final_partition.used_cmsearch" file="RESULTS/partitions/final_partition.used_cmsearch" compare="contains"/> - </output_collection> - <param name="results_top_num" value="5"/> - <output_collection name="topSecondaryStruct" type="list"> - <element name="1.cluster.top5.alirna.png" file="1.cluster.top5.alirna.png" ftype="png" compare="sim_size" /> - <element name="2.cluster.top5.alirna.png" file="2.cluster.top5.alirna.png" ftype="png" compare="sim_size" /> - </output_collection> - <output_collection name="topDot" type="list"> - <element name="1.cluster.top5.aln.png" 
file="1.cluster.top5.aln.png" ftype="png" compare="sim_size"/> - <element name="2.cluster.top5.aln.png" file="2.cluster.top5.aln.png" ftype="png" compare="sim_size"/> - </output_collection> - </test> - </tests> - <help> - <![CDATA[ + </command> + <inputs> + <param type="data" name="FASTA" format="zip" /> + <param type="data" name="cmsearch_results" format="tabular" multiple="True"/> + <param type="data" name="model_tree_files" format="txt" multiple="True"/> + <param name="partition_type" type="boolean" checked="True" truevalue="0" falsevalue="1" label="Hard partition"/> + <param name="cut_type" type="boolean" checked="True" truevalue="0" falsevalue="1" label="Use CM score for cutoff" help="otherwise use E-value"/> + <param type="data" name="cdhit" format="txt" optional="true"/> + <conditional name="iteration_num"> + <param name="iteration_num_selector" type="boolean" checked="no" label="Multiple iterations" help="for single iteration- NO, for multiple-YES"/> + <when value="true"> + <param name="CI" type="integer" value="2" size="5" label="Number of current iteration "/> + <param type="data" name="final_partition_soft" format="txt" /> + <param type="data" name="final_partition_used_cmsearch" format="txt" /> + </when> + <when value="false" ></when> + </conditional> + <param name="merge_cluster_ol" type="float" value="0.66" size="5" label="merge_cluster_ol" help=""/> + <param name="merge_overlap" type="float" value="0.51" size="5" label="merge_overlap" help=""/> + <param name="min_cluster_size" type="integer" value="3" size="5" label="min_cluster_size" help=""/> + <param name="cm_min_bitscore" type="integer" value="20" size="5" label="cm_min_bitscore" help=""/> + <param name="cm_max_eval" type="float" value="0.001" size="5" label="cm_max_eval" help=""/> + <param name="cm_bitscore_sig" type="integer" value="1" size="5" label="cm_bitscore_sig" help=""/> + <param name="results_top_num" type="integer" value="5" size="5" label="results_top_num" help=""/> + + <conditional 
name="advanced_opts"> + <param name="advanced_opts_selector" type="select" label="Advanced Options"> + <option value="hide" selected="True">Hide</option> + <option value="show">Show</option> + </param> + <when value="hide"></when> + <when value="show"> + + <conditional name="param_type"> + <param name="param_type_selector" type="select" label="Choose the type of parameters"> + <option value="locarna">LocARNA defaults</option> + <option value="gclust" selected="True">GrapClust defaults(changeable)</option> + </param> + <when value="gclust"> + + <param name="p" type="float" value="0.001" size="5" label="minimal probability" help="-p"/> + <param name="max_diff_am" type="integer" value="50" size="5" label=" maximal difference for sizes of matched arcs" help="--max-diff-am"/> + <param argument="tau" type="integer" value="50" min="0" max="200" label="Sequence contribution at structure match in percent"/> + <param name="max_diff" type="integer" value="100" size="5" label="maximal difference for alignment traces" help="--max-diff"/> + + <param name="struct_weight" argument="struct-weight" + label="Structure weight" type="integer" + value="180" min="0" max="800" /> + <param name="indel_opening" argument="indel-opening" + label="Indel opening score" type="integer" + value="-400" max="0" min="-1500" /> + <param argument="indel" label="Indel score" type="integer" + value="-200" min="-1000" max="0" /> + + <param name="alifold_consensus_dp" + type="boolean" checked="True" + truevalue="--alifold-consensus-dp" falsevalue=" " + label="Compute consensus dot plot by alifold" /> + + </when> + <when value="locarna"> + </when> + </conditional> + + </when> + </conditional> + + </inputs> + <outputs> + <data name="final_stats" format="txt" from_work_dir="RESULTS/cluster.final.stats" label="cluster.final.stats" /> + <data name="tableForEval" format="tabular" from_work_dir="RESULTS/fullTab.tabular" label="tableForEval" /> + <data name="final_soft" format="txt" 
from_work_dir="RESULTS/partitions/final_partition.soft" label="soft_part" /> + <data name="final_used_cmsearch" format="txt" from_work_dir="RESULTS/partitions/final_partition.used_cmsearch" label="final_partition_used_cmsearch" /> + <data name="evaluation" format="txt" from_work_dir="RESULTS/evaluation.txt" label="evaluation_of_clusters" /> + <collection name="clusters" type="list" label="CLUSTERS"> + <discover_datasets pattern="(?P<name>^.*\.all$)" directory="RESULTS" /> + </collection> + <collection name="partitions" type="list" label="Partitions"> + <discover_datasets pattern="(?P<name>^.*$)" directory="RESULTS/partitions" /> + </collection> + <collection name="topSecondaryStruct" type="list" label="Top $results_top_num alirna.ps"> + <discover_datasets format="png" pattern="(?P<name>^.*\.alirna.png$)" /> + </collection> + <collection name="topDot" type="list" label="Top $results_top_num aln.ps"> + <discover_datasets format="png" pattern="(?P<name>^.*\.aln.png$)" /> + </collection> + <data name="RESULTS_zip" format="zip" from_work_dir="RESULTS.zip" label="RESULTS.zip" /> + </outputs> + <tests> + <test> + <param name="FASTA" value="FASTA.zip" ftype="searchgui_archive"/> + <param name="cmsearch_results" value="1.tabular,2.tabular"/> + <param name="model_tree_files" value="1.1.model.tree.fa,1.2.model.tree.fa"/> + <param name="partition_type" value="0"/> + <param name="cut_type" value="0"/> + <conditional name="iteration_num"> + <param name="iteration_num_selector" value="false"/> + </conditional> + <param name="merge_cluster_ol" value="0.66"/> + <param name="merge_overlap" value="0.51"/> + <param name="min_cluster_size" value="3"/> + <param name="cm_min_bitscore" value="20"/> + <param name="cm_max_eval" value="0.001"/> + <param name="cm_bitscore_sig" value="1"/> + <output name="final_stats" file="RESULTS/cluster.final.stats" /> + <output_collection name="clusters" type="list"> + <element name="1.cluster.all" file="RESULTS/1.cluster.all" compare="contains"/> + 
<element name="2.cluster.all" file="RESULTS/2.cluster.all" compare="contains"/> + </output_collection> + <output_collection name="partitions"> + <element name="final_overlap.map" file="RESULTS/partitions/final_overlap.map" compare="contains"> + <assert_contents> + <has_text text="1.1 1.1" /> + <has_text text="1.2 1.2" /> + </assert_contents> + </element> + <element name="final_overlap.matrix" file="RESULTS/partitions/final_overlap.matrix" compare="contains"> + <assert_contents> + <has_text text="MODEL CLASS 0 0" /> + <has_text text="1.2" /> + <has_text text="1.1" /> + </assert_contents> + </element> + <element name="final_partition.hard.best" file="RESULTS/partitions/final_partition.hard.best" /> + <element name="final_partition.hard.merged" file="RESULTS/partitions/final_partition.hard.merged" /> + <element name="final_partition.soft" file="RESULTS/partitions/final_partition.soft" /> + <element name="final_partition.used_cmsearch" file="RESULTS/partitions/final_partition.used_cmsearch" compare="contains"/> + </output_collection> + <param name="results_top_num" value="5"/> + <output_collection name="topSecondaryStruct" type="list"> + <element name="1.cluster.top5.alirna.png" file="1.cluster.top5.alirna.png" ftype="png" compare="sim_size" /> + <element name="2.cluster.top5.alirna.png" file="2.cluster.top5.alirna.png" ftype="png" compare="sim_size" /> + </output_collection> + <output_collection name="topDot" type="list"> + <element name="1.cluster.top5.aln.png" file="1.cluster.top5.aln.png" ftype="png" compare="sim_size" /> + <element name="2.cluster.top5.aln.png" file="2.cluster.top5.aln.png" ftype="png" compare="sim_size" /> + </output_collection> + <output name="RESULTS_zip" file="RESULTS.zip" ftype="zip" compare="sim_size" /> + + </test> + </tests> + <help> + <![CDATA[ **What it does** @@ -160,8 +219,8 @@ Cluster members are finally ranked by their CM bitscore. 
]]> - </help> - <citations> + </help> + <citations> <citation type="bibtex">@inproceedings{costa2010fast, title={Fast neighborhood subgraph pairwise distance kernel}, author={Costa, Fabrizio and De Grave, Kurt},