Mercurial > repos > rnateam > graphclust_postprocessing
diff glob_report.xml.orig @ 17:f93c868203cc draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/GraphClust/CollectResults commit 4406735e44aba20859c252be39f4e99df28c7a92
author | rnateam |
---|---|
date | Sat, 27 Oct 2018 13:23:06 -0400 |
parents | |
children |
line wrap: on
line diff
<!--
    Galaxy tool wrapper: GraphClust post-processing / cluster collection report.
    Recovered from glob_report.xml.orig (added Sat Oct 27 13:23:06 2018 -0400),
    which still contained unresolved merge conflict markers. Both conflicts are
    resolved in favour of the HEAD side: tool version 0.4, and evaluation.py
    invoked with the FASTA/ and RESULTS/ directories.
    NOTE(review): confirm that HEAD is the intended side of the merge.
-->
<tool id="glob_report" name="cluster_collection_report" version="0.4">
    <requirements>
        <requirement type="package" version="0.5.2">graphclust-wrappers</requirement>
        <requirement type="package" version="0.5">perl-array-utils</requirement>
        <requirement type="package" version="0.18.1">scikit-learn</requirement>
        <requirement type="package" version="1.8.10">locarna</requirement>
        <requirement type="package" version="2.1">rnaz</requirement>
        <requirement type="package" version="1.1.2">infernal</requirement>
        <requirement type="package" version="2.2.10">viennarna</requirement>
        <requirement type="package" version="1.3.26">graphicsmagick</requirement>
        <requirement type="package" version="0.6.1">rscape</requirement>
        <requirement type="package" version="6.0">unzip</requirement>
    </requirements>
    <!-- Any non-zero exit code of the command chain marks the job as failed. -->
    <stdio>
        <exit_code range="1:" />
    </stdio>
    <command>
        <![CDATA[
        ## Unpack the zipped input archive into the working directory.
        unzip '$FASTA' &> /dev/null &&

        mkdir ./CMSEARCH &&
        mkdir ./MODEL &&

        ## Link every cmsearch result under its element identifier.
        ## Paths are quoted so identifiers containing spaces do not split.
        #for $cms_res in $cmsearch_results:
            ln -f -s '$cms_res' './CMSEARCH/$cms_res.element_identifier' &&
        #end for

        ## Link every model tree file under its element identifier.
        #for $mods in $model_tree_files:
            ln -f -s '$mods' './MODEL/$mods.element_identifier' &&
        #end for

        ## Arguments are positional; keep the order below unchanged.
        'glob_res.pl'
            $merge_cluster_ol
            $merge_overlap
            $min_cluster_size
            $cm_min_bitscore
            $cm_max_eval
            $cm_bitscore_sig
            ## NOTE(review): the empty '' looks like a positional placeholder
            ## expected by glob_res.pl; confirm against the script.
            $partition_type ''
            $cut_type
            $results_top_num
            #if $iteration_num.iteration_num_selector:
                $iteration_num.CI
                $final_partition_soft
                $final_partition_used_cmsearch
                '$combined_cm'
            #end if

            #if str($advanced_opts.advanced_opts_selector) == "show":
                #if str($advanced_opts.param_type.param_type_selector) == "gclust":
                    $advanced_opts.param_type.p
                    $advanced_opts.param_type.max_diff_am
                    $advanced_opts.param_type.max_diff
                    $advanced_opts.param_type.tau
                    $advanced_opts.param_type.struct_weight
                    $advanced_opts.param_type.indel_opening
                    $advanced_opts.param_type.indel
                    $advanced_opts.param_type.alifold_consensus_dp
                #end if
            #end if

        &&
        python '$__tool_directory__/evaluation.py' FASTA/ RESULTS/

        ## Only run the CD-HIT step when the optional dataset was supplied.
        #if $cdhit:
            &&
            python '$__tool_directory__/addCdhitseqs.py' '$cdhit'
        #end if
        ]]>
    </command>
    <inputs>
        <param type="data" name="FASTA" format="zip" />
        <param type="data" name="cmsearch_results" format="tabular" multiple="True"/>
        <param type="data" name="model_tree_files" format="txt" multiple="True"/>
        <param name="partition_type" type="boolean" checked="True" truevalue="0" falsevalue="1" label="Hard partition"/>
        <param name="cut_type" type="boolean" checked="True" truevalue="0" falsevalue="1" label="Use CM score for cutoff" help="otherwise use E-value"/>
        <param type="data" name="cdhit" format="txt" optional="true"/>
        <conditional name="iteration_num">
            <param name="iteration_num_selector" type="boolean" checked="false" label="Multiple iterations" help="for single iteration- NO, for multiple-YES"/>
            <when value="true">
                <param name="CI" type="integer" value="2" size="5" label="Number of current iteration "/>
                <param type="data" name="final_partition_soft" format="txt" />
                <param type="data" name="final_partition_used_cmsearch" format="txt" />
                <param type="data" name="combined_cm" format="txt" />
            </when>
            <when value="false" ></when>
        </conditional>
        <param name="merge_cluster_ol" type="float" value="0.66" size="5" label="merge_cluster_ol" help=""/>
        <param name="merge_overlap" type="float" value="0.51" size="5" label="merge_overlap" help=""/>
        <param name="min_cluster_size" type="integer" value="3" size="5" label="min_cluster_size" help=""/>
        <param name="cm_min_bitscore" type="integer" value="20" size="5" label="cm_min_bitscore" help=""/>
        <param name="cm_max_eval" type="float" value="0.001" size="5" label="cm_max_eval" help=""/>
        <param name="cm_bitscore_sig" type="integer" value="1" size="5" label="cm_bitscore_sig" help=""/>
        <param name="results_top_num" type="integer" value="5" size="5" label="results_top_num" help=""/>

        <conditional name="advanced_opts">
            <param name="advanced_opts_selector" type="select" label="Advanced Options">
                <option value="hide" selected="True">Hide</option>
                <option value="show">Show</option>
            </param>
            <when value="hide"></when>
            <when value="show">
                <conditional name="param_type">
                    <param name="param_type_selector" type="select" label="Choose the type of parameters">
                        <option value="locarna">LocARNA defaults</option>
                        <option value="gclust" selected="True">GrapClust defaults(changeable)</option>
                    </param>
                    <when value="gclust">
                        <param name="p" type="float" value="0.001" size="5" label="minimal probability" help="-p"/>
                        <param name="max_diff_am" type="integer" value="50" size="5" label=" maximal difference for sizes of matched arcs" help="--max-diff-am"/>
                        <!-- name given explicitly, matching struct_weight and indel_opening below -->
                        <param name="tau" argument="tau" type="integer" value="50" min="0" max="200" label="Sequence contribution at structure match in percent"/>
                        <param name="max_diff" type="integer" value="100" size="5" label="maximal difference for alignment traces" help="--max-diff"/>

                        <param name="struct_weight" argument="struct-weight"
                               label="Structure weight" type="integer"
                               value="180" min="0" max="800" />
                        <param name="indel_opening" argument="indel-opening"
                               label="Indel opening score" type="integer"
                               value="-400" max="0" min="-1500" />
                        <param name="indel" argument="indel" label="Indel score" type="integer"
                               value="-200" min="-1000" max="0" />

                        <param name="alifold_consensus_dp"
                               type="boolean" checked="True"
                               truevalue="--alifold-consensus-dp" falsevalue=" "
                               label="Compute consensus dot plot by alifold" />
                    </when>
                    <when value="locarna">
                    </when>
                </conditional>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <data name="final_stats" format="txt" from_work_dir="RESULTS/cluster.final.stats" label="cluster.final.stats" />
        <data name="tableForEval" format="tabular" from_work_dir="RESULTS/fullTab.tabular" label="tableForEval" />
        <data name="final_soft" format="txt" from_work_dir="RESULTS/partitions/final_partition.soft" label="soft_part" />
        <data name="final_used_cmsearch" format="txt" from_work_dir="RESULTS/partitions/final_partition.used_cmsearch" label="final_partition_used_cmsearch" />
        <data name="evaluation" format="txt" from_work_dir="RESULTS/evaluation.txt" label="evaluation_of_clusters" />
        <data name="combined_cm_out" format="txt" from_work_dir="combined_cm_out" label="combined_cmsearch_output" />
        <!-- The angle brackets of the named regex group are escaped because a
             literal "less than" sign is not allowed inside an XML attribute value. -->
        <collection name="clusters" type="list" label="CLUSTERS">
            <discover_datasets pattern="(?P&lt;name&gt;^.*\.all$)" directory="RESULTS" />
        </collection>
        <collection name="partitions" type="list" label="Partitions">
            <discover_datasets pattern="(?P&lt;name&gt;^.*$)" directory="RESULTS/partitions" />
        </collection>
        <collection name="topSecondaryStruct" type="list" label="Top $results_top_num alirna.ps">
            <discover_datasets format="png" pattern="(?P&lt;name&gt;^.*\.alirna.png$)" />
        </collection>
        <collection name="topDot" type="list" label="Top $results_top_num aln.ps">
            <discover_datasets format="png" pattern="(?P&lt;name&gt;^.*\.aln.png$)" />
        </collection>
        <collection name="rscapePlot" type="list" label="R-scape Plot">
            <discover_datasets format="pdf" pattern="(?P&lt;name&gt;^.*\.pdf$)" />
        </collection>
        <data name="RESULTS_zip" format="zip" from_work_dir="RESULTS.zip" label="RESULTS.zip" />
    </outputs>
    <tests>
        <test>
            <param name="FASTA" value="FASTA.zip" ftype="searchgui_archive"/>
            <param name="cmsearch_results" value="1.1.tree,1.2.tree"/>
            <param name="model_tree_files" value="1.1.model.tree.fa,1.2.model.tree.fa"/>
            <!-- NOTE(review): "combined_cm_out" is declared as an output, not an
                 input, so this param is probably ignored. Confirm and remove. -->
            <param name="combined_cm_out" value="combined_cm_out"/>
            <param name="partition_type" value="0"/>
            <param name="cut_type" value="0"/>
            <conditional name="iteration_num">
                <param name="iteration_num_selector" value="false"/>
            </conditional>
            <param name="merge_cluster_ol" value="0.66"/>
            <param name="merge_overlap" value="0.51"/>
            <param name="min_cluster_size" value="3"/>
            <param name="cm_min_bitscore" value="20"/>
            <param name="cm_max_eval" value="0.001"/>
            <param name="cm_bitscore_sig" value="0"/>
            <output name="final_stats" file="RESULTS/cluster.final.stats" />
            <output_collection name="clusters" type="list">
                <element name="1.cluster.all" file="RESULTS/1.cluster.all" compare="contains"/>
                <element name="2.cluster.all" file="RESULTS/2.cluster.all" compare="contains"/>
            </output_collection>
            <output_collection name="partitions">
                <element name="final_overlap.map" file="RESULTS/partitions/final_overlap.map" compare="contains">
                    <assert_contents>
                        <has_text text="1.1 1.1 " />
                        <has_text text="1.2 1.2" />
                    </assert_contents>
                </element>
                <element name="final_overlap.matrix" file="RESULTS/partitions/final_overlap.matrix" compare="contains">
                    <assert_contents>
                        <has_text text="MODEL CLASS 0 0" />
                        <!--has_text text="1.2" />
                        <has_text text="1.1" /-->
                    </assert_contents>
                </element>
                <element name="final_partition.hard.best" file="RESULTS/partitions/final_partition.hard.best" />
                <element name="final_partition.hard.merged" file="RESULTS/partitions/final_partition.hard.merged" />
                <element name="final_partition.soft" file="RESULTS/partitions/final_partition.soft" />
                <element name="final_partition.used_cmsearch" file="RESULTS/partitions/final_partition.used_cmsearch" compare="contains"/>
            </output_collection>
            <param name="results_top_num" value="5"/>
            <output_collection name="topSecondaryStruct" type="list">
                <element name="1.cluster.top5.alirna.png" file="1.cluster.top5.alirna.png" ftype="png" compare="sim_size" />
                <element name="2.cluster.top5.alirna.png" file="2.cluster.top5.alirna.png" ftype="png" compare="sim_size" />
            </output_collection>
            <output_collection name="topDot" type="list">
                <element name="1.cluster.top5.aln.png" file="1.cluster.top5.aln.png" ftype="png" compare="sim_size" />
                <element name="2.cluster.top5.aln.png" file="2.cluster.top5.aln.png" ftype="png" compare="sim_size" />
            </output_collection>

            <output_collection name="rscapePlot" type="list">
                <element name="1.cluster.top5.result.aln_1.R2R.sto.pdf" file="1.cluster.top5.result.aln_1.R2R.sto.pdf" ftype="pdf" compare="sim_size" />
                <element name="2.cluster.top5.result.aln_1.R2R.sto.pdf" file="2.cluster.top5.result.aln_1.R2R.sto.pdf" ftype="pdf" compare="sim_size" />
            </output_collection>

            <output name="RESULTS_zip" file="RESULTS.zip" ftype="zip" compare="sim_size" delta="20000"/>
        </test>
    </tests>
    <help>
    <![CDATA[

**What it does**

Post-processing. Redundant clusters are merged and instances that belong to multiple clusters
are assigned unambiguously. For every pair of clusters, the relative overlap (i.e. the fraction of
instances that occur in both clusters) is computed and clusters are merged if the overlap exceeds 50%.
Cluster members are finally ranked by their CM bitscore.

    ]]>
    </help>
    <citations>
        <citation type="bibtex">@inproceedings{costa2010fast,
            title={Fast neighborhood subgraph pairwise distance kernel},
            author={Costa, Fabrizio and De Grave, Kurt},
            booktitle={Proceedings of the 26th International Conference on Machine Learning},
            pages={255--262},
            year={2010},
            organization={Omnipress}
            }
        </citation>
    </citations>
</tool>