view locarna_best_subtree.xml @ 0:15bd4fb05e5c draft

planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/LocARNAGraphClust commit 21aaee40723b5341b4236edeb0e72995c2054053
author rnateam
date Fri, 16 Dec 2016 07:35:29 -0500
parents
children c6c4a7adb099
line wrap: on
line source

<tool id="locarna_best_subtree" name="locarna_best_subtree" version="0.1.0" >
	<!-- Package dependencies declared for this tool, each pinned to a version. -->
	<requirements>
		<requirement type="package" version="0.1">graphclust-wrappers</requirement>
		<requirement type="package" version="1.8.10">locarna</requirement>
		<requirement type="package" version="2.1">rnaz</requirement>
		<requirement type="package" version="0.07">perl-math-round</requirement>
	</requirements>
	<!-- Any exit code of 1 or higher marks the job as failed. -->
	<stdio>
		<exit_code range="1:" level="fatal"/>
	</stdio>
	<!--
		Invokes locARNAGraphClust.pl with ten positional arguments, in this order:
		centers FASTA, tree file, tree matrix, p, max_diff_am, tau, max_diff,
		an empty string, data map, plfold_minlen.
		NOTE(review): the purpose of the empty eighth argument is not visible in this
		file; confirm against the script before changing the argument order.
	-->
	<command><![CDATA[
		locARNAGraphClust.pl '$center_fa_file' '$tree_file' '$tree_matrix' $p $max_diff_am $tau $max_diff '' '$data_map' $plfold_minlen
	]]></command>
	<!--
		Input datasets and numeric parameters; every one is passed positionally to the
		command. Dataset formats use Galaxy datatype extensions: "fasta" for the
		centers file and "txt" for the tree, tree matrix and data map.
	-->
	<inputs>
		<param type="data" name="center_fa_file" label="centers" format="fasta" help="fasta format" />
		<param type="data" name="tree_file" label="trees" format="txt" help="text format" />
		<param type="data" name="tree_matrix" label="tree_matrix" format="txt" help="text format" />
		<param type="data" name="data_map" label="data_map" format="txt" help="text format" />
		<param name="p" type="float" value="0.001" size="5" label="minimal probability" help="-p"/>
		<param name="max_diff_am" type="integer" value="50" size="5" label="maximal difference for sizes of matched arcs" help="--max-diff-am"/>
		<param name="tau" type="integer" value="50" size="5" label="Tau factor in percent" help="--tau"/>
		<param name="max_diff" type="integer" value="100" size="5" label="maximal difference for alignment traces" help="--max-diff"/>
		<param name="plfold_minlen" type="integer" value="210" size="5" label="Minimal length of a sequences for which RNAplfold is used" />
	</inputs>
	<!-- Single output: MODEL/best_subtree.aln, collected from the job working directory. -->
	<outputs>
		<data
			name="model_tree_stk"
			label="model.tree.stk"
			format="stockholm"
			from_work_dir="MODEL/best_subtree.aln"/>
	</outputs>
	<!--
		Functional test: runs the tool on the bundled test files and compares the
		produced alignment with best_subtree.aln. Parameter names must match the
		names declared in the inputs section (underscores, not hyphens).
		plfold_minlen is not set here, so it takes its declared default.
	-->
	<tests>
		<test>
			<param name="tree_file" value="1.1.tree"/>
			<param name="center_fa_file" value="1.1.center.fa"/>
			<param name="data_map" value="data.map"/>
			<param name="tree_matrix" value="1.1.matrix.tree"/>
			<param name="p" value="0.001"/>
			<param name="max_diff_am" value="50"/>
			<param name="tau" value="50"/>
			<param name="max_diff" value="100"/>
			<output name="model_tree_stk" file="best_subtree.aln"/>
		</test>
	</tests>
	<help>
		<![CDATA[
**What it does**

MLocARNA computes a multiple sequence-structure alignment of RNA sequences.
It takes a *treefile*, a file containing a guide tree in NEWICK format, and uses that tree to guide the progressive alignment.
Supplying the tree avoids computing pairwise all-vs-all similarities and constructing the guide tree from them.



]]>
	</help>
	<citations>
		<!-- Costa and De Grave (2010): neighborhood subgraph pairwise distance kernel. -->
		<citation type="bibtex">@inproceedings{costa2010fast,
  title =	 {Fast neighborhood subgraph pairwise distance kernel},
  author =	 {Costa, Fabrizio and De Grave, Kurt},
  booktitle =	 {Proceedings of the 26th International Conference on Machine Learning},
  pages =	 {255--262},
  year =	 {2010},
  organization = {Omnipress}
}
		</citation>
			<!--
				Will et al. (2012): LocARNA-P. The two ISSNs are kept in a single issn
				field, because a BibTeX entry may not repeat a field name.
			-->
			<citation type="bibtex">@Article{Will_Joshi_Hofacker-LocAR_Accur_bound-2012,
  author =	 {Will, Sebastian and Joshi, Tejal and Hofacker, Ivo L. and
                  Stadler, Peter F. and Backofen, Rolf},
  title =	 {{LocARNA}-{P}: {Accurate} boundary prediction and improved
                  detection of structural {RNAs}},
  journal =	 {RNA},
  year =	 {2012},
  volume =	 {18},
  number =	 {5},
  pages =	 {900-14},
  user =	 {will},
  pmid =	 {22450757},
  doi = 	 {10.1261/rna.029041.111},
  issn = 	 {1469-9001, 1355-8382},
  abstract =	 {Current genomic screens for noncoding RNAs (ncRNAs) predict
                  a large number of genomic regions containing potential
                  structural ncRNAs. The analysis of these data requires
                  highly accurate prediction of ncRNA boundaries and
                  discrimination of promising candidate ncRNAs from weak
                  predictions. Existing methods struggle with these goals
                  because they rely on sequence-based multiple sequence
                  alignments, which regularly misalign RNA structure and
                  therefore do not support identification of structural
                  similarities. To overcome this limitation, we compute
                  columnwise and global reliabilities of alignments based on
                  sequence and structure similarity; we refer to these
                  structure-based alignment reliabilities as STARs. The
                  columnwise STARs of alignments, or STAR profiles, provide a
                  versatile tool for the manual and automatic analysis of
                  ncRNAs. In particular, we improve the boundary prediction of
                  the widely used ncRNA gene finder RNAz by a factor of 3 from
                  a median deviation of 47 to 13 nt. Post-processing RNAz
                  predictions, LocARNA-P's STAR score allows much stronger
                  discrimination between true- and false-positive predictions
                  than RNAz's own evaluation. The improved accuracy, in this
                  scenario increased from AUC 0.71 to AUC 0.87, significantly
                  reduces the cost of successive analysis steps. The
                  ready-to-use software tool LocARNA-P produces
                  structure-based multiple RNA alignments with associated
                  columnwise STARs and predicts ncRNA boundaries. We provide
                  additional results, a web server for LocARNA/LocARNA-P, and
                  the software package, including documentation and a pipeline
                  for refining screens for structural ncRNA, at
                  http://www.bioinf.uni-freiburg.de/Supplements/LocARNA-P/.}
}
			</citation>
				<!--
					Will et al. (2007): structure-based clustering with LocARNA. The two
					ISSNs are kept in a single issn field, because a BibTeX entry may not
					repeat a field name. Words in the abstract that had been fused together
					are separated again.
				-->
				<citation type="bibtex">@Article{Will:etal:_infer_non_codin_rna_famil:PLOS2007,
  author =	 {Sebastian Will and Kristin Reiche and Ivo L. Hofacker and
                  Peter F. Stadler and Rolf Backofen},
  title =	 {Inferring Non-Coding {RNA} Families and Classes by Means of
                  Genome-Scale Structure-Based Clustering},
  journal =	 {PLoS Comput Biol},
  year =	 2007,
  volume =       {3},
  number =       {4},
  pages =        {e65},
  issn =         {1553-7358, 1553-734X},
  pmid =         {17432929},
  doi =          {10.1371/journal.pcbi.0030065},
  user =	 {will},
  abstract =	 {The RFAM database defines families of ncRNAs by means of
                  sequence similarities that are sufficient to establish
                  homology. In some cases, such as microRNAs, box H/ACA
                  snoRNAs, functional commonalities define classes of RNAs
                  that are characterized by structural similarities, and
                  typically consist of multiple RNA families. Recent advances
                  in high-throughput transcriptomics and comparative genomics
                  have produced very large sets of putative non-coding RNAs
                  and regulatory RNA signals. For many of them, evidence for
                  stabilizing selection acting on their secondary structures
                  has been derived, and at least approximate models of their
                  structures have been computed. The overwhelming majority of
                  these hypothetical RNAs cannot be assigned to established
                  families or classes. We present here a structure-based
                  clustering approach that is capable of extracting putative
                  RNA classes from genome-wide surveys for structured RNAs. The
                  LocARNA tool implements a novel variant of the Sankoff
                  algorithm that is sufficiently fast to deal with several
                  thousand candidate sequences. The method is also robust
                  against false positive predictions, i.e., a contamination of
                  the input data with unstructured or non-conserved
                  sequences. We have successfully tested the LocARNA-based
                  clustering approach on the sequences of the
                  RFAM-seed alignments. Furthermore, we have applied it to a
                  previously published set of 3332 predicted structured
                  elements in the Ciona intestinalis genomes (Missal et al.,
                  Bioinformatics 21(S2), i77-i78). In addition to recovering
                  e.g. tRNAs as a structure-based class, the method identifies
                  several RNA families, including microRNA and snoRNA
                  candidates, and suggests several novel classes of ncRNAs for
                  which to-date no representative has been experimentally
                  characterized.}
}
				</citation>
					<!--
						Smith et al. (2010): Freiburg RNA Tools web server. The journal is
						written as a literal string, since no BibTeX string macro is defined
						inside this entry. The two ISSNs are kept in a single issn field,
						because a BibTeX entry may not repeat a field name.
					-->
					<citation type="bibtex">@Article{Smith:Heyne:Richter:Freib_RNA_Tools:NAR2010,
  author =	 {Smith, Cameron and Heyne, Steffen and Richter, Andreas S.
                  and Will, Sebastian and Backofen, Rolf},
  title =	 {Freiburg {RNA} {Tools}: a web server integrating {IntaRNA},
                  {ExpaRNA} and {LocARNA}},
  journal =	 {Nucleic Acids Research},
  year =	 {2010},
  volume =	 {38 Suppl},
  number =	 {},
  pages =	 {W373-7},
  user =	 {arichter},
  pmid =	 {20444875},
  doi = 	 {10.1093/nar/gkq316},
  issn = 	 {0305-1048, 1362-4962},
  abstract =	 {The Freiburg RNA tools web server integrates three tools
                  for the advanced analysis of RNA in a common web-based user
                  interface. The tools IntaRNA, ExpaRNA and LocARNA support
                  the prediction of RNA-RNA interaction, exact RNA matching
                  and alignment of RNA, respectively. The Freiburg RNA tools
                  web server and the software packages of the stand-alone
                  tools are freely accessible at
                  http://rna.informatik.uni-freiburg.de.}
}
					</citation>
	</citations>
</tool>