test: pyCRAC/pyClusterReads.xml comparison

comparison pyCRAC/pyClusterReads.xml @ 0:19b20927172d draft

Uploaded

author	swebb
date	Tue, 18 Jun 2013 09:11:00 -0400
parents
children

comparison

equal deleted inserted replaced

--1:000000000000
+:19b20927172d
+<tool id="pyClusterReads" name="pyClusterReads" force_history_refresh="True">
+	<!-- External package this tool depends on. -->
+	<requirements>
+		<requirement type="package">pyCRAC</requirement>
+	</requirements>
+	<!--
+	Assembles the pyClusterReads.py call from the form values:
+	  * -f takes the input reads file and -o the output dataset;
+	  * the gtf option takes the GTF chosen in the addGTF conditional;
+	  * the annotation option is only emitted when the annotation selector
+	    is not "all"; for a default GTF in "auto" mode the value comes from
+	    the nested "scan" conditional, otherwise from the "annotate"
+	    conditional itself;
+	  * the cluster options are only emitted when Standard Options is "edit".
+	NOTE(review): in "auto" mode with a default GTF, the "scan" conditional
+	defines no "annotation" parameter while its selector is still "wait";
+	confirm how the template behaves in that state.
+	-->
+	<command interpreter="python">
+		/usr/local/bin/pyClusterReads.py
+		-f $input
+		--gtf=$addGTF.gtf
+		#if $addGTF.annotate.annotations == "auto" and $addGTF.gtfFile == "default":
+			--annotation=$addGTF.annotate.scan.annotation
+		#elif $addGTF.annotate.annotations != "all":
+			--annotation=$addGTF.annotate.annotation
+		#end if#
+		-o $output
+		#if $addOpt.options == "edit":
+			--range=$addOpt.range
+			--cic=$addOpt.cic
+			--co=$addOpt.co
+			--ch=$addOpt.ch
+			--cl=$addOpt.cl
+			--mutsfreq=$addOpt.mutsfreq
+		#end if#
+	</command>
+	<version_command>/usr/local/bin/pyClusterReads.py --version</version_command>
+	<inputs>
+	    <!-- Read data passed to the script with -f. -->
+	    <param name="input"
+	           type="data"
+	           format="gtf"
+	           label="Input Read Data File -f"
+	           help="GTF format sorted by position i.e. pyReadCounters output file."/>
+	    <!--
+	    GTF annotation source. "default" picks a GTF from the pycrac_gtf data
+	    table, "other" picks a GTF dataset from the history. Both branches carry
+	    an "annotate" conditional that decides which annotation is passed to the
+	    script: all of them, one typed by hand, or one chosen from a scanned file.
+	    -->
+	    <conditional name="addGTF">
+	        <param name="gtfFile" type="select" label="Choose GTF File from">
+	            <option value="default" selected="true">Defaults</option>
+	            <option value="other">History</option>
+	        </param>
+	        <when value="default">
+	            <param name="gtf" type="select" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates">
+	                <options from_data_table="pycrac_gtf"/>
+	            </param>
+	            <conditional name="annotate">
+	                <param name="annotations" type="select" label="Select annotation">
+	                    <option value="all" selected="true">All</option>
+	                    <option value="manual">Enter in text box</option>
+	                    <option value="auto">Scan pyGetGTFSources file</option>
+	                </param>
+	                <when value="all">
+	                    <param name="annotation" type="hidden" format="txt" size="10" value="all"/>
+	                </when>
+	                <when value="manual">
+	                    <param name="annotation" type="text" format="txt" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool">
+	                        <validator type="empty_field" message="Please enter a value"/>
+	                    </param>
+	                </when>
+	                <when value="auto">
+	                    <param format="tabular" name="gtf_annotation" type="data" label="GTF annotation File (pyGetGTFSources output)" help="Tabular file containing unique list of annotations/sources in selected GTF file. Refer to pyGetGTFSources"/>
+	                    <!-- Two-step selector: the annotation list is only offered once the user switches to "Go". -->
+	                    <conditional name="scan">
+	                        <param name="annotations" type="select" label="Scan this file for annotations" help="Choose the correct GTF file then choose GO">
+	                            <option value="wait" selected="true">Waiting</option>
+	                            <option value="scanning">Go</option>
+	                        </param>
+	                        <when value="wait"/>
+	                        <when value="scanning">
+	                            <param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation">
+	                                <options from_dataset="gtf_annotation">
+	                                    <column name="name" index="0"/>
+	                                    <column name="value" index="0"/>
+	                                </options>
+	                            </param>
+	                        </when>
+	                    </conditional>
+	                </when>
+	            </conditional>
+	        </when>
+	        <when value="other">
+	            <!-- Format name written in lowercase to match the other data parameters in this tool. -->
+	            <param format="gtf" name="gtf" type="data" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"/>
+	            <conditional name="annotate">
+	                <param name="annotations" type="select" label="Select annotation">
+	                    <option value="all" selected="true">All</option>
+	                    <option value="manual">Enter in text box</option>
+	                    <option value="auto">Scan selected file</option>
+	                </param>
+	                <when value="all">
+	                    <param name="annotation" type="hidden" format="txt" size="10" value="all"/>
+	                </when>
+	                <when value="manual">
+	                    <param name="annotation" type="text" format="txt" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool">
+	                        <validator type="empty_field" message="Please enter a value"/>
+	                    </param>
+	                </when>
+	                <when value="auto">
+	                    <!-- Options are the unique values of column index 1 of the selected GTF dataset. -->
+	                    <param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation">
+	                        <options from_dataset="gtf">
+	                            <column name="name" index="1"/>
+	                            <column name="value" index="1"/>
+	                            <filter type="unique_value" name="unique" column="1"/>
+	                        </options>
+	                    </param>
+	                </when>
+	            </conditional>
+	        </when>
+	    </conditional>
+	  <!--
+	  Optional tuning parameters. With "default" nothing extra is passed to the
+	  script; with "edit" every value below is passed on the command line.
+	  -->
+	  <conditional name="addOpt">
+		<param name="options" type="select" label="Standard Options">
+			<option value="default" selected="true">Default</option>
+			<option value="edit">Edit</option>
+		</param>
+		<when value="edit">
+			<param format="integer" name="range" type="integer" label="Range --range" value="0" size="5" help="Manually set the length of the 5' and 3' UTRs 0-50000">
+				<validator type="in_range" min="0" max="50000" message="Please enter a value between 0 and 50000"/>
+			</param>
+			<param format="integer" name="ch" type="integer" label="Cluster height --ch" value="2" size="10" help="Minimal height of a cluster">
+				<validator type="in_range" min="1" message="Please enter a value >= 1"/>
+			</param>
+			<!-- NOTE(review): a default of 1 for a maximum cluster length looks unusually small; confirm against the script's own default. -->
+			<param format="integer" name="cl" type="integer" label="Cluster length --cl" value="1" size="10" help="Maximum length of a cluster">
+				<validator type="in_range" min="1" message="Please enter a value >= 1"/>
+			</param>
+			<!-- The validator enforces a minimum of 2, so the message states the same bound. -->
+			<param format="integer" name="cic" type="integer" label="cDNAs in clusters --cic" value="2" size="10">
+				<validator type="in_range" min="2" message="Please enter a value >= 2"/>
+			</param>
+			<param format="integer" name="co" type="integer" label="cDNA-cluster nucleotide overlap --co" value="1" size="10">
+				<validator type="in_range" min="1" message="Please enter a value >= 1"/>
+			</param>
+			<param format="integer" name="mutsfreq" type="integer" label="Minimum mutation frequency for a cluster position --mutsfreq" value="0" size="3">
+				<validator type="in_range" min="0" max="100" message="Please enter a value between 0 and 100"/>
+			</param>
+		</when>
+		<when value="default">
+		</when>
+	  </conditional>
+	<!-- Free-text prefix used to build the label of the output dataset. -->
+	<param name="label"
+	       type="text"
+	       format="txt"
+	       size="30"
+	       value="pyClusterReads"
+	       label="Enter output file label -o" />
+	</inputs>
+	<!-- Single GTF output; its label is the user-supplied label followed by "_clusters.gtf". -->
+	<outputs>
+		<data name="output"
+		      format="gtf"
+		      label="${label.value}_clusters.gtf"/>
+	</outputs>
+	<help>
+.. class:: infomark
+**pyClusterReads**
+pyClusterReads is part of the pyCRAC_ package. It takes a reads_count_output GTF file from pyReadCounters and generates clusters from the interval coordinates.
+Produces a GTF output file with cluster intervals and overlapping genomic features.
+It also includes mutation frequencies (after the # character) for nucleotides in intervals, using chromosomal coordinates.
+The pyClusterReads GTF output file essentially has the same layout as other pyCRAC GTF output files.
+**NOTE!** By default it calls each cluster an "exon" but this has no meaning. It may overlap with an intron.
+Use bedtools to extract those intervals that overlap with introns or other features.
+The maximum height of the cluster is indicated in column 8.
+The text after the hash character (#) at the end of each line lists the chromosomal coordinates of mutated nucleotides within the cluster interval and their mutation frequencies.
+For example::
+# 114099S100.0
+indicates that 100% of the nucleotides in position 114099 were substituted in the cluster.
+An example of a pyClusterReads output file::
+##gff-version 2
+# generated by pyClusterReads.py version 0.0.1, Fri Jan 18 11:59:42 2013
+# pyClusterReads.py -f count_output_reads.gtf -o count_output_clusters.gtf -v
+# chromosome    feature source  start   end     cDNAs   strand  height  attributes
+chrI    cluster exon    112583  112643  6       -       5   gene_id "INT_0_114,YAL021C"; gene_name "INT_0_114,CCR4"; # 112612S75.0;
+chrI    cluster exon    113176  113232  3       -       3   gene_id "INT_0_114,YAL021C"; gene_name "INT_0_114,CCR4"; # 113184S100.0;
+chrI    cluster exon    113334  113386  2       -       2   gene_id "INT_0_114,YAL021C"; gene_name "INT_0_114,CCR4"; # 113349S50.0,113379S100.0;
+chrI    cluster exon    113534  113564  3       -       3   gene_id "INT_0_119,INT_0_114"; gene_name "INT_0_119,INT_0_114"; # 113554S33.3,113556S33.3,113557S33.3;
+chrI    cluster exon    113644  113691  5       -       4   gene_id "YAL020C,INT_0_114"; gene_name "ATS1,INT_0_114"; # 113649S50.0,113657S33.3,113679S25.0
+chrI    cluster exon    113912  113958  2       -       2   gene_id "YAL020C,INT_0_114"; gene_name "ATS1,INT_0_114"; # 113932S50.0,113946S50.0;
+chrI    cluster exon    113966  114066  5       -       3   gene_id "YAL020C,INT_0_114"; gene_name "ATS1,INT_0_114"; # 113987S50.0,114033S33.3,114039S33.3;
+chrI    cluster exon    114067  114130  3       -       3   gene_id "YAL020C,INT_0_114"; gene_name "ATS1,INT_0_114"; # 114099S100.0;
+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
+------
+**Parameter list**
+File input options::
+-f reads.gtf, --input_file=reads.gtf
+provide the path to your GTF read data file. NOTE the
+				  file has to be correctly sorted! If you used
+				  pyReadCounters to generate the file you should be
+				  fine. If you modified it, use the sort command
+				  described in the manual to sort your file first by
+				  chromosome, then by strand and then by start position.
+-o clusters.gtf, --output_file=clusters.gtf
+provide a name for an output file. By default it
+				  writes to the standard output
+--gtf=Yourfavoritegtf.gtf
+type the path to the gtf annotation file that you want
+				  to use
+Common pyCRAC options::
+-r 100, --range=100
+allows you to set the length of the UTR regions. If
+				  you set '-r 50' or '--range=50', then the program will
+				  set a fixed length (50 bp) regardless of whether the
+				  GTF annotation file has genes with annotated UTRs.
+-a protein_coding, --annotation=protein_coding
+select which annotation (i.e. protein_coding, ncRNA,
+				  sRNA, rRNA,snoRNA,snRNA, depending on the source of
+				  your GTF file) you would like to focus your analysis
+				  on. Default = all annotations
+Options for cluster analysis::
+--cic=2, --cdnasinclusters=2
+sets the minimal number of overlapping cDNAs in each
+				  cluster. Default = 2
+--co=5, --clusteroverlap=5
+sets the number of nucleotides cDNA sequences have to
+				  overlap to form a cluster. Default = 1 nucleotide
+--ch=5, --clusterheight=5
+sets the minimal height of the cluster. Default = 2
+				  nucleotides
+--cl=100, --clusterlength=100
+to set the maximum cluster sequence length
+--mutsfreq=10, --mutationfrequency=10
+sets the minimal mutation frequency for a cluster
+				  position in the GTF output file. Default = 0%.
+				  Example: if mutsfreq is set to 10 and a
+				  cluster position is mutated in less than 10% of the
+				  reads, then the mutation will not be reported.
+	</help>
+</tool>

Mercurial > repos > swebb > pycrac

comparison pyCRAC/pyClusterReads.xml @ 0:19b20927172d draft