Mercurial > repos > swebb > pycrac
view pyCRAC/pyClusterReads.xml @ 1:7c9574213c0a draft default tip
Uploaded
author | swebb |
---|---|
date | Thu, 20 Jun 2013 12:13:43 -0400 |
parents | 19b20927172d |
children |
line wrap: on
line source
<tool id="pyClusterReads" name="pyClusterReads" force_history_refresh="True">
    <!--
        Galaxy wrapper around pyClusterReads.py (part of the pyCRAC package).

        Inputs : a read GTF (-f) and an annotation GTF taken either from the
                 pycrac_gtf data table or from the history, optionally narrowed
                 to one annotation, plus optional cluster settings.
        Output : one GTF dataset written via -o and labelled
                 "<label>_clusters.gtf".
    -->
    <requirements>
        <requirement type="package">pyCRAC</requirement>
    </requirements>

    <command interpreter="python">
        /usr/local/bin/pyClusterReads.py
        -f $input
        --gtf=$addGTF.gtf
        ## The annotation option is only passed when the user did not choose All.
        #if $addGTF.annotate.annotations != "all":
            #if $addGTF.gtfFile == "default" and $addGTF.annotate.annotations == "auto":
                ## scan.annotation only exists once the scan selector is set to Go;
                ## referencing it while still on Waiting would abort template filling.
                #if $addGTF.annotate.scan.annotations == "scanning":
                    --annotation="$addGTF.annotate.scan.annotation"
                #end if
            #else:
                ## Quoted so that a free-text value containing spaces stays one argument.
                --annotation="$addGTF.annotate.annotation"
            #end if
        #end if
        -o $output
        #if $addOpt.options == "edit":
            --range=$addOpt.range
            --cic=$addOpt.cic
            --co=$addOpt.co
            --ch=$addOpt.ch
            --cl=$addOpt.cl
            --mutsfreq=$addOpt.mutsfreq
        #end if
    </command>

    <version_command>/usr/local/bin/pyClusterReads.py --version</version_command>

    <inputs>
        <param format="gtf" name="input" type="data" label="Input Read Data File -f" help="GTF format sorted by position i.e. pyReadCounters output file."/>

        <!-- Annotation GTF: either a server-side default (data table) or a history dataset. -->
        <conditional name="addGTF">
            <param name="gtfFile" type="select" label="Choose GTF File from">
                <option value="default" selected="true">Defaults</option>
                <option value="other">History</option>
            </param>

            <when value="default">
                <param name="gtf" type="select" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates">
                    <options from_data_table="pycrac_gtf"/>
                </param>
                <conditional name="annotate">
                    <param name="annotations" type="select" label="Select annotation">
                        <option value="all" selected="true">All</option>
                        <option value="manual">Enter in text box</option>
                        <option value="auto">Scan pyGetGTFSources file</option>
                    </param>
                    <when value="all">
                        <param name="annotation" type="hidden" format="txt" size="10" value="all"/>
                    </when>
                    <when value="manual">
                        <param name="annotation" type="text" format="txt" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool">
                            <validator type="empty_field" message="Please enter a value"/>
                        </param>
                    </when>
                    <when value="auto">
                        <param format="tabular" name="gtf_annotation" type="data" label="GTF annotation File (pyGetGTFSources output)" help="Tabular file containing unique list of annotations/sources in selected GTF file. Refer to pyGetGTFSources"/>
                        <!-- Two-step selector: the annotation list is only offered after the user switches to Go. -->
                        <conditional name="scan">
                            <param name="annotations" type="select" label="Scan this file for annotations" help="Choose the correct GTF file then choose GO">
                                <option value="wait" selected="true">Waiting</option>
                                <option value="scanning">Go</option>
                            </param>
                            <when value="wait">
                            </when>
                            <when value="scanning">
                                <param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation">
                                    <options from_dataset="gtf_annotation">
                                        <column name="name" index="0"/>
                                        <column name="value" index="0"/>
                                    </options>
                                </param>
                            </when>
                        </conditional>
                    </when>
                </conditional>
            </when>

            <when value="other">
                <param format="gtf" name="gtf" type="data" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"/>
                <conditional name="annotate">
                    <param name="annotations" type="select" label="Select annotation">
                        <option value="all" selected="true">All</option>
                        <option value="manual">Enter in text box</option>
                        <option value="auto">Scan selected file</option>
                    </param>
                    <when value="all">
                        <param name="annotation" type="hidden" format="txt" size="10" value="all"/>
                    </when>
                    <when value="manual">
                        <param name="annotation" type="text" format="txt" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool">
                            <validator type="empty_field" message="Please enter a value"/>
                        </param>
                    </when>
                    <when value="auto">
                        <!-- Choices are the unique values of the second column of the selected GTF. -->
                        <param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation">
                            <options from_dataset="gtf">
                                <column name="name" index="1"/>
                                <column name="value" index="1"/>
                                <filter type="unique_value" name="unique" column="1"/>
                            </options>
                        </param>
                    </when>
                </conditional>
            </when>
        </conditional>

        <!-- Cluster settings are only sent on the command line when "Edit" is chosen. -->
        <conditional name="addOpt">
            <param name="options" type="select" label="Standard Options">
                <option value="default" selected="true">Default</option>
                <option value="edit">Edit</option>
            </param>
            <when value="edit">
                <param format="integer" name="range" type="integer" label="Range --range" value="0" size="5" help="Manually set the length of the 5' and 3' UTRs 0>50000">
                    <validator type="in_range" min="0" max="50000" message="Please enter a value between 0 and 50000"/>
                </param>
                <param format="integer" name="ch" type="integer" label="Cluster height --ch" value="2" size="10" help="Minimal height of a cluster">
                    <validator type="in_range" min="1" message="Please enter a value >= 1"/>
                </param>
                <!-- NOTE(review): a default of 1 for a maximum cluster length looks suspicious
                     (the help example below uses 100). Confirm against pyClusterReads.py. -->
                <param format="integer" name="cl" type="integer" label="Cluster length --cl" value="1" size="10" help="Maximum length of a cluster">
                    <validator type="in_range" min="1" message="Please enter a value >= 1"/>
                </param>
                <!-- The message states the bound that is actually enforced (min is 2). -->
                <param format="integer" name="cic" type="integer" label="cDNAs in clusters --cic" value="2" size="10">
                    <validator type="in_range" min="2" message="Please enter a value >= 2"/>
                </param>
                <param format="integer" name="co" type="integer" label="cDNA-cluster nucleotide overlap --co" value="1" size="10">
                    <validator type="in_range" min="1" message="Please enter a value >= 1"/>
                </param>
                <param format="integer" name="mutsfreq" type="integer" label="Minimum mutation frequency for a cluster position --mutsfreq" value="0" size="3">
                    <validator type="in_range" min="0" max="100" message="Please enter a value between 0 and 100"/>
                </param>
            </when>
            <when value="default">
            </when>
        </conditional>

        <!-- Only used to build the history label of the output dataset. -->
        <param name="label" type="text" format="txt" size="30" value="pyClusterReads" label="Enter output file label -o"/>
    </inputs>

    <outputs>
        <data format="gtf" name="output" label="${label.value}_clusters.gtf"/>
    </outputs>

    <help>

.. class:: infomark

**pyClusterReads**

pyClusterReads is part of the pyCRAC_ package. Takes a reads_count_output GTF file from pyReadCounters and generates clusters from the interval coordinates. Produces a GTF output file with cluster intervals and overlapping genomic features.
It also includes mutation frequencies (after the # character) for nucleotides in intervals using chromosomal coordinates.

The pyClusterReads GTF output file essentially has the same layout as other pyCRAC GTF output files.

**NOTE!** By default it calls each cluster an "exon" but this has no meaning. It may overlap with an intron. Use bedtools to extract those intervals that overlap with introns or other features.
The maximum height of the cluster is indicated in column 8. The hash character at the end of each line (#) shows chromosomal coordinates of mutated nucleotides within the cluster interval and their mutation frequencies.
For example::

    # 114099S100.0

indicates that 100% of the nucleotides in position 114099 were substituted in the cluster.

An example of a pyClusterReads output file::

    ##gff-version 2
    # generated by pyClusterReads.py version 0.0.1, Fri Jan 18 11:59:42 2013
    # pyClusterReads.py -f count_output_reads.gtf -o count_output_clusters.gtf -v
    # chromosome    feature    source    start     end       cDNAs    strand    height    attributes
    chrI            cluster    exon      112583    112643    6        -         5         gene_id "INT_0_114,YAL021C"; gene_name "INT_0_114,CCR4"; # 112612S75.0;
    chrI            cluster    exon      113176    113232    3        -         3         gene_id "INT_0_114,YAL021C"; gene_name "INT_0_114,CCR4"; # 113184S100.0;
    chrI            cluster    exon      113334    113386    2        -         2         gene_id "INT_0_114,YAL021C"; gene_name "INT_0_114,CCR4"; # 113349S50.0,113379S100.0;
    chrI            cluster    exon      113534    113564    3        -         3         gene_id "INT_0_119,INT_0_114"; gene_name "INT_0_119,INT_0_114"; # 113554S33.3,113556S33.3,113557S33.3;
    chrI            cluster    exon      113644    113691    5        -         4         gene_id "YAL020C,INT_0_114"; gene_name "ATS1,INT_0_114"; # 113649S50.0,113657S33.3,113679S25.0
    chrI            cluster    exon      113912    113958    2        -         2         gene_id "YAL020C,INT_0_114"; gene_name "ATS1,INT_0_114"; # 113932S50.0,113946S50.0;
    chrI            cluster    exon      113966    114066    5        -         3         gene_id "YAL020C,INT_0_114"; gene_name "ATS1,INT_0_114"; # 113987S50.0,114033S33.3,114039S33.3;
    chrI            cluster    exon      114067    114130    3        -         3         gene_id "YAL020C,INT_0_114"; gene_name "ATS1,INT_0_114"; # 114099S100.0;

.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html

------

**Parameter list**

File input options::

    -f reads.gtf, --input_file=reads.gtf
                        provide the path to your GTF read data file. NOTE the file has to be
                        correctly sorted! If you used pyReadCounters to generate the
                        file you should be fine. If you modified it, use the sort
                        command described in the manual to sort your file first
                        by chromosome, then by strand and then by start
                        position.
    -o clusters.gtf, --output_file=clusters.gtf
                        provide a name for an output file. By default it writes to
                        the standard output
    --gtf=Yourfavoritegtf.gtf
                        type the path to the gtf annotation file that you want to use

Common pyCRAC options::

    -r 100, --range=100
                        allows you to set the length of the UTR regions. If you set '-r 50' or
                        '--range=50', then the program will set a fixed length (50 bp)
                        regardless of whether the GTF annotation file has genes
                        with annotated UTRs.
    -a protein_coding, --annotation=protein_coding
                        select which annotation (i.e. protein_coding, ncRNA, sRNA,
                        rRNA,snoRNA,snRNA, depending on the source of your GTF
                        file) you would like to focus your analysis on.
                        Default = all annotations

Options for cluster analysis::

    --cic=2, --cdnasinclusters=2
                        sets the minimal number of overlapping cDNAs in each
                        cluster. Default = 2
    --co=5, --clusteroverlap=5
                        sets the number of nucleotides cDNA sequences have to
                        overlap to form a cluster. Default = 1 nucleotide
    --ch=5, --clusterheight=5
                        sets the minimal height of the cluster. Default = 2
                        nucleotides
    --cl=100, --clusterlength=100
                        to set the maximum cluster sequence length
    --mutsfreq=10, --mutationfrequency=10
                        sets the minimal mutation frequency for a cluster
                        position in the GTF output file. Default = 0%. Example: if
                        mutsfreq is set at 10 and a cluster position
                        is mutated in less than 10% of the reads, then the
                        mutation will not be reported.

    </help>
</tool>