Mercurial > repos > swebb > pycrac
diff pyCRAC/pyClusterReads.xml @ 0:19b20927172d draft
Uploaded
author | swebb |
---|---|
date | Tue, 18 Jun 2013 09:11:00 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyClusterReads.xml Tue Jun 18 09:11:00 2013 -0400
@@ -0,0 +1,239 @@
+<tool id="pyClusterReads" name="pyClusterReads" force_history_refresh="True">
+    <!-- Galaxy wrapper for pyClusterReads.py (pyCRAC): builds clusters from the read intervals
+         of a pyReadCounters GTF file and writes them to a GTF dataset. -->
+    <requirements>
+        <requirement type="package">pyCRAC</requirement>
+    </requirements>
+    <command interpreter="python">
+        /usr/local/bin/pyClusterReads.py
+        -f $input
+        --gtf=$addGTF.gtf
+        #if $addGTF.annotate.annotations != "all":
+            #if $addGTF.gtfFile == "default" and $addGTF.annotate.annotations == "auto":
+                ## The scanned-annotation select only exists once the "scan" conditional is set
+                ## to "scanning"; referencing it while still "wait" fails template evaluation.
+                #if $addGTF.annotate.scan.annotations == "scanning":
+                    --annotation=$addGTF.annotate.scan.annotation
+                #end if#
+            #else:
+                --annotation=$addGTF.annotate.annotation
+            #end if#
+        #end if#
+        -o $output
+        #if $addOpt.options == "edit":
+            --range=$addOpt.range
+            --cic=$addOpt.cic
+            --co=$addOpt.co
+            --ch=$addOpt.ch
+            --cl=$addOpt.cl
+            --mutsfreq=$addOpt.mutsfreq
+        #end if#
+    </command>
+    <version_command>/usr/local/bin/pyClusterReads.py --version</version_command>
+    <inputs>
+        <param format="gtf" name="input" type="data" label="Input Read Data File -f" help="GTF format sorted by position i.e. pyReadCounters output file."/>
+        <!-- GTF annotation source: an entry of the pycrac_gtf data table ("default") or a GTF dataset from the history ("other"). -->
+        <conditional name="addGTF">
+            <param name="gtfFile" type="select" label="Choose GTF File from">
+                <option value="default" selected="true">Defaults</option>
+                <option value="other">History</option>
+            </param>
+            <when value="default">
+                <param name="gtf" type="select" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates">
+                    <options from_data_table="pycrac_gtf"/>
+                </param>
+                <conditional name="annotate">
+                    <param name="annotations" type="select" label="Select annotation">
+                        <option value="all" selected="true">All</option>
+                        <option value="manual">Enter in text box</option>
+                        <option value="auto">Scan pyGetGTFSources file</option>
+                    </param>
+                    <when value="all">
+                        <param name="annotation" type="hidden" format="txt" size="10" value="all"/>
+                    </when>
+                    <when value="manual">
+                        <param name="annotation" type="text" format="txt" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool">
+                            <validator type="empty_field" message="Please enter a value"/>
+                        </param>
+                    </when>
+                    <when value="auto">
+                        <param format="tabular" name="gtf_annotation" type="data" label="GTF annotation File (pyGetGTFSources output)" help="Tabular file containing unique list of annotations/sources in selected GTF file. Refer to pyGetGTFSources"/>
+                        <conditional name="scan">
+                            <param name="annotations" type="select" label="Scan this file for annotations" help="Choose the correct GTF file then choose GO">
+                                <option value="wait" selected="true">Waiting</option>
+                                <option value="scanning">Go</option>
+                            </param>
+                            <when value="wait">
+                            </when>
+                            <when value="scanning">
+                                <param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation">
+                                    <options from_dataset="gtf_annotation">
+                                        <column name="name" index="0"/>
+                                        <column name="value" index="0"/>
+                                    </options>
+                                </param>
+                            </when>
+                        </conditional>
+                    </when>
+                </conditional>
+
+            </when>
+            <when value="other">
+                <param format="gtf" name="gtf" type="data" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"/>
+                <conditional name="annotate">
+                    <param name="annotations" type="select" label="Select annotation">
+                        <option value="all" selected="true">All</option>
+                        <option value="manual">Enter in text box</option>
+                        <option value="auto">Scan selected file</option>
+                    </param>
+                    <when value="all">
+                        <param name="annotation" type="hidden" format="txt" size="10" value="all"/>
+                    </when>
+                    <when value="manual">
+                        <param name="annotation" type="text" format="txt" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool">
+                            <validator type="empty_field" message="Please enter a value"/>
+                        </param>
+                    </when>
+                    <when value="auto">
+                        <param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation">
+                            <options from_dataset="gtf">
+                                <column name="name" index="1"/>
+                                <column name="value" index="1"/>
+                                <filter type="unique_value" name="unique" column="1"/>
+                            </options>
+                        </param>
+                    </when>
+                </conditional>
+            </when>
+        </conditional>
+
+        <!-- Optional tuning flags; the command template only passes them when "Edit" is selected. -->
+        <conditional name="addOpt">
+            <param name="options" type="select" label="Standard Options">
+                <option value="default" selected="true">Default</option>
+                <option value="edit">Edit</option>
+            </param>
+            <when value="edit">
+                <param format="integer" name="range" type="integer" label="Range --range" value="0" size="5" help="Manually set the length of the 5' and 3' UTRs 0>50000">
+                    <validator type="in_range" min="0" max="50000" message="Please enter a value between 0 and 50000"/>
+                </param>
+                <param format="integer" name="ch" type="integer" label="Cluster height --ch" value="2" size="10" help="Minimal height of a cluster">
+                    <validator type="in_range" min="1" message="Please enter a value >= 1"/>
+                </param>
+                <!-- NOTE(review): a preset of 1 for a *maximum* cluster length looks unintended; confirm against the pyClusterReads.py default. -->
+                <param format="integer" name="cl" type="integer" label="Cluster length --cl" value="1" size="10" help="Maximum length of a cluster">
+                    <validator type="in_range" min="1" message="Please enter a value >= 1"/>
+                </param>
+                <param format="integer" name="cic" type="integer" label="cDNAs in clusters --cic" value="2" size="10">
+                    <validator type="in_range" min="2" message="Please enter a value >= 2"/>
+                </param>
+                <param format="integer" name="co" type="integer" label="cDNA-cluster nucleotide overlap --co" value="1" size="10">
+                    <validator type="in_range" min="1" message="Please enter a value >= 1"/>
+                </param>
+                <param format="integer" name="mutsfreq" type="integer" label="Minimum mutation frequency for a cluster position --mutsfreq" value="0" size="3">
+                    <validator type="in_range" min="0" max="100" message="Please enter a value between 0 and 100"/>
+                </param>
+            </when>
+            <when value="default">
+            </when>
+        </conditional>
+        <param name="label" type="text" format="txt" size="30" value="pyClusterReads" label="Enter output file label -o" />
+    </inputs>
+    <outputs>
+        <data format="gtf" name="output" label="${label.value}_clusters.gtf"/>
+    </outputs>
+    <help>
+
+.. class:: infomark
+
+**pyClusterReads**
+
+pyClusterReads is part of the pyCRAC_ package. Takes a reads_count_output GTF file from pyReadCounters and generates clusters from the interval coordinates.
+Produces a GTF output file with cluster intervals and overlapping genomic features.
+It also includes mutation frequencies (after the # character) for nucleotides in intervals using chromosomal coordinates.
+The pyClusterReads GTF output file essentially has the same layout as other pyCRAC GTF output files.
+
+**NOTE!** By default it calls each cluster an "exon" but this has no meaning. It may overlap with an intron.
+Use bedtools to extract those intervals that overlap with introns or other features.
+
+The maximum height of the cluster is indicated in column 8.
+The hash character at the end of each line (#) shows chromosomal coordinates of mutated nucleotides within the cluster interval and their mutation frequencies.
+
+For example::
+
+    # 114099S100.0
+
+indicates that 100% of the nucleotides in position 114099 were substituted in the cluster.
+
+An example of a pyClusterReads output file::
+
+    ##gff-version 2
+    # generated by pyClusterReads.py version 0.0.1, Fri Jan 18 11:59:42 2013
+    # pyClusterReads.py -f count_output_reads.gtf -o count_output_clusters.gtf -v
+    # chromosome feature source start end cDNAs strand height attributes
+    chrI cluster exon 112583 112643 6 - 5 gene_id "INT_0_114,YAL021C"; gene_name "INT_0_114,CCR4"; # 112612S75.0;
+    chrI cluster exon 113176 113232 3 - 3 gene_id "INT_0_114,YAL021C"; gene_name "INT_0_114,CCR4"; # 113184S100.0;
+    chrI cluster exon 113334 113386 2 - 2 gene_id "INT_0_114,YAL021C"; gene_name "INT_0_114,CCR4"; # 113349S50.0,113379S100.0;
+    chrI cluster exon 113534 113564 3 - 3 gene_id "INT_0_119,INT_0_114"; gene_name "INT_0_119,INT_0_114"; # 113554S33.3,113556S33.3,113557S33.3;
+    chrI cluster exon 113644 113691 5 - 4 gene_id "YAL020C,INT_0_114"; gene_name "ATS1,INT_0_114"; # 113649S50.0,113657S33.3,113679S25.0
+    chrI cluster exon 113912 113958 2 - 2 gene_id "YAL020C,INT_0_114"; gene_name "ATS1,INT_0_114"; # 113932S50.0,113946S50.0;
+    chrI cluster exon 113966 114066 5 - 3 gene_id "YAL020C,INT_0_114"; gene_name "ATS1,INT_0_114"; # 113987S50.0,114033S33.3,114039S33.3;
+    chrI cluster exon 114067 114130 3 - 3 gene_id "YAL020C,INT_0_114"; gene_name "ATS1,INT_0_114"; # 114099S100.0;
+
+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
+
+------
+
+**Parameter list**
+
+
+File input options::
+
+    -f reads.gtf, --input_file=reads.gtf
+                        provide the path to your GTF read data file. NOTE the
+                        file has to be correctly sorted! If you used
+                        pyReadCounters to generate the file you should be
+                        fine. If you modified it, use the sort command
+                        described in the manual to sort your file first by
+                        chromosome, then by strand and then by start position.
+    -o clusters.gtf, --output_file=clusters.gtf
+                        provide a name for an output file. By default it
+                        writes to the standard output
+    --gtf=Yourfavoritegtf.gtf
+                        type the path to the gtf annotation file that you want
+                        to use
+
+Common pyCRAC options::
+
+    -r 100, --range=100
+                        allows you to set the length of the UTR regions. If
+                        you set '-r 50' or '--range=50', then the program will
+                        set a fixed length (50 bp) regardless of whether the
+                        GTF annotation file has genes with annotated UTRs.
+    -a protein_coding, --annotation=protein_coding
+                        select which annotation (i.e. protein_coding, ncRNA,
+                        sRNA, rRNA,snoRNA,snRNA, depending on the source of
+                        your GTF file) you would like to focus your analysis
+                        on. Default = all annotations
+
+Options for cluster analysis::
+
+    --cic=2, --cdnasinclusters=2
+                        sets the minimal number of overlapping cDNAs in each
+                        cluster. Default = 2
+    --co=5, --clusteroverlap=5
+                        sets the number of nucleotides cDNA sequences have to
+                        overlap to form a cluster. Default = 1 nucleotide
+    --ch=5, --clusterheight=5
+                        sets the minimal height of the cluster. Default = 2
+                        nucleotides
+    --cl=100, --clusterlength=100
+                        to set the maximum cluster sequence length
+    --mutsfreq=10, --mutationfrequency=10
+                        sets the minimal mutations frequency for a cluster
+                        position in the GTF output file. Default = 0%.
+                        Example: if the mutsfrequency is set at 10 and a
+                        cluster position is mutated in less than 10% of the
+                        reads, then the mutation will not be reported.
+    </help>
+</tool>
\ No newline at end of file