Mercurial > repos > swebb > pycrac
diff pyCRAC/pyBinCollector.xml @ 0:19b20927172d draft
Uploaded
author | swebb |
---|---|
date | Tue, 18 Jun 2013 09:11:00 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyBinCollector.xml Tue Jun 18 09:11:00 2013 -0400
@@ -0,0 +1,301 @@
+<tool id="pyBinCollector" name="pyBinCollector">
+  <!--
+    Galaxy wrapper for pyBinCollector, part of the pyCRAC package. The options
+    chosen below are handed to pyBinCollector.pl. One GTF dataset is produced by
+    default; six text datasets replace it when "Output all genes" is set to Yes.
+  -->
+  <requirements>
+    <requirement type="package">pyCRAC</requirement>
+  </requirements>
+  <command interpreter="perl">
+    pyBinCollector.pl
+    -f $input
+    --gtf $addGTF.gtf
+    ## A scanned pyGetGTFSources file keeps its chosen annotation one level deeper (scan).
+    ## Values are quoted so an annotation containing spaces stays a single argument.
+    #if $addGTF.gtfFile == "default" and $addGTF.annotate.annotations == "auto":
+      --annotation "$addGTF.annotate.scan.annotation"
+    #else:
+      --annotation "$addGTF.annotate.annotation"
+    #end if#
+    #if $addOpt.options == "edit":
+      --options
+      --range $addOpt.range
+      --min_length $addOpt.min_length
+      --max_length $addOpt.max_length
+      --numberofbins $addOpt.numberofbins
+      -s $addOpt.sequence
+      #if $addOpt.limitBins.binselect == "yes":
+        --bins1 $addOpt.limitBins.bs_first
+        --bins2 $addOpt.limitBins.bs_last
+      #end if#
+      $addOpt.ignore
+      $addOpt.oall.outputall
+    #end if#
+    -o "$input.name"
+    ## Per-gene output fills six text datasets; otherwise a single GTF dataset is written.
+    #if $addOpt.options == "edit" and $addOpt.oall.outputall == "--outputall":
+      --id $sd.id
+      --sd $sd
+      --ssub $ssub
+      --sdel $sdel
+      --asd $asd
+      --assub $assub
+      --asdel $asdel
+    #else:
+      --out $out
+      --id $out.id
+    #end if#
+  </command>
+  <version_command>/usr/local/bin/pyBinCollector.py --version</version_command>
+  <inputs>
+    <param format="gtf" name="input" type="data" label="Input File -f" help="pyReadCounters or pyMotif gtf output files" />
+
+    <!-- GTF source: an entry from the pycrac_gtf data table, or a GTF dataset from the history. -->
+    <conditional name="addGTF">
+      <param name="gtfFile" type="select" label="Choose GTF File from">
+        <option value="default" selected="true">Defaults</option>
+        <option value="other">History</option>
+      </param>
+      <when value="default">
+        <param name="gtf" type="select" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates">
+          <options from_data_table="pycrac_gtf"/>
+        </param>
+
+        <conditional name="annotate">
+          <param name="annotations" type="select" label="Select annotation">
+            <option value="all" selected="true">All</option>
+            <option value="manual">Enter in text box</option>
+            <option value="auto">Scan pyGetGTFSources file</option>
+          </param>
+          <when value="all">
+            <param name="annotation" type="hidden" value="all"/>
+          </when>
+          <when value="manual">
+            <param name="annotation" type="text" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool">
+              <validator type="empty_field" message="Please enter a value"/>
+            </param>
+          </when>
+          <when value="auto">
+            <param format="tabular" name="gtf_annotation" type="data" label="GTF annotation File (pyGetGTFSources output)" help="Tabular file containing unique list of annotations/sources in selected GTF file. Refer to pyGetGTFSources"/>
+            <conditional name="scan">
+              <param name="annotations" type="select" label="Scan this file for annotations" help="Choose the correct GTF file then choose GO">
+                <option value="wait" selected="true">Waiting</option>
+                <option value="scanning">Go</option>
+              </param>
+              <when value="wait">
+              </when>
+              <when value="scanning">
+                <param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation">
+                  <options from_dataset="gtf_annotation">
+                    <column name="name" index="0"/>
+                    <column name="value" index="0"/>
+                  </options>
+                </param>
+              </when>
+            </conditional>
+          </when>
+        </conditional>
+
+      </when>
+      <when value="other">
+        <param format="gtf" name="gtf" type="data" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"/>
+        <conditional name="annotate">
+          <param name="annotations" type="select" label="Select annotation">
+            <option value="all" selected="true">All</option>
+            <option value="manual">Enter in text box</option>
+            <option value="auto">Scan selected file</option>
+          </param>
+          <when value="all">
+            <param name="annotation" type="hidden" value="all"/>
+          </when>
+          <when value="manual">
+            <param name="annotation" type="text" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool">
+              <validator type="empty_field" message="Please enter a value"/>
+            </param>
+          </when>
+          <when value="auto">
+            <param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation">
+              <options from_dataset="gtf">
+                <column name="name" index="1"/>
+                <column name="value" index="1"/>
+                <filter type="unique_value" name="unique" column="1"/>
+              </options>
+            </param>
+          </when>
+        </conditional>
+      </when>
+    </conditional>
+
+
+    <!-- Optional settings; the command template only reads them when "Edit" is chosen. -->
+    <conditional name="addOpt">
+      <param name="options" type="select" label="Options">
+        <option value="default" selected="true">Default</option>
+        <option value="edit">Edit</option>
+      </param>
+      <when value="edit">
+        <param name="range" type="integer" label="Range --range" value="0" size="5" help="Manually set the length of the 5' and 3' UTRs 0-50000">
+          <validator type="in_range" min="0" max="50000" message="Please enter a value between 0 and 50000"/>
+        </param>
+        <param name="numberofbins" type="integer" label="Set the number of bins --numberofbins" value="20" size="7" help="Set the number of bins you want to divide the genes into">
+          <validator type="in_range" min="20" max="1000" message="Please enter a value between 20 and 1000"/>
+        </param>
+        <param name="min_length" type="integer" label="Set the minimum gene length (nt) --min_length" value="50" size="7" help="To filter the data for gene length (nucleotides)" >
+          <validator type="in_range" min="20" message="Please enter a value of at least 20"/>
+        </param>
+        <param name="max_length" type="integer" label="Set the maximum gene length (nt) --max_length" help="Default = 100000000" value="100000000" size="10" >
+          <validator type="in_range" min="50" max="100000000" message="Please enter a value between 50 and 100000000"/>
+        </param>
+        <param name="sequence" type="select" label="What sequences do you want to run pyBinCollector on? --sequence">
+          <option value="genomic" selected="true">Genomic Sequence</option>
+          <option value="coding">Coding Sequence</option>
+          <option value="intron">Introns</option>
+          <option value="exon">Exons</option>
+          <option value="CDS">CDS</option>
+          <option value="5UTR">5UTR</option>
+          <option value="3UTR">3UTR</option>
+        </param>
+        <conditional name="limitBins">
+          <param name="binselect" type="select" label="Select sequences that map to specific bins --binselect">
+            <option value="no" selected="true">No</option>
+            <option value="yes">Yes</option>
+          </param>
+          <when value="yes">
+            <param name="bs_first" type="integer" label="Select First Bin" value="1" size="7">
+              <validator type="in_range" min="1" message="Please enter a value greater than 0"/>
+            </param>
+            <param name="bs_last" type="integer" label="Select Last Bin" value="2" size="7">
+              <validator type="in_range" min="2" message="Please enter a value of at least 2"/>
+            </param>
+          </when>
+          <when value="no">
+          </when>
+        </conditional>
+        <param name="ignore" type="select" label="Ignore strand information? --ignorestrand">
+          <option value="" selected="true">No</option>
+          <option value="--ignorestrand">Yes</option>
+        </param>
+        <conditional name="oall">
+          <param name="outputall" type="select" label="Output all genes --outputall" help="output the normalized distribution for each individual gene, rather than making a cumulative coverage plot">
+            <option value="" selected="true">No</option>
+            <option value="--outputall">Yes</option>
+          </param>
+          <when value="--outputall"/>
+          <when value=""/>
+        </conditional>
+      </when>
+      <when value="default">
+      </when>
+    </conditional>
+    <param name="label" type="text" size="30" value="pyBinCollector" label="Enter output file label -o" />
+  </inputs>
+
+  <outputs>
+    <!-- addOpt only has an "oall" entry when "Edit" is chosen, so every filter tests the options selector first. -->
+    <data format="gtf" name="out" label="${label.value}.gtf">
+      <filter>addOpt['options'] != "edit" or addOpt['oall']['outputall'] == ""</filter>
+    </data>
+    <data format="txt" name="sd" label="sense_data_${label.value}.txt">
+      <filter>addOpt['options'] == "edit" and addOpt['oall']['outputall'] == "--outputall"</filter>
+    </data>
+    <data format="txt" name="ssub" label="sense_subs_${label.value}.txt">
+      <filter>addOpt['options'] == "edit" and addOpt['oall']['outputall'] == "--outputall"</filter>
+    </data>
+    <data format="txt" name="sdel" label="sense_dels_${label.value}.txt">
+      <filter>addOpt['options'] == "edit" and addOpt['oall']['outputall'] == "--outputall"</filter>
+    </data>
+    <data format="txt" name="asd" label="anti_sense_data_${label.value}.txt">
+      <filter>addOpt['options'] == "edit" and addOpt['oall']['outputall'] == "--outputall"</filter>
+    </data>
+    <data format="txt" name="assub" label="anti_sense_subs_${label.value}.txt">
+      <filter>addOpt['options'] == "edit" and addOpt['oall']['outputall'] == "--outputall"</filter>
+    </data>
+    <data format="txt" name="asdel" label="anti_sense_dels_${label.value}.txt">
+      <filter>addOpt['options'] == "edit" and addOpt['oall']['outputall'] == "--outputall"</filter>
+    </data>
+  </outputs>
+  <help>
+
+
+.. class:: infomark
+
+**pyBinCollector**
+
+pyBinCollector is part of the pyCRAC_ package. Allows the user to generate genome-wide coverage plots. Normalises gene lengths by dividing genes into a
+fixed number of bins and then calculates the hit density in each bin. The program also allows the user to input specific bin numbers to extract
+blocks/clusters present in these bins.
+
+
+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
+
+------
+
+**Parameter list**
+
+File input options::
+
+  -f FILE, --input_file=FILE
+                        Provide the path and name of the pyReadCounters.py or
+                        pyMotif.py GTF file. By default the program expects
+                        data from the standard input.
+  -o OUTPUT_FILE, --output_file=OUTPUT_FILE
+                        To set an output file name. Do not add a file
+                        extension. By default, if the --outputall flag is not
+                        used, the program writes to the standard output.
+  --gtf=yeast.gtf
+                        type the path to the gtf annotation file that you want
+                        to use. Default is /usr/local/pyCRAC/db/Saccharomyces_
+                        cerevisiae.EF2.59.1.2.gtf
+
+pyBinCollector.py specific options::
+
+  -a protein_coding, --annotation=protein_coding
+                        select which annotation (i.e. protein_coding, ncRNA,
+                        sRNA, rRNA, tRNA, snoRNA, all) you would like to focus
+                        your search on. Default = all
+  --min_length=20
+                        to set a minimum length threshold for genes. Genes
+                        shorter than the minimal length will be discarded.
+                        Default = 1
+  --max_length=10000
+                        to set a maximum length threshold for genes. Genes
+                        larger than the maximum length will be discarded.
+                        Default = 100000000
+  -n 20, --numberofbins=20
+                        select the number of bins you want to generate.
+                        Default=20
+  --binselect=2 4
+                        allows selection of sequences that were mapped to
+                        specific bins. This option expects two numbers, one
+                        for each bin, separated by a space. For example:
+                        --binselect 20 30.
+  --outputall
+                        use this flag to output the normalized distribution
+                        for each individual gene, rather than making a
+                        cumulative coverage plot. Useful for making box plots
+                        or for making heat maps.
+
+Common options::
+
+  -r 100, --range=100
+                        allows you to set the length of the UTR regions. If
+                        you set '-r 50' or '--range=50', then the program will
+                        set a fixed length (50 bp) regardless of whether the
+                        GTF file has genes with annotated UTRs.
+  -s intron, --sequence=intron
+                        with this option you can select whether you want to
+                        generate bins from the coding or genomic sequence or
+                        introns,exon,CDS, or UTR coordinates. Default =
+                        genomic
+  --ignorestrand
+                        To ignore strand information and all reads overlapping
+                        with genomic features will be considered sense reads.
+                        Useful for analysing ChIP or RIP data
+
+
+
+
+
+  </help>
+</tool>