Mercurial > repos > swebb > pycrac
view pyCRAC/pyBinCollector.xml @ 0:19b20927172d draft
Uploaded
author | swebb |
---|---|
date | Tue, 18 Jun 2013 09:11:00 -0400 |
parents | |
children |
line wrap: on
line source
<tool id="pyBinCollector" name="pyBinCollector">
    <!--
        Galaxy wrapper for pyBinCollector (pyCRAC package).
        Takes a pyReadCounters/pyMotif GTF file plus a GTF annotation file and
        writes either one cumulative GTF output ("out") or, when the outputall
        flag is selected, six per-gene text outputs (sense/anti-sense data,
        substitutions and deletions).
    -->
    <requirements>
        <requirement type="package">pyCRAC</requirement>
    </requirements>

    <!-- The script name must stay the first token: Galaxy prefixes it with the interpreter. -->
    <command interpreter="perl">
        pyBinCollector.pl
        -f $input
        --gtf $addGTF.gtf
        ## The scanned annotation select only exists once the scan selector is
        ## set to Go; while it is still Waiting fall back to all, which is the
        ## same value the All choice passes.
        #if $addGTF.gtfFile == "default" and $addGTF.annotate.annotations == "auto":
            #if $addGTF.annotate.scan.annotations == "scanning":
                --annotation $addGTF.annotate.scan.annotation
            #else:
                --annotation all
            #end if
        #else:
            --annotation $addGTF.annotate.annotation
        #end if
        #if $addOpt.options == "edit":
            --options
            --range $addOpt.range
            --min_length $addOpt.min_length
            --max_length $addOpt.max_length
            --numberofbins $addOpt.numberofbins
            -s $addOpt.sequence
            #if $addOpt.limitBins.binselect == "yes":
                --bins1 $addOpt.limitBins.bs_first
                --bins2 $addOpt.limitBins.bs_last
            #end if
            $addOpt.ignore
            $addOpt.oall.outputall
        #end if
        -o "$input.name"
        ## Per-gene mode writes six text datasets; every other mode writes the single GTF dataset.
        #if $addOpt.options == "edit" and $addOpt.oall.outputall == "--outputall":
            --id $sd.id
            --sd $sd
            --ssub $ssub
            --sdel $sdel
            --asd $asd
            --assub $assub
            --asdel $asdel
        #else:
            --out $out
            --id $out.id
        #end if
    </command>
    <version_command>/usr/local/bin/pyBinCollector.py --version</version_command>

    <inputs>
        <param format="gtf" name="input" type="data" label="Input File -f" help="pyReadCounters or pyMotif gtf output files" />

        <!-- GTF annotation file: either a built-in entry from the pycrac_gtf data table or a history dataset. -->
        <conditional name="addGTF">
            <param name="gtfFile" type="select" label="Choose GTF File from">
                <option value="default" selected="true">Defaults</option>
                <option value="other">History</option>
            </param>
            <when value="default">
                <param name="gtf" type="select" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates">
                    <options from_data_table="pycrac_gtf"/>
                </param>
                <conditional name="annotate">
                    <param name="annotations" type="select" label="Select annotation">
                        <option value="all" selected="true">All</option>
                        <option value="manual">Enter in text box</option>
                        <option value="auto">Scan pyGetGTFSources file</option>
                    </param>
                    <when value="all">
                        <param name="annotation" type="hidden" format="txt" size="10" value="all"/>
                    </when>
                    <when value="manual">
                        <param name="annotation" type="text" format="txt" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool">
                            <validator type="empty_field" message="Please enter a value"/>
                        </param>
                    </when>
                    <when value="auto">
                        <param format="tabular" name="gtf_annotation" type="data" label="GTF annotation File (pyGetGTFSources output)" help="Tabular file containing unique list of annotations/sources in selected GTF file. Refer to pyGetGTFSources"/>
                        <!-- Two-step selector: the annotation list is only offered after the user switches to Go. -->
                        <conditional name="scan">
                            <param name="annotations" type="select" label="Scan this file for annotations" help="Choose the correct GTF file then choose GO">
                                <option value="wait" selected="true">Waiting</option>
                                <option value="scanning">Go</option>
                            </param>
                            <when value="wait">
                            </when>
                            <when value="scanning">
                                <param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation">
                                    <options from_dataset="gtf_annotation">
                                        <column name="name" index="0"/>
                                        <column name="value" index="0"/>
                                    </options>
                                </param>
                            </when>
                        </conditional>
                    </when>
                </conditional>
            </when>
            <when value="other">
                <param format="gtf" name="gtf" type="data" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"/>
                <conditional name="annotate">
                    <param name="annotations" type="select" label="Select annotation">
                        <option value="all" selected="true">All</option>
                        <option value="manual">Enter in text box</option>
                        <option value="auto">Scan selected file</option>
                    </param>
                    <when value="all">
                        <param name="annotation" type="hidden" format="txt" size="10" value="all"/>
                    </when>
                    <when value="manual">
                        <param name="annotation" type="text" format="txt" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool">
                            <validator type="empty_field" message="Please enter a value"/>
                        </param>
                    </when>
                    <when value="auto">
                        <!-- Offers the unique values of the second column (index 1) of the selected GTF dataset. -->
                        <param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation">
                            <options from_dataset="gtf">
                                <column name="name" index="1"/>
                                <column name="value" index="1"/>
                                <filter type="unique_value" name="unique" column="1"/>
                            </options>
                        </param>
                    </when>
                </conditional>
            </when>
        </conditional>

        <!-- Optional settings; nothing from this section reaches the command line unless "Edit" is chosen. -->
        <conditional name="addOpt">
            <param name="options" type="select" label="Options">
                <option value="default" selected="true">Default</option>
                <option value="edit">Edit</option>
            </param>
            <when value="edit">
                <param format="integer" name="range" type="integer" label="Range --range" value="0" size="5" help="Manually set the length of the 5' and 3' UTRs 0>50000">
                    <validator type="in_range" min="0" max="50000" message="Please enter a value between 0 and 50000"/>
                </param>
                <param format="integer" name="numberofbins" type="integer" label="Set the number of bins --numberofbins" value="20" size="7" help="Set the number of bins you want to divide the genes into">
                    <validator type="in_range" min="20" max="1000" message="Please enter a value between 20 and 1000"/>
                </param>
                <param format="integer" name="min_length" type="integer" label="Set the minimum gene length (nt) --min_length" value="50" size="7" help="To filter the data for gene length (nucleotides)">
                    <validator type="in_range" min="20" message="Please enter a value of 20 or greater"/>
                </param>
                <param format="integer" name="max_length" type="integer" label="Set the maximum gene length (nt) --max_length" help="Default = 100000000" value="100000000" size="10">
                    <validator type="in_range" min="50" max="100000000" message="Please enter a value between 50 and 100000000"/>
                </param>
                <param name="sequence" type="select" label="What sequences do you want to run pyBinCollector on? --sequence">
                    <option value="genomic" selected="true">Genomic Sequence</option>
                    <option value="coding">Coding Sequence</option>
                    <option value="intron">Introns</option>
                    <option value="exon">Exons</option>
                    <option value="CDS">CDS</option>
                    <option value="5UTR">5UTR</option>
                    <option value="3UTR">3UTR</option>
                </param>
                <conditional name="limitBins">
                    <param name="binselect" type="select" label="Select sequences that map to specific bins --binselect">
                        <option value="no" selected="true">No</option>
                        <option value="yes">Yes</option>
                    </param>
                    <when value="yes">
                        <param format="integer" name="bs_first" type="integer" label="Select First Bin" value="1" size="7">
                            <validator type="in_range" min="1" message="Please enter a value greater than 0"/>
                        </param>
                        <param format="integer" name="bs_last" type="integer" label="Select Last Bin" value="2" size="7">
                            <validator type="in_range" min="2" message="Please enter a value greater than 1"/>
                        </param>
                    </when>
                    <when value="no">
                    </when>
                </conditional>
                <!-- The option values of the next two selects are the literal flags inserted into the command. -->
                <param name="ignore" type="select" label="Ignore strand information? --ignorestrand">
                    <option value="" selected="true">No</option>
                    <option value="--ignorestrand">Yes</option>
                </param>
                <conditional name="oall">
                    <param name="outputall" type="select" label="Output all genes --outputall" help="output the normalized distribution for each individual gene, rather than making a cumulative coverage plot">
                        <option value="" selected="true">No</option>
                        <option value="--outputall">Yes</option>
                    </param>
                    <when value="--outputall"/>
                    <when value=""/>
                </conditional>
            </when>
            <when value="default">
            </when>
        </conditional>

        <param name="label" type="text" format="txt" size="30" value="pyBinCollector" label="Enter output file label -o" />
    </inputs>

    <outputs>
        <!--
            Single GTF output, produced in every mode except per-gene output.
            The options test comes first because addOpt has no 'oall' entry
            while the options selector is left on "default".
        -->
        <data format="gtf" name="out" label="${label.value}.gtf">
            <filter>addOpt['options'] == "default" or addOpt['oall']['outputall'] == ""</filter>
        </data>
        <!-- Per-gene outputs, produced only when options are edited and the outputall flag is selected. -->
        <data format="txt" name="sd" label="sense_data_${label.value}.txt">
            <filter>addOpt['options'] == "edit" and addOpt['oall']['outputall'] == "--outputall"</filter>
        </data>
        <data format="txt" name="ssub" label="sense_subs_${label.value}.txt">
            <filter>addOpt['options'] == "edit" and addOpt['oall']['outputall'] == "--outputall"</filter>
        </data>
        <data format="txt" name="sdel" label="sense_dels_${label.value}.txt">
            <filter>addOpt['options'] == "edit" and addOpt['oall']['outputall'] == "--outputall"</filter>
        </data>
        <data format="txt" name="asd" label="anti_sense_data_${label.value}.txt">
            <filter>addOpt['options'] == "edit" and addOpt['oall']['outputall'] == "--outputall"</filter>
        </data>
        <data format="txt" name="assub" label="anti_sense_subs_${label.value}.txt">
            <filter>addOpt['options'] == "edit" and addOpt['oall']['outputall'] == "--outputall"</filter>
        </data>
        <data format="txt" name="asdel" label="anti_sense_dels_${label.value}.txt">
            <filter>addOpt['options'] == "edit" and addOpt['oall']['outputall'] == "--outputall"</filter>
        </data>
    </outputs>

    <help>

.. class:: infomark

**pyBinCollector**

pyBinCollector is part of the pyCRAC_ package. Allows the user to generate genome-wide coverage plots. Normalises gene lengths by dividing genes into a fixed number of bins and then calculates the hit density in each bin. The program also allows the user to input specific bin numbers to extract blocks/clusters present in these bins.

.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html

------

**Parameter list**

File input options::

    -f FILE, --input_file=FILE
                        Provide the path and name of the pyReadCounters.py or
                        pyMotif.py GTF file. By default the program expects
                        data from the standard input.
    -o OUTPUT_FILE, --output_file=OUTPUT_FILE
                        To set an output file name. Do not add a file
                        extension. By default, if the --outputall flag is not
                        used, the program writes to the standard output.
    --gtf=yeast.gtf     type the path to the gtf annotation file that you want
                        to use. Default is /usr/local/pyCRAC/db/Saccharomyces_cerevisiae.EF2.59.1.2.gtf

pyBinCollector.py specific options::

    -a protein_coding, --annotation=protein_coding
                        select which annotation (i.e. protein_coding, ncRNA,
                        sRNA, rRNA, tRNA, snoRNA, all) you would like to focus
                        your search on. Default = all
    --min_length=20     to set a minimum length threshold for genes. Genes
                        shorter than the minimal length will be discarded.
                        Default = 1
    --max_length=10000  to set a maximum length threshold for genes. Genes
                        larger than the maximum length will be discarded.
                        Default = 100000000
    -n 20, --numberofbins=20
                        select the number of bins you want to generate.
                        Default=20
    --binselect=2 4     allows selection of sequences that were mapped to
                        specific bins. This option expects two numbers, one
                        for each bin, separated by a space. For example:
                        --binselect 20 30.
    --outputall         use this flag to output the normalized distribution
                        for each individual gene, rather than making a
                        cumulative coverage plot. Useful for making box plots
                        or for making heat maps.

Common options::

    -r 100, --range=100
                        allows you to set the length of the UTR regions. If
                        you set '-r 50' or '--range=50', then the program will
                        set a fixed length (50 bp) regardless of whether the
                        GTF file has genes with annotated UTRs.
    -s intron, --sequence=intron
                        with this option you can select whether you want to
                        generate bins from the coding or genomic sequence or
                        introns,exon,CDS, or UTR coordinates. Default =
                        genomic
    --ignorestrand      To ignore strand information and all reads overlapping
                        with genomic features will be considered sense reads.
                        Useful for analysing ChIP or RIP data

    </help>
</tool>