view pyCRAC/pyBinCollector.xml @ 1:7c9574213c0a draft default tip

Uploaded
author swebb
date Thu, 20 Jun 2013 12:13:43 -0400
parents 19b20927172d
children
line wrap: on
line source

 <tool id ="pyBinCollector" name="pyBinCollector">
	<!-- pyBinCollector is part of the pyCRAC package (see the help text below), which is declared here as a package requirement. -->
	<requirements>
		<requirement type="package">pyCRAC</requirement>
	</requirements>
	<command interpreter="perl">
		pyBinCollector.pl
		## Input GTF dataset and the annotation GTF chosen in the addGTF conditional.
		-f ${input}
		--gtf ${addGTF.gtf}

		## The annotation value sits one level deeper (inside the scan conditional)
		## when a built-in GTF is combined with the scan-a-sources-file mode.
		## NOTE(review): the scan conditional defines no annotation parameter while its
		## selector is still set to wait, so that case needs confirming.
		#if $addGTF.gtfFile == "default" and $addGTF.annotate.annotations == "auto"
			--annotation ${addGTF.annotate.scan.annotation}
		#else
			--annotation ${addGTF.annotate.annotation}
		#end if

		## Tuning flags are only sent when the Options selector is set to edit.
		#if $addOpt.options == "edit"
			--options
			--range ${addOpt.range}
			--min_length ${addOpt.min_length}
			--max_length ${addOpt.max_length}
			--numberofbins ${addOpt.numberofbins}
			-s ${addOpt.sequence}
			#if $addOpt.limitBins.binselect == "yes"
				--bins1 ${addOpt.limitBins.bs_first}
				--bins2 ${addOpt.limitBins.bs_last}
			#end if
			## Each of these two selects expands to an empty string or to the literal flag.
			${addOpt.ignore}
			${addOpt.oall.outputall}
		#end if

		## NOTE(review): -o is given the input dataset name, whereas the label input
		## (captioned -o in the form) is only used in the output labels; confirm intent.
		-o "${input.name}"

		## Per-gene mode passes the six sense/anti-sense output datasets;
		## every other case passes the single GTF output.
		#if $addOpt.options == "edit" and $addOpt.oall.outputall == "--outputall"
			--id ${sd.id}
			--sd ${sd}
			--ssub ${ssub}
			--sdel ${sdel}
			--asd ${asd}
			--assub ${assub}
			--asdel ${asdel}
		#else
			--out ${out}
			--id ${out.id}
		#end if
	</command>
	<!-- Command run to obtain the version string of the wrapped script. NOTE(review): the absolute path is hard-coded; confirm it matches where the pyCRAC requirement actually installs pyBinCollector.py. -->
	<version_command>/usr/local/bin/pyBinCollector.py --version</version_command>
	<inputs>
		<!-- Primary input GTF dataset; the command template passes it to the wrapper script with -f. -->
		<param format="gtf" name="input" type="data" label="Input File -f" help="pyReadCounters or pyMotif gtf output files" />

		<!--
			Chooses the annotation GTF (the gtf parameter) either from the pycrac_gtf
			data table or from a history dataset. Both branches carry a nested
			"annotate" conditional that yields the annotation value used by the
			command template.
		-->
		<conditional name="addGTF">
			<param name="gtfFile" type="select" label="Choose GTF File from">
				<option value="default" selected="true">Defaults</option>
				<option value="other">History</option>
			</param>

			<!-- Branch 1: GTF taken from the pycrac_gtf data table. -->
			<when value="default">
				<param name="gtf"
				       type="select"
				       label="GTF File --gtf"
				       help="GTF file containing gene ID co-ordinates">
					<options from_data_table="pycrac_gtf"/>
				</param>

				<conditional name="annotate">
					<param name="annotations" type="select" label="Select annotation">
						<option value="all" selected="true">All</option>
						<option value="manual">Enter in text box</option>
						<option value="auto">Scan pyGetGTFSources file</option>
					</param>

					<!-- Fixed value "all". -->
					<when value="all">
						<param name="annotation" type="hidden" format="txt" size="10" value="all"/>
					</when>

					<!-- Free-text annotation name; must not be empty. -->
					<when value="manual">
						<param name="annotation"
						       type="text"
						       format="txt"
						       size="100"
						       value="protein_coding"
						       label="Select which annotation to focus search on --annotation"
						       help="To find a list of available annotations please use pyGetGTFSources tool">
							<validator type="empty_field" message="Please enter a value"/>
						</param>
					</when>

					<!--
						Annotation picked from column 0 of a tabular dataset. The select
						list only exists once the nested "scan" selector is set to
						"scanning".
						NOTE(review): the "wait" case defines no annotation parameter,
						yet the command template reads scan.annotation whenever this
						branch is active; confirm how that case should behave.
					-->
					<when value="auto">
						<param format="tabular"
						       name="gtf_annotation"
						       type="data"
						       label="GTF annotation File (pyGetGTFSources output)"
						       help="Tabular file containing unique list of annotations/sources in selected GTF file. Refer to pyGetGTFSources"/>
						<conditional name="scan">
							<param name="annotations"
							       type="select"
							       label="Scan this file for annotations"
							       help="Choose the correct GTF file then choose GO">
								<option value="wait" selected="true">Waiting</option>
								<option value="scanning">Go</option>
							</param>
							<when value="wait">
							</when>
							<when value="scanning">
								<param name="annotation"
								       type="select"
								       multiple="false"
								       label="Select which annotation to focus search on --annotation">
									<options from_dataset="gtf_annotation">
										<column name="name" index="0"/>
										<column name="value" index="0"/>
									</options>
								</param>
							</when>
						</conditional>
					</when>
				</conditional>
			</when>

			<!-- Branch 2: GTF taken from a history dataset. -->
			<when value="other">
				<param format="gtf"
				       name="gtf"
				       type="data"
				       label="GTF File --gtf"
				       help="GTF file containing gene ID co-ordinates"/>

				<conditional name="annotate">
					<param name="annotations" type="select" label="Select annotation">
						<option value="all" selected="true">All</option>
						<option value="manual">Enter in text box</option>
						<option value="auto">Scan selected file</option>
					</param>

					<!-- Fixed value "all". -->
					<when value="all">
						<param name="annotation" type="hidden" format="txt" size="10" value="all"/>
					</when>

					<!-- Free-text annotation name; must not be empty. -->
					<when value="manual">
						<param name="annotation"
						       type="text"
						       format="txt"
						       size="100"
						       value="protein_coding"
						       label="Select which annotation to focus search on --annotation"
						       help="To find a list of available annotations please use pyGetGTFSources tool">
							<validator type="empty_field" message="Please enter a value"/>
						</param>
					</when>

					<!-- Annotation picked from the unique values of column 1 of the selected GTF dataset. -->
					<when value="auto">
						<param name="annotation"
						       type="select"
						       multiple="false"
						       label="Select which annotation to focus search on --annotation">
							<options from_dataset="gtf">
								<column name="name" index="1"/>
								<column name="value" index="1"/>
								<filter type="unique_value" name="unique" column="1"/>
							</options>
						</param>
					</when>
				</conditional>
			</when>
		</conditional>


		<!--
			Optional tuning parameters. With "default" selected nothing extra is sent
			to the wrapper script; with "edit" every parameter below is passed on the
			command line. The nested "oall" conditional also decides which output
			datasets are created (see the filters in the outputs section).
			Validator messages are worded to match their bounds, which are assumed
			to be inclusive (Galaxy's in_range default).
		-->
		<conditional name="addOpt">
			<param name="options" type="select" label="Options">
				<option value="default" selected="true">Default</option>
				<option value="edit">Edit</option>
			</param>
			<when value="edit">
				<param name="range" type="integer" label="Range --range" value="0" size="5" help="Manually set the length of the 5' and 3' UTRs (0 to 50000)">
					<validator type="in_range" min="0" max="50000" message="Please enter a value between 0 and 50000"/>
				</param>
				<param name="numberofbins" type="integer" label="Set the number of bins --numberofbins" value="20" size="7" help="Set the number of bins you want to divide the genes into">
					<validator type="in_range" min="20" max="1000" message="Please enter a value between 20 and 1000"/>
				</param>
				<!-- A value of exactly 20 is expected to pass, so the message says "at least". -->
				<param name="min_length" type="integer" label="Set the minimum gene length (nt) --min_length" value="50" size="7" help="To filter the data for gene length (nucleotides)">
					<validator type="in_range" min="20" message="Please enter a value of at least 20"/>
				</param>
				<param name="max_length" type="integer" label="Set the maximum gene length (nt) --max_length" help="Default = 100000000" value="100000000" size="10">
					<validator type="in_range" min="50" max="100000000" message="Please enter a value between 50 and 100000000"/>
				</param>
				<param name="sequence" type="select" label="What sequences do you want to run pyBinCollector on? --sequence">
					<option value="genomic" selected="true">Genomic Sequence</option>
					<option value="coding">Coding Sequence</option>
					<option value="intron">Introns</option>
					<option value="exon">Exons</option>
					<option value="CDS">CDS</option>
					<option value="5UTR">5UTR</option>
					<option value="3UTR">3UTR</option>
				</param>

				<!-- Optional restriction to a bin range; the two bounds are sent as bins1 and bins2. -->
				<conditional name="limitBins">
					<param name="binselect" type="select" label="Select sequences that map to specific bins --binselect">
						<option value="no" selected="true">No</option>
						<option value="yes">Yes</option>
					</param>
					<when value="yes">
						<param name="bs_first" type="integer" label="Select First Bin" value="1" size="7">
							<validator type="in_range" min="1" message="Please enter a value greater than 0"/>
						</param>
						<!-- The lower bound here is 2, so the message must not claim that any value above 0 is accepted. -->
						<param name="bs_last" type="integer" label="Select Last Bin" value="2" size="7">
							<validator type="in_range" min="2" message="Please enter a value of at least 2"/>
						</param>
					</when>
					<when value="no">
					</when>
				</conditional>

				<!-- The option values are the literal command-line flags (or an empty string). -->
				<param name="ignore" type="select" label="Ignore strand information? --ignorestrand">
					<option value="" selected="true">No</option>
					<option value="--ignorestrand">Yes</option>
				</param>
				<conditional name="oall">
					<param name="outputall" type="select" label="Output all genes --outputall" help="output the normalized distribution for each individual gene, rather than making a cumulative coverage plot">
						<option value="" selected="true">No</option>
						<option value="--outputall">Yes</option>
					</param>
					<when value="--outputall"/>
					<when value=""/>
				</conditional>
			</when>
			<when value="default">
			</when>
		</conditional>
		<!-- Free-text label interpolated into every output dataset label (as label.value). NOTE(review): the caption mentions -o, but the command template passes the input dataset name to -o; confirm which is intended. -->
		<param name="label" type="text" format="txt" size="30" value="pyBinCollector" label="Enter output file label -o" />
	</inputs>

	<outputs>
		<!--
			Single GTF output, created in every case except per-gene mode.
			The filter is the exact complement of the per-gene filters below and of
			the if/else in the command template. The options test comes first
			because addOpt['oall'] only exists in the "edit" branch; indexing it
			unconditionally raises KeyError in default mode.
		-->
		<data format="gtf" name="out" label="${label.value}.gtf">
			<filter>addOpt['options'] != "edit" or addOpt['oall']['outputall'] != "--outputall"</filter>
		</data>

		<!-- Per-gene mode (Options set to edit and "Output all genes" set to Yes): six text outputs, three sense and three anti-sense. -->
		<data format="txt" name="sd" label="sense_data_${label.value}.txt">
			<filter>addOpt['options'] == "edit" and addOpt['oall']['outputall'] == "--outputall"</filter>
		</data>
		<data format="txt" name="ssub" label="sense_subs_${label.value}.txt">
			<filter>addOpt['options'] == "edit" and addOpt['oall']['outputall'] == "--outputall"</filter>
		</data>
		<data format="txt" name="sdel" label="sense_dels_${label.value}.txt">
			<filter>addOpt['options'] == "edit" and addOpt['oall']['outputall'] == "--outputall"</filter>
		</data>
		<data format="txt" name="asd" label="anti_sense_data_${label.value}.txt">
			<filter>addOpt['options'] == "edit" and addOpt['oall']['outputall'] == "--outputall"</filter>
		</data>
		<data format="txt" name="assub" label="anti_sense_subs_${label.value}.txt">
			<filter>addOpt['options'] == "edit" and addOpt['oall']['outputall'] == "--outputall"</filter>
		</data>
		<data format="txt" name="asdel" label="anti_sense_dels_${label.value}.txt">
			<filter>addOpt['options'] == "edit" and addOpt['oall']['outputall'] == "--outputall"</filter>
		</data>
	</outputs>
	<help>


.. class:: infomark

**pyBinCollector**

pyBinCollector is part of the pyCRAC_ package. It allows the user to generate genome-wide coverage plots: gene lengths are normalised by dividing each gene into a
fixed number of bins, and the hit density is then calculated for each bin. The program also allows the user to specify bin numbers in order to extract the
blocks/clusters present in those bins.

   
.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
        
------

**Parameter list**

File input options::

    -f FILE, --input_file=FILE
                        Provide the path and name of the pyReadCounters.py or
                        pyMotif.py GTF file. By default the program expects
                        data from the standard input.
    -o OUTPUT_FILE, --output_file=OUTPUT_FILE
                        To set an output file name. Do not add a file
                        extension. By default, if the --outputall flag is not
                        used, the program writes to the standard output.
    --gtf=yeast.gtf     
                        type the path to the gtf annotation file that you want
                        to use. Default is /usr/local/pyCRAC/db/Saccharomyces_
                        cerevisiae.EF2.59.1.2.gtf

pyBinCollector.py specific options::

    -a protein_coding, --annotation=protein_coding
                        select which annotation (i.e. protein_coding, ncRNA,
                        sRNA, rRNA, tRNA, snoRNA, all) you would like to focus
                        your search on. Default = all
    --min_length=20     
                        to set a minimum length threshold for genes. Genes
                        shorter than the minimal length will be discarded.
                        Default = 1
    --max_length=10000  
                        to set a maximum length threshold for genes. Genes
                        larger than the maximum length will be discarded.
                        Default = 100000000
    -n 20, --numberofbins=20
                        select the number of bins you want to generate.
                        Default=20
    --binselect=2 4     
                        allows selection of sequences that were mapped to
                        specific bins. This option expects two numbers, one
                        for each bin, separated by a space. For example:
                        --binselect 20 30.
    --outputall         
                        use this flag to output the normalized distribution
                        for each individual gene, rather than making a
                        cumulative coverage plot. Useful for making box plots
                        or for making heat maps.

Common options::

    -r 100, --range=100
                        allows you to set the length of the UTR regions. If
                        you set '-r 50' or '--range=50', then the program will
                        set a fixed length (50 bp) regardless of whether the
                        GTF file has genes with annotated UTRs.
    -s intron, --sequence=intron
                        with this option you can select whether you want to
                        generate bins from the coding or genomic sequence or
                        introns,exon,CDS, or UTR coordinates. Default =
                        genomic
    --ignorestrand      
                        To ignore strand information and all reads overlapping
                        with genomic features will be considered sense reads.
                        Useful for analysing ChIP or RIP data



 

	</help>
</tool>