Mercurial > repos > swebb > pycrac
diff pyCRAC/pyMotif.xml @ 0:19b20927172d draft
Uploaded
author | swebb |
---|---|
date | Tue, 18 Jun 2013 09:11:00 -0400 |
parents | |
children |
line wrap: on
line diff
<tool id="pyMotif" name="pyMotif">
  <!--
    Galaxy tool definition for pyMotif (part of the pyCRAC package).

    The tool runs the Perl wrapper pyMotif.pl on a GTF interval file together
    with a genomic reference sequence (tab file) and a GTF annotation file, and
    declares four outputs: k-mer Z-scores, data k-mer counts, random k-mer
    counts and a GTF of the top k-mers found in features.
  -->
  <requirements>
    <requirement type="package">pyCRAC</requirement>
  </requirements>

  <!--
    Command template (Cheetah).

    Annotation flag: when the GTF comes from the built-in defaults and the user
    chose "auto", the selected value lives under the nested "scan" conditional,
    and only exists once that conditional is switched to "scanning". While it
    is still "wait" the template falls back to "all" (the same value the "All"
    choice passes) instead of referencing an undefined variable. In every other
    case the value is the "annotation" parameter of the chosen "annotate"
    branch. The value is quoted because it may come from a free-text box.

    The k-mer, overlap and range flags are only emitted when the user chose to
    edit the standard options.

    NOTE(review): the "label" parameter is used for the output dataset labels
    only; the output prefix flag receives the input dataset name. Confirm this
    is what pyMotif.pl expects.
  -->
  <command interpreter="perl">
    pyMotif.pl
    -f $input
    --gtf=$addGTF.gtf

    #if $addGTF.gtfFile == "default" and $addGTF.annotate.annotations == "auto":
        #if $addGTF.annotate.scan.annotations == "scanning":
            --annotation "$addGTF.annotate.scan.annotation"
        #else:
            --annotation all
        #end if
    #else:
        --annotation "$addGTF.annotate.annotation"
    #end if

    --tab=$addTab.tab

    #if $addOpt.options == "edit":
        --options
        --k_min $addOpt.kmin
        --k_max $addOpt.kmax
        --numberofkmers=$addOpt.numberofkmers
        --overlap $addOpt.overlap
        --range $addOpt.range
    #end if
    -o "$input.name"
    --id $count.id
    --count $count
    --random $random
    --features $features
    --zscores $zscores
  </command>

  <!-- NOTE(review): absolute path is installation-specific; confirm it matches the deployed pyCRAC location. -->
  <version_command>/usr/local/bin/pyMotif.py --version</version_command>

  <inputs>
    <!-- Interval data to search for motifs. -->
    <param format="gtf" name="input" type="data" label="Input File --input_file" help="File of type .gtf" />

    <!-- Genomic reference sequence: either a server-side data table entry or a history dataset. -->
    <conditional name="addTab">
      <param name="tabFile" type="select" label="Choose Genomic Reference Sequence from">
        <option value="default" selected="true">Defaults</option>
        <option value="other">History</option>
      </param>
      <when value="default">
        <param name="tab" type="select" label="Genomic Reference Sequence --tab" help="Tab file containing genomic reference sequence">
          <options from_data_table="pycrac_tab"/>
        </param>
      </when>
      <when value="other">
        <param format="tabular" name="tab" type="data" label="Genomic Reference Sequence --tab" help="Tab file containing genomic reference sequence"/>
      </when>
    </conditional>

    <!--
      GTF annotation file plus the annotation to focus on. Both branches expose
      a nested "annotate" conditional with the choices all / manual / auto; the
      "auto" choice differs between them (see below).
    -->
    <conditional name="addGTF">
      <param name="gtfFile" type="select" label="Choose GTF File from">
        <option value="default" selected="true">Defaults</option>
        <option value="other">History</option>
      </param>

      <!-- GTF taken from the server-side data table. -->
      <when value="default">
        <param name="gtf" type="select" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates">
          <options from_data_table="pycrac_gtf"/>
        </param>

        <conditional name="annotate">
          <param name="annotations" type="select" label="Select annotation">
            <option value="all" selected="true">All</option>
            <option value="manual">Enter in text box</option>
            <option value="auto">Scan pyGetGTFSources file</option>
          </param>
          <when value="all">
            <param name="annotation" type="hidden" format="txt" size="10" value="all"/>
          </when>
          <when value="manual">
            <param name="annotation" type="text" format="txt" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool">
              <validator type="empty_field" message="Please enter a value"/>
            </param>
          </when>
          <!--
            "auto" for a default GTF: the annotation list is read from a
            separate tabular history dataset (pyGetGTFSources output), and the
            selection only appears after the "scan" selector is set to Go.
          -->
          <when value="auto">
            <param format="tabular" name="gtf_annotation" type="data" label="GTF annotation File (pyGetGTFSources output)" help="Tabular file containing unique list of annotations/sources in selected GTF file. Refer to pyGetGTFSources"/>
            <conditional name="scan">
              <param name="annotations" type="select" label="Scan this file for annotations" help="Choose the correct GTF file then choose GO">
                <option value="wait" selected="true">Waiting</option>
                <option value="scanning">Go</option>
              </param>
              <when value="wait">
              </when>
              <when value="scanning">
                <param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation">
                  <options from_dataset="gtf_annotation">
                    <column name="name" index="0"/>
                    <column name="value" index="0"/>
                  </options>
                </param>
              </when>
            </conditional>
          </when>
        </conditional>
      </when>

      <!-- GTF taken from the user's history. -->
      <when value="other">
        <param format="gtf" name="gtf" type="data" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"/>
        <conditional name="annotate">
          <param name="annotations" type="select" label="Select annotation">
            <option value="all" selected="true">All</option>
            <option value="manual">Enter in text box</option>
            <option value="auto">Scan selected file</option>
          </param>
          <when value="all">
            <param name="annotation" type="hidden" format="txt" size="10" value="all"/>
          </when>
          <when value="manual">
            <param name="annotation" type="text" format="txt" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool">
              <validator type="empty_field" message="Please enter a value"/>
            </param>
          </when>
          <!-- "auto" for a history GTF: unique values of column index 1 of the selected GTF itself. -->
          <when value="auto">
            <param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation">
              <options from_dataset="gtf">
                <column name="name" index="1"/>
                <column name="value" index="1"/>
                <filter type="unique_value" name="unique" column="1"/>
              </options>
            </param>
          </when>
        </conditional>
      </when>
    </conditional>

    <!-- Optional tuning of k-mer lengths, output size, flanking range and overlap. -->
    <conditional name="addOpt">
      <param name="options" type="select" label="Standard options">
        <option value="default" selected="true">Default</option>
        <option value="edit">Edit</option>
      </param>
      <when value="edit">
        <param format="integer" name="kmin" type="integer" label="Minimum k-mer Length --k_min " value="4" size="6" help="Set the minimal k-mer length">
          <validator type="in_range" min="1" message="Please enter a value >= 1"/>
        </param>
        <!-- Label, help and lower bound now describe the maximum length (previously copied from kmin, with a bound of 0). -->
        <param format="integer" name="kmax" type="integer" label="Maximum k-mer Length --k_max" value="8" size="6" help="Set the maximal k-mer length">
          <validator type="in_range" min="1" message="Please enter a value >= 1"/>
        </param>
        <param format="integer" name="numberofkmers" type="integer" label="Maximum number of k-mers in output file --numberofkmers" value="1000" size="6" help="Set the maximum number of k-mers in output">
          <validator type="in_range" min="0" message="Please enter a value >= 0"/>
        </param>
        <param format="integer" name="range" type="integer" label="Range --range" value="0" size="5" help="Manually set the length of the 5' and 3' UTRs 0>50000">
          <validator type="in_range" min="0" max="50000" message="Please enter a value between 0 and 50000"/>
        </param>
        <param format="integer" name="overlap" type="integer" label="Overlap --overlap" value="1" size="5" help="Sets the number of nucleotides a read has to overlap with a gene before it is considered a hit.">
          <validator type="in_range" min="1" message="Please enter a positive integer"/>
        </param>
      </when>
      <when value="default">
      </when>
    </conditional>

    <!-- Prefix used in the labels of the output datasets. -->
    <param name="label" type="text" format="txt" size="30" value="pyMotif" label="Enter output file label -o" />
  </inputs>

  <outputs>
    <data format="tabular" name="zscores" label="${label.value}_k-mer_Z_scores.txt"/>
    <data format="tabular" name="count" label="${label.value}_data_k-mers_count.txt"/>
    <data format="gtf" name="features" label="${label.value}_top_k-mers_in_features.gtf"/>
    <data format="tabular" name="random" label="${label.value}_random_k-mers_count.txt"/>
  </outputs>
  <help>

.. class:: infomark

**pyMotif**

pyMotif is part of the pyCRAC_ package. Looks for enriched sequence motifs in high-throughput sequencing data. Produces a GTF type output file
with coordinates and Z-scores for enriched motifs. The GTF file can be visualised in genome browsers.

.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html

------

**Parameter list**

File input options::

    -f intervals.gtf, --input_file=intervals.gtf
                        Provide the path to an interval gtf file. By default
                        it expects data from the standard input.
    -o OUTPUT_FILE, --output_file=OUTPUT_FILE
                        Use this flag to override the standard file names. Do
                        NOT add an extension.
    --gtf=annotation_file.gtf
                        type the path to the gtf annotation file that you want
                        to use
    --tab=tab_file.tab
                        type the path to the tab file that contains the
                        genomic reference sequence

pyMotif specific options::

    --k_min=4
                        this option allows you to set the shortest k-mer
                        length. Default = 4.
    --k_max=6
                        this option allows you to set the longest k-mer
                        length. Default = 8.
    -n 100, --numberofkmers=100
                        choose the maximum number of enriched k-mer sequences
                        you want to have reported in output files. Default =
                        1000

pyCRAC common options::

    -a protein_coding, --annotation=protein_coding
                        select which annotation (i.e. protein_coding, ncRNA,
                        sRNA, rRNA,snoRNA,snRNA, depending on the source of
                        your GTF file) you would like to focus your search on.
                        Default = all annotations
    -r 100, --range=100
                        allows you to add regions flanking the genomic
                        feature. If you set '-r 50' or '--range=50', then the
                        program will add 50 nucleotides to each feature on
                        each side regardless of whether the GTF file has genes
                        with annotated UTRs.
    --overlap=1
                        sets the number of nucleotides a motif has to overlap
                        with a genomic feature before it is considered a hit.
                        Default = 1 nucleotide




  </help>
</tool>