annotate aggregate.xml @ 5:9bc8cfd2ab08 draft

Uploaded
author kaymccoy
date Sun, 11 Dec 2016 17:01:36 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
5
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
1 <tool id="aggregate" name="Aggregate">
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
2 <description>fitness calculations by gene</description>
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
3 <requirements>
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
4 <requirement type="package" version="1.64">biopython</requirement>
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
5 </requirements>
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
6 <command interpreter="python">
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
7 aggregate.py
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
8 #if $mark.certain == "yes":
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
9 -m $mark.genes
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
10 #end if
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
11 #if $weighted.algorithms == "yes":
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
12 -w 1
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
13 #end if
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
14 -x $cutoff
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
15 -l $weightceiling
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
16 #if $blank.count == "yes":
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
17 -b $blank.custom_blanks
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
18 #end if
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
19 #if $blank.count == "no":
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
20 -f $blank.txt_blanks
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
21 #end if
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
22 -c $ref
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
23 -o $output
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
24 $input
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
25 #for $a in $additionalcsv
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
26 ${a.input2}
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
27 #end for
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
28 </command>
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
29 <inputs>
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
30 <param name="input" type="data" label="csv fitness file"/>
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
31 <repeat name="additionalcsv" title="Additional csv fitness file(s)">
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
32 <param name="input2" type="data" label="Select" />
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
33 </repeat>
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
34 <param name="ref" type="data" label="GenBank reference genome"/>
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
35 <conditional name="mark">
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
36 <param name="certain" type="select" label="Mark certain genes?">
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
37 <option value="no">No</option>
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
38 <option value="yes">Yes</option>
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
39 </param>
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
40 <when value="no">
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
41 <!-- do nothing -->
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
42 </when>
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
43 <when value="yes">
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
44 <param name="genes" type="data" label="Genes to mark" />
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
45 </when>
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
46 </conditional>
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
47 <conditional name="weighted">
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
48 <param name="algorithms" type="select" label="Use weighted algorithms?">
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
49 <option value="no">No</option>
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
50 <option value="yes">Yes</option>
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
51 </param>
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
52 <when value="no"/> <!-- case values must equal the option values of "algorithms"; this case has no extra inputs -->
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
53 <when value="yes"/> <!-- no extra inputs either; the command template emits "-w 1" when "yes" is selected -->
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
54 </conditional>
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
55 <param name="weightceiling" type="float" value="50.0" label="Weight ceiling"/>
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
56 <param name="cutoff" type="float" value="10.0" label="Cutoff3"/>
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
57 <conditional name="blank">
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
58 <param name="count" type="select" label="Enter custom bottleneck correction value?">
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
59 <option value="no">No</option>
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
60 <option value="yes">Yes</option>
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
61 </param>
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
62 <when value="no">
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
63 <param name="txt_blanks" type="data" label="txt output from Calc_fit or Consol_fit"/>
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
64 </when>
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
65 <when value="yes">
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
66 <param name="custom_blanks" type="float" value="0.0" label="blank count (a number from 0.0 to 1.0)"/>
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
67 </when>
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
68 </conditional>
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
69 </inputs>
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
70 <outputs>
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
71 <data name="output" format="csv"/>
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
72 </outputs>
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
73 <help>
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
74
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
75 **What it does**
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
76
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
77 This tool calculates the aggregate fitness values of mutations by gene.
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
78
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
79 **The options explained**
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
80
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
81 The csv fitness file(s): These are the csv (comma separated values) files containing the fitness values you want to aggregate by gene. Since they should have been produced by the "Calculate Fitness" tool, each line besides the header should represent the following information for an insertion location: position,strand,count_1,count_2,ratio,mt_freq_t1,mt_freq_t2,pop_freq_t1,pop_freq_t2,gene,D,W,nW
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
82
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
83 GenBank reference genome: The reference genome of whatever model you're working with, which needs to be in standard GenBank format. For more on that format, see the GenBank website.
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
84
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
85 Marking certain genes: If you choose to mark certain genes, those genes will have an "M" under the M column of the resulting aggregate file.
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
86
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
87 Using weighted algorithms: Recommended. If you choose to use weighted algorithms, scores will be weighted by the number of reads their insertion location has, as insertions with more reads tend to be more accurate.
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
88
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
89 Weight ceiling: This value lets you set a weight ceiling for the weights of fitness values. It's only relevant if you're using weighted algorithms.
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
90
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
91 Cutoff3: This value lets you ignore the fitness scores of any insertion locations with an average count (the number of counts from t1 and t2 divided by 2) less than it.
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
92
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
93 Bottleneck value: The percentage of insertions randomly lost, which will be discounted for all genes (for example, 20% would be entered as 0.20; default 0 if entered by hand). Alternatively, you can use the blank % that calc_fit calculated from the normalization genes by supplying its txt output file.
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
94
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
95 The name of your output file: self-explanatory. Remember to have it end in ".csv".
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
96
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
97 **Additional notes**
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
98
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
99 The output file should have each line (besides the header) represent the following information for a particular gene: locus,mean,var,sd,se,gene,Total,Blank,Not Blank,Blank Removed,M
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
100
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
101 </help>
9bc8cfd2ab08 Uploaded
kaymccoy
parents:
diff changeset
102 </tool>