annotate aggregate.xml @ 37:ceb1ee961db1 draft default tip

Uploaded
author kaymccoy
date Sun, 06 Nov 2016 20:30:31 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
37
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
1 <tool id="aggregate" name="Aggregate">
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
2 <description>fitness calculations by gene</description>
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
3 <requirements>
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
4 <requirement type="package" version="1.64">biopython</requirement>
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
5 </requirements>
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
6 <command interpreter="python">
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
7 aggregate.py
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
8 #if $mark.certain == "yes":
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
9 -m $mark.genes
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
10 #end if
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
11 #if $weighted.algorithms == "yes":
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
12 -w 1
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
13 #end if
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
14 -x $cutoff
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
15 -l $weightceiling
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
16 #if $blank.count == "yes":
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
17 -b $blank.custom_blanks
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
18 #end if
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
19 #if $blank.count == "no":
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
20 -f $blank.txt_blanks
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
21 #end if
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
22 -c $ref
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
23 -o $output
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
24 $input
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
25 #for $a in $additionalcsv
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
26 ${a.input2}
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
27 #end for
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
28 </command>
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
29 <inputs>
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
30 <param name="input" type="data" label="csv fitness file"/>
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
31 <repeat name="additionalcsv" title="Additional csv fitness file(s)">
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
32 <param name="input2" type="data" label="Select" />
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
33 </repeat>
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
34 <param name="ref" type="data" label="GenBank reference genome"/>
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
35 <conditional name="mark">
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
36 <param name="certain" type="select" label="Mark certain genes?">
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
37 <option value="no">No</option>
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
38 <option value="yes">Yes</option>
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
39 </param>
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
40 <when value="no">
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
41 <!-- do nothing -->
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
42 </when>
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
43 <when value="yes">
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
44 <param name="genes" type="data" label="Genes to mark" />
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
45 </when>
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
46 </conditional>
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
47 <conditional name="weighted">
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
48 <param name="algorithms" type="select" label="Use weighted algorithms?">
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
49 <option value="no">No</option>
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
50 <option value="yes">Yes</option>
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
51 </param>
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
52 <when value="no"/> <!-- no extra parameters; the command template omits -w -->
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
53 <when value="yes"/> <!-- no extra parameters; the command template adds "-w 1" -->
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
54 </conditional>
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
55 <param name="weightceiling" type="float" value="50.0" label="Weight ceiling"/>
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
56 <param name="cutoff" type="float" value="10.0" label="Cutoff3"/>
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
57 <conditional name="blank">
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
58 <param name="count" type="select" label="Enter custom bottleneck correction value?">
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
59 <option value="no">No</option>
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
60 <option value="yes">Yes</option>
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
61 </param>
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
62 <when value="no"> <!-- dataset is passed to aggregate.py via -f -->
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
63 <param name="txt_blanks" type="data" label="txt output from Calc_fit or Consol_fit"/>
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
64 </when>
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
65 <when value="yes"> <!-- value is passed to aggregate.py via -b; min/max enforce the range stated in the label -->
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
66 <param name="custom_blanks" type="float" value="0.0" min="0.0" max="1.0" label="blank count (a number from 0.0 to 1.0)"/>
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
67 </when>
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
68 </conditional>
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
69 </inputs>
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
70 <outputs>
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
71 <data name="output" format="csv"/>
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
72 </outputs>
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
73 <help>
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
74
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
75 **What it does**
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
76
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
77 This tool calculates the aggregate fitness values of mutations by gene.
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
78
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
79 **The options explained**
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
80
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
81 The csv fitness file(s): These are the csv (comma separated values) files containing the fitness values you want to aggregate by gene. Since they should have been produced by the "Calculate Fitness" tool, each line besides the header should represent the following information for an insertion location: position,strand,count_1,count_2,ratio,mt_freq_t1,mt_freq_t2,pop_freq_t1,pop_freq_t2,gene,D,W,nW
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
82
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
83 GenBank reference genome: The reference genome of whatever model you're working with, which needs to be in standard GenBank format. For more on that format, see the GenBank website.
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
84
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
85 Marking certain genes: If you choose to mark certain genes, those genes will have an "M" under the M column of the resulting aggregate file.
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
86
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
87 Using weighted algorithms: Recommended. If you choose to use weighted algorithms, scores will be weighted by the number of reads their insertion location has, as insertions with more reads tend to be more accurate.
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
88
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
89 Weight ceiling: This value lets you set a weight ceiling for the weights of fitness values. It's only relevant if you're using weighted algorithms.
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
90
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
91 Cutoff3: This value lets you ignore the fitness scores of any insertion locations with an average count (the number of counts from t1 and t2 divided by 2) less than it.
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
92
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
93 Blanks: This value lets you exclude a % of blank fitness scores (scores with a fitness of 0) from your calculations. It should be entered as a float (e.g. 0.10 would be 10%) if entered by hand, or you can use the blank % calculated from the normalization genes by calc_fit by entering its txt output file.
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
94
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
95 The name of your output file: self-explanatory. Remember to have it end in ".csv".
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
96
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
97 **Additional notes**
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
98
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
99 The output file should have each line (besides the header) represent the following information for a particular gene: locus,mean,var,sd,se,gene,Total,Blank,Not Blank,Blank Removed,M
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
100
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
101 </help>
ceb1ee961db1 Uploaded
kaymccoy
parents:
diff changeset
102 </tool>