annotate aggregate.xml @ 2:1c7228b9776a draft

Uploaded
author kaymccoy
date Thu, 11 Aug 2016 18:08:49 -0400
parents a66c287c1864
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
1 <tool id="aggregate" name="Aggregate">
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
2 <description>fitness calculations by gene</description>
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
3 <requirements>
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
4 <requirement type="package" version="1.64">biopython</requirement>
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
5 </requirements>
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
6 <command interpreter="python">
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
7 aggregate.py
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
8 #if $mark.certain == "yes":
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
9 -m $mark.genes
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
10 #end if
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
11 #if $weighted.algorithms == "yes":
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
12 -w 1
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
13 #end if
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
14 -x $cutoff
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
15 -l $weightceiling
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
16 #if $blank.count == "yes":
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
17 -b $blank.custom_blanks
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
18 #end if
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
19 #if $blank.count == "no":
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
20 -f $blank.txt_blanks
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
21 #end if
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
22 -c $ref
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
23 -o $output
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
24 $input
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
25 #for $a in $additionalcsv
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
26 ${a.input2}
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
27 #end for
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
28 </command>
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
29 <inputs>
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
30 <param name="input" type="data" label="csv fitness file"/>
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
31 <repeat name="additionalcsv" title="Additional csv fitness file(s)">
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
32 <param name="input2" type="data" label="Select" />
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
33 </repeat>
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
34 <param name="ref" type="data" label="GenBank reference genome"/>
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
35 <conditional name="mark">
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
36 <param name="certain" type="select" label="Mark certain genes?">
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
37 <option value="no">No</option>
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
38 <option value="yes">Yes</option>
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
39 </param>
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
40 <when value="no">
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
41 <!-- do nothing -->
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
42 </when>
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
43 <when value="yes">
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
44 <param name="genes" type="data" label="Genes to mark" />
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
45 </when>
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
46 </conditional>
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
47 <conditional name="weighted">
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
48 <param name="algorithms" type="select" label="Use weighted algorithms?">
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
49 <option value="no">No</option>
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
50 <option value="yes">Yes</option>
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
51 </param>
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
52 <when value="no"/> <!-- when values must match the option values of the "algorithms" select (no/yes); the -w flag itself is added by the command template -->
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
53 <when value="yes"/>
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
54 </conditional>
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
55 <param name="weightceiling" type="float" value="50.0" label="Weight ceiling"/>
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
56 <param name="cutoff" type="float" value="10.0" label="Cutoff"/>
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
57 <conditional name="blank">
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
58 <param name="count" type="select" label="Enter custom blank count?">
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
59 <option value="no">No</option>
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
60 <option value="yes">Yes</option>
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
61 </param>
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
62 <when value="no">
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
63 <param name="txt_blanks" type="data" label="txt output from Calc_fit or Consol_fit"/>
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
64 </when>
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
65 <when value="yes">
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
66 <param name="custom_blanks" type="float" value="0.0" label="blank count (a number from 0.0 to 1.0)"/>
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
67 </when>
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
68 </conditional>
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
69 </inputs>
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
70 <outputs>
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
71 <data name="output" format="csv"/>
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
72 </outputs>
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
73 <help>
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
74
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
75 **What it does**
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
76
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
77 This tool calculates the aggregate fitness values of mutations by gene.
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
78
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
79 **The options explained**
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
80
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
81 The csv fitness file(s): These are the csv (comma separated values) files containing the fitness values you want to aggregate by gene. Since they should have been produced by the "Calculate Fitness" tool, each line besides the header should represent the following information for an insertion location: position,strand,count_1,count_2,ratio,mt_freq_t1,mt_freq_t2,pop_freq_t1,pop_freq_t2,gene,D,W,nW
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
82
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
83 GenBank reference genome: The reference genome of whatever model organism you're working with, which needs to be in standard GenBank format. For more on that format, see the GenBank website.
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
84
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
85 Marking certain genes: If you choose to mark certain genes, those genes will have an "M" under the M column of the resulting aggregate file.
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
86
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
87 Using weighted algorithms: Recommended. If you choose to use weighted algorithms, scores will be weighted by the number of reads their insertion location has, as insertions with more reads tend to be more accurate.
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
88
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
89 Weight ceiling: This value lets you set a weight ceiling for the weights of fitness values. It's only relevant if you're using weighted algorithms.
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
90
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
91 Cutoff: This value lets you ignore the fitness scores of any insertion locations whose average count (the sum of the counts from t1 and t2, divided by 2) is less than the cutoff.
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
92
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
93 Blanks: This value lets you exclude a percentage of blank fitness scores (scores with a fitness of 0) from your calculations. If entered by hand, it should be a float from 0.0 to 1.0 (e.g. 0.10 would be 10%). Alternatively, you can use the blank percentage calculated from the normalization genes by Calc_fit or Consol_fit by supplying that tool's txt output file.
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
94
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
95 The output file: The aggregate results are written to a new csv dataset in your history. Galaxy creates this dataset itself, so there is no output file name to enter.
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
96
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
97 **Additional notes**
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
98
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
99 The output file should have each line (besides the header) represent the following information for a particular gene: locus,mean,var,sd,se,gene,Total,Blank,Not Blank,Blank Removed,M
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
100
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
101 </help>
a66c287c1864 Uploaded
kaymccoy
parents:
diff changeset
102 </tool>