comparison cpm_tpm_rpk.xml @ 0:35d032c46a4e draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cpm_tpm_rpk commit cc0fd23c039cc4a39c5e4e320b50666b7d9b6f65
author artbio
date Wed, 25 Jul 2018 13:05:17 -0400
parents
children b74bab5157c4
comparison
equal deleted inserted replaced
-1:000000000000 0:35d032c46a4e
<tool id="cpm_tpm_rpk" name="Generate CPM, TPM, RPK" version="0.1.0">
    <description>from raw counts expression values</description>
    <macros>
        <!-- Gene length inputs shared by the TPM and RPK branches of the
             "option" conditional; both branches need exactly the same
             parameters, so they are declared once here. -->
        <xml name="gene_length_params">
            <param name="gene_file" type="data" format="txt" label="Gene length file"/>
            <param name="gene_sep" type="select" label="Gene length column separator">
                <option value="tab" selected="true">Tabs</option>
                <option value=",">Commas</option>
                <option value="\ ">Whitespaces</option>
                <option value=".">Dots</option>
                <option value="_">Underscores</option>
                <option value="-">Dashes</option>
            </param>
            <param name="gene_header" type="select" label="Consider first line of gene length file as header ?">
                <option value="TRUE" selected="true">Yes</option>
                <option value="FALSE">No</option>
            </param>
        </xml>
    </macros>
    <requirements>
        <requirement type="package" version="1.3.2=r3.3.2_0">r-optparse</requirement>
    </requirements>
    <stdio>
        <exit_code range="1:" level="fatal" description="Tool exception" />
    </stdio>
    <!-- Command line notes:
         * dataset paths are single-quoted so that unusual characters in a
           path cannot split or alter the command;
         * the separator arguments are intentionally left unquoted: the
           "Whitespace" choice has the value "\ ", which only becomes a
           literal space through unquoted shell escaping;
         * the gene length arguments are passed for TPM and RPK only;
         * the result is written to "output_log" when log transformation is
           requested and to "output" otherwise, mirroring the output filters. -->
    <command detect_errors="exit_code"><![CDATA[
        Rscript '$__tool_directory__/cpm_tpm_rpk.R'
            -d '$input'
            -t $option.type_transfo
            -s $input_sep
            -c $input_header
            #if str($option.type_transfo) in ("tpm", "rpk"):
                -f '$option.gene_file'
                --gene_sep $option.gene_sep
                --gene_header $option.gene_header
            #end if
            -l $log
            #if $log == "TRUE":
                -o '$output_log'
            #else
                -o '$output'
            #end if
    ]]></command>
    <inputs>
        <param name="input" type="data" format="txt" label="Raw counts of expression data"/>
        <param name="input_sep" type="select" label="Input column separator">
            <option value="tab" selected="true">Tabs</option>
            <option value=",">Comma</option>
            <option value="\ ">Whitespace</option>
            <option value=".">Dots</option>
            <option value="_">Underscores</option>
            <option value="-">Dashes</option>
        </param>
        <param name="input_header" type="select" label="Consider first line of input file as header?">
            <option value="TRUE" selected="true">Yes</option>
            <option value="FALSE">No</option>
        </param>
        <conditional name="option">
            <param name="type_transfo" type="select" label="Type of transformation">
                <option value="cpm" selected="true">CPM</option>
                <option value="tpm">TPM</option>
                <option value="rpk">RPK</option>
            </param>
            <when value="tpm">
                <expand macro="gene_length_params"/>
            </when>
            <when value="rpk">
                <expand macro="gene_length_params"/>
            </when>
            <!-- CPM needs no gene length information. -->
            <when value="cpm">
            </when>
        </conditional>
        <param name="log" type="select" label="Data should be log transformed ?">
            <option value="FALSE" selected="true">No</option>
            <option value="TRUE">Yes</option>
        </param>
    </inputs>
    <!-- Exactly one of the two datasets is created per run, selected by "log". -->
    <outputs>
        <data name="output" format="tabular" label="${option.type_transfo} from ${on_string}">
            <filter>log == "FALSE"</filter>
        </data>
        <data name="output_log" format="tabular" label="log2(${option.type_transfo} +1) from ${on_string}">
            <filter>log == "TRUE"</filter>
        </data>
    </outputs>
    <!-- Tests that set log=TRUE must check "output_log": the "output" dataset
         is filtered out in that case. -->
    <tests>
        <test>
            <param name="input" value="counts.tab" ftype="tabular"/>
            <param name="type_transfo" value="cpm"/>
            <output name="output" file="cpm.tab" ftype="tabular"/>
        </test>
        <test>
            <param name="input" value="counts.tab" ftype="tabular"/>
            <param name="type_transfo" value="cpm"/>
            <param name="log" value="TRUE"/>
            <output name="output_log" file="logcpm.tab" ftype="tabular"/>
        </test>
        <test>
            <param name="input" value="counts.tab" ftype="tabular"/>
            <param name="type_transfo" value="tpm"/>
            <param name="gene_file" value="gene_length.tab" ftype="tabular"/>
            <param name="gene_header" value="TRUE"/>
            <output name="output" file="tpm.tab" ftype="tabular"/>
        </test>
        <test>
            <param name="input" value="counts.tab" ftype="tabular"/>
            <param name="type_transfo" value="tpm"/>
            <param name="gene_file" value="gene_length.tab" ftype="tabular"/>
            <param name="gene_header" value="TRUE"/>
            <param name="log" value="TRUE"/>
            <output name="output_log" file="logtpm.tab" ftype="tabular"/>
        </test>
        <test>
            <param name="input" value="counts.tab" ftype="tabular"/>
            <param name="type_transfo" value="rpk"/>
            <param name="gene_file" value="gene_length.tab" ftype="tabular"/>
            <param name="gene_header" value="TRUE"/>
            <output name="output" file="rpk.tab" ftype="tabular"/>
        </test>
        <test>
            <param name="input" value="counts.tab" ftype="tabular"/>
            <param name="type_transfo" value="rpk"/>
            <param name="gene_file" value="gene_length.tab" ftype="tabular"/>
            <param name="gene_header" value="TRUE"/>
            <param name="log" value="TRUE"/>
            <output name="output_log" file="logrpk.tab" ftype="tabular"/>
        </test>
    </tests>
    <help>

**What it does**

Takes a table of raw counts expression values and returns a table of normalized expression values.

Normalization can be:

- CPM (Counts Per Million) are obtained by dividing counts by the library counts sum and multiplying the results by a million.
- RPK (Reads Per Kilobase) are obtained by dividing read counts by gene lengths (expressed in kilo-nucleotides).
- TPM (Transcripts Per Million) are obtained by dividing RPK values by the sum of all RPK values in a sample and multiplying the results by 1 million.

RPK and TPM require a two-column correspondence table (gene name - gene length) where the length is specified in nucleotides. Both these metrics are relevant only for sequencing of full length RNAs.

Note: do not comment your header line if you have a header line in your tabular input (i.e. do not start the header line with a '#' character).

Computed values may be log-transformed (log2([CPM or RPK or TPM]+1)).

    </help>
</tool>