Mercurial > repos > artbio > gsc_cpm_tpm_rpk
comparison cpm_tpm_rpk.xml @ 0:ce3d027ec26b draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/gsc_cpm_tpm_rpk commit 09dcd74dbc01f448518cf3db3e646afb0675a6fe
author | artbio |
---|---|
date | Mon, 24 Jun 2019 13:37:16 -0400 |
parents | |
children | 46507a10106c |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:ce3d027ec26b |
---|---|
1 <tool id="cpm_tpm_rpk" name="Generate CPM, TPM, RPK" version="0.9.0"> | |
2 <description>from raw counts expression values</description> | |
3 <requirements> <!-- presumably the option parser used by cpm_tpm_rpk.R; confirm against the script --> | |
4 <requirement type="package" version="1.6.0">r-optparse</requirement> | |
5 </requirements> | |
6 <stdio> <!-- any exit code of 1 or above is reported as a fatal tool error --> | |
7 <exit_code range="1:" level="fatal" description="Tool exception" /> | |
8 </stdio> | |
9 <!-- Runs cpm_tpm_rpk.R; gene-length options are passed only for TPM and RPK, and the result goes to output_log when log is TRUE, otherwise to output. Every substituted value is single-quoted for the shell. --> <command detect_errors="exit_code"><![CDATA[ | |
10 Rscript '$__tool_directory__/cpm_tpm_rpk.R' | |
11 --data '$input' | |
12 --type '$option.type_transfo' | |
13 --sep '$input_sep' | |
14 --colnames '$input_header' | |
15 #if $option.type_transfo == "tpm": | |
16 -f '$option.gene_file' | |
17 --gene_sep '$option.gene_sep' | |
18 --gene_header '$option.gene_header' | |
19 #end if | |
20 #if $option.type_transfo == "rpk": | |
21 -f '$option.gene_file' | |
22 --gene_sep '$option.gene_sep' | |
23 --gene_header '$option.gene_header' | |
24 #end if | |
25 --log '$log' | |
26 #if $log == "FALSE": | |
27 -o '${output}' | |
28 #else if $log == "TRUE": | |
29 -o '${output_log}' | |
30 #end if | |
31 ]]></command> | |
32 <inputs> <!-- User-facing parameters; each value is substituted into the command template. --> | |
33 <param name="input" type="data" format="txt,tabular" label="Raw counts of expression data"/> | |
34 <param name="input_sep" type="select" label="Input column separator"> | |
35 <option value="tab" selected="true">Tabulation</option> | |
36 <option value=",">Comma</option> | |
37 </param> | |
38 <param name="input_header" type="select" label="Consider first line of input file as header?"> | |
39 <option value="TRUE" selected="true">Yes</option> | |
40 <option value="FALSE">No</option> | |
41 </param> | |
42 <conditional name="option"> <!-- Gene length inputs are only offered for TPM and RPK; CPM takes none. --> | |
43 <param name="type_transfo" type="select" label="Type of transformation"> | |
44 <option value="cpm" selected="true">CPM</option> | |
45 <option value="tpm">TPM</option> | |
46 <option value="rpk">RPK</option> | |
47 </param> | |
48 <when value="tpm"> | |
49 <param name="gene_file" type="data" format="txt,tabular" label="Gene length file"/> | |
50 <param name="gene_sep" type="select" label="Gene length column separator"> | |
51 <option value="tab" selected="true">Tabulation</option> | |
52 <option value=",">Commas</option> | |
53 </param> | |
54 <param name="gene_header" type="select" label="Consider first line of gene length file as header ?"> | |
55 <option value="TRUE" selected="true">Yes</option> | |
56 <option value="FALSE">No</option> | |
57 </param> | |
58 </when> | |
59 <when value="rpk"> <!-- same three gene length parameters as the tpm branch --> | |
60 <param name="gene_file" type="data" format="txt,tabular" label="Gene length file"/> | |
61 <param name="gene_sep" type="select" label="Gene length column separator"> | |
62 <option value="tab" selected="true">Tabs</option> | |
63 <option value=",">Commas</option> | |
64 </param> | |
65 <param name="gene_header" type="select" label="Consider first line of gene length file as header ?"> | |
66 <option value="TRUE" selected="true">Yes</option> | |
67 <option value="FALSE">No</option> | |
68 </param> | |
69 </when> | |
70 <when value="cpm"> <!-- no extra parameters for CPM --> | |
71 </when> | |
72 </conditional> | |
73 <param name="log" type="select" label="Data should be log transformed ?"> <!-- also decides which output dataset is created, via the output filters --> | |
74 <option value="FALSE" selected="true">No</option> | |
75 <option value="TRUE">Yes</option> | |
76 </param> | |
77 </inputs> | |
78 <outputs> <!-- The two filters are mutually exclusive, so exactly one dataset is created per run depending on the log parameter. --> | |
79 <data name="output" format="tabular" label="${option.type_transfo} from ${on_string}"> | |
80 <filter>log == "FALSE"</filter> | |
81 </data> | |
82 <data name="output_log" format="tabular" label="log2(${option.type_transfo} +1) from ${on_string}"> | |
83 <filter>log == "TRUE"</filter> | |
84 </data> | |
85 </outputs> | |
86 <tests> <!-- Tests that set log to TRUE check output_log, because the output dataset is filtered out in that case. --> | |
87 <test> | |
88 <param name="input" value="counts.tab" ftype="tabular"/> | |
89 <param name="type_transfo" value="cpm"/> | |
90 <param name="log" value="TRUE"/> | |
91 <output name="output_log" file="logcpm.tab" ftype="tabular"/> | |
92 </test> | |
93 <test> | |
94 <param name="input" value="counts.tab" ftype="tabular"/> | |
95 <param name="type_transfo" value="tpm"/> | |
96 <param name="gene_file" value="gene_length.tab" ftype="tabular"/> | |
97 <param name="log" value="TRUE"/> | |
98 <output name="output_log" file="logtpm.tab" ftype="tabular"/> | |
99 </test> | |
100 <!-- test without log transformation --> | |
101 <test> | |
102 <param name="input" value="counts.tab" ftype="tabular"/> | |
103 <param name="type_transfo" value="cpm"/> | |
104 <output name="output" file="cpm.tab" ftype="tabular"/> | |
105 </test> | |
106 <test> | |
107 <param name="input" value="counts.tab" ftype="tabular"/> | |
108 <param name="type_transfo" value="tpm"/> | |
109 <param name="gene_file" value="gene_length.tab" ftype="tabular"/> | |
110 <param name="gene_header" value="TRUE"/> | |
111 <output name="output" file="tpm.tab" ftype="tabular"/> | |
112 </test> | |
113 <test> | |
114 <param name="input" value="counts.tab" ftype="tabular"/> | |
115 <param name="type_transfo" value="rpk"/> | |
116 <param name="gene_file" value="gene_length.tab" ftype="tabular"/> | |
117 <param name="gene_header" value="TRUE"/> | |
118 <output name="output" file="rpk.tab" ftype="tabular"/> | |
119 </test> | |
120 <test> | |
121 <param name="input" value="counts.tab" ftype="tabular"/> | |
122 <param name="type_transfo" value="rpk"/> | |
123 <param name="gene_file" value="gene_length.tab" ftype="tabular"/> | |
124 <param name="gene_header" value="TRUE"/> | |
125 <param name="log" value="TRUE"/> | |
126 <output name="output_log" file="logrpk.tab" ftype="tabular"/> | |
127 </test> | |
128 </tests> | |
129 <help> | |
130 | |
131 **What it does** | |
132 | |
133 Takes a raw count expression matrix and returns a table of normalized expression values. | |
134 | |
135 Normalization can be: | |
136 | |
137 - CPM (Counts Per Million) are obtained by dividing counts by the library counts sum and multiplying the results by a million. | |
138 - RPK (Reads Per Kilobases) are obtained by dividing read counts by gene lengths (expressed in kilo-nucleotides). | |
139 - TPM (Transcripts Per Million) are obtained by dividing RPK values by the sum of all RPK values in a sample and multiplying the results by 1 million. | |
140 | |
141 RPK and TPM require a two-column correspondence table gene_name - gene length where the length is specified in nucleotides. Both these metrics are relevant only for sequencing of full length RNAs. | |
142 | |
143 Note: First header row must NOT start with a '#' comment character | |
144 | |
145 Computed values may be base-2 log-transformed (log2([CPM or RPK or TPM]+1)) | |
146 | |
147 </help> | |
148 <citations> <!-- single BibTeX entry citing the R language manual (R Core Team, 2014) --> | |
149 <citation type="bibtex"> | |
150 @Manual{, | |
151 title = {R: A Language and Environment for Statistical Computing}, | |
152 author = {{R Core Team}}, | |
153 organization = {R Foundation for Statistical Computing}, | |
154 address = {Vienna, Austria}, | |
155 year = {2014}, | |
156 url = {http://www.R-project.org/}, | |
157 } | |
158 </citation> | |
159 </citations> | |
160 </tool> |