Mercurial > repos > artbio > cpm_tpm_rpk
view cpm_tpm_rpk.xml @ 6:d2575928b824 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/cpm_tpm_rpk commit 6b0877a65bb8141d028dbff2fcf73b223d864635
author | artbio |
---|---|
date | Sat, 14 Oct 2023 16:24:48 +0000 |
parents | bcff1eb6fdb5 |
children | f00c1c34565e |
line wrap: on
line source
<tool id="cpm_tpm_rpk" name="Generate CPM, TPM, RPK" version="0.5.2">
    <!--
        Galaxy wrapper around cpm_tpm_rpk.R.
        Takes a raw count matrix, optionally a gene length table, and writes a
        normalized (and optionally log transformed) table. When visualisation
        is enabled it also writes a t-SNE PDF and a PCA PDF.
    -->
    <description>from raw counts expression values</description>
    <requirements>
        <requirement type="package" version="1.7.1">r-optparse</requirement>
        <requirement type="package" version="0.16">r-rtsne</requirement>
        <requirement type="package" version="3.3.6">r-ggplot2</requirement>
        <requirement type="package" version="0.4.14">r-ggfortify</requirement>
        <requirement type="package" version="1.4.4">r-reshape2</requirement>
    </requirements>
    <stdio>
        <exit_code range="1:" level="fatal" description="Tool exception" />
    </stdio>
    <!--
        Dataset paths and the tool directory are single quoted so that paths
        containing spaces or shell metacharacters reach Rscript as one argument.
        The two separator values (input_sep and gene_sep) are left unquoted on
        purpose: the whitespace choice is stored as backslash+space and relies
        on the shell to turn it into a single space argument, so quoting them
        would change what the script receives.
        NOTE(review): how the script maps the literal value "tab" to a tab
        character is not visible from this wrapper; confirm in cpm_tpm_rpk.R.
        The gene length options are passed for both TPM and RPK, the only two
        transformations that expose a gene length file in the inputs below.
        Exactly one of output / output_log exists for a given job (see the
        filters in the outputs section), hence the switch on the log parameter.
    -->
    <command detect_errors="exit_code"><![CDATA[
        Rscript '$__tool_directory__/cpm_tpm_rpk.R'
            -d '$input'
            -t '$option.type_transfo'
            -s $input_sep
            -c '$input_header'
        #if str($option.type_transfo) in ("tpm", "rpk"):
            -f '$option.gene_file'
            --gene_sep $option.gene_sep
            --gene_header '$option.gene_header'
        #end if
            -l '$log'
        #if $log == "FALSE":
            -o '${output}'
        #else if $log == "TRUE":
            -o '${output_log}'
        #end if
        #if $visu_option.visualisation == "yes":
            --visu 'TRUE'
            #if $visu_option.tsne_labels == "yes":
                --tsne_labels 'TRUE'
            #else
                --tsne_labels 'FALSE'
            #end if
            --seed '$visu_option.seed'
            --perp '$visu_option.perp'
            --theta '$visu_option.theta'
            --tsne_out '$tsne_out'
            --pca_out '$pca_out'
        #end if
    ]]></command>
    <inputs>
        <param name="input" type="data" format="txt" label="Raw counts of expression data"/>
        <param name="input_sep" type="select" label="Input column separator">
            <option value="tab" selected="true">Tabs</option>
            <option value=",">Comma</option>
            <option value="\ ">Whitespace</option>
            <option value=".">Dots</option>
            <option value="_">Underscores</option>
            <option value="-">Dashes</option>
        </param>
        <param name="input_header" type="select" label="Consider first line of input file as header?">
            <option value="TRUE" selected="true">Yes</option>
            <option value="FALSE">No</option>
        </param>
        <!-- Gene length parameters are only offered for TPM and RPK. -->
        <conditional name="option">
            <param name="type_transfo" type="select" label="Type of transformation">
                <option value="cpm" selected="true">CPM</option>
                <option value="tpm">TPM</option>
                <option value="rpk">RPK</option>
                <option value="none">NONE</option>
            </param>
            <when value="tpm">
                <param name="gene_file" type="data" format="txt" label="Gene length file"/>
                <param name="gene_sep" type="select" label="Gene length column separator">
                    <option value="tab" selected="true">Tabs</option>
                    <option value=",">Commas</option>
                    <option value="\ ">Whitespaces</option>
                    <option value=".">Dots</option>
                    <option value="_">Underscores</option>
                    <option value="-">Dashes</option>
                </param>
                <param name="gene_header" type="select" label="Consider first line of gene length file as header ?">
                    <option value="TRUE" selected="true">Yes</option>
                    <option value="FALSE">No</option>
                </param>
            </when>
            <when value="rpk">
                <param name="gene_file" type="data" format="txt" label="Gene length file"/>
                <param name="gene_sep" type="select" label="Gene length column separator">
                    <option value="tab" selected="true">Tabs</option>
                    <option value=",">Commas</option>
                    <option value="\ ">Whitespaces</option>
                    <option value=".">Dots</option>
                    <option value="_">Underscores</option>
                    <option value="-">Dashes</option>
                </param>
                <param name="gene_header" type="select" label="Consider first line of gene length file as header ?">
                    <option value="TRUE" selected="true">Yes</option>
                    <option value="FALSE">No</option>
                </param>
            </when>
            <when value="cpm">
            </when>
            <when value="none">
            </when>
        </conditional>
        <param name="log" type="select" label="Data should be log transformed ?">
            <option value="FALSE" selected="true">No</option>
            <option value="TRUE">Yes</option>
        </param>
        <conditional name="visu_option">
            <param name="visualisation" type="select" label="Visualisation of data with various dimensionality reduction methods" >
                <option value="no" selected="true">No visualisation</option>
                <option value="yes" >t-SNE and PCA</option>
            </param>
            <when value="yes">
                <param name="seed" value="49.0" type="float" label="Seed value for reproducibility of t-SNE" help="Set to 49 as default" />
                <param name="perp" value="10.0" type="float" label="perplexity (t-SNE)" help="should be less than ((nbre observations)-1)/3" />
                <param name="theta" value="1.0" type="float" label="theta (t-SNE)"/>
                <param name="tsne_labels" type="select" label="Add labels to points plots" >
                    <option value="no" selected="true">No Labels</option>
                    <option value="yes" >Label points</option>
                </param>
            </when>
            <when value="no" />
        </conditional>
    </inputs>
    <outputs>
        <!-- The two table outputs are mutually exclusive: the log parameter selects which one is kept. -->
        <data name="output" format="tabular" label="${option.type_transfo} from ${on_string}">
            <filter>log == "FALSE"</filter>
        </data>
        <data name="output_log" format="tabular" label="log2(${option.type_transfo} +1) from ${on_string}">
            <filter>log == "TRUE"</filter>
        </data>
        <!-- Both plots are only kept when visualisation is enabled. -->
        <data name="tsne_out" format="pdf" label="tsne from ${on_string}">
            <filter>visu_option['visualisation'] == 'yes'</filter>
        </data>
        <data name="pca_out" format="pdf" label="PCA from ${on_string}">
            <filter>visu_option['visualisation'] == 'yes'</filter>
        </data>
    </outputs>
    <tests>
        <!--
            Tests that set log to TRUE check output_log, because the output
            filters above remove the plain output in that case.
        -->
        <!-- test t-SNE -->
        <test expect_num_outputs="3">
            <param name="input" value="none.tab" ftype="tabular"/>
            <param name="type_transfo" value="none"/>
            <param name="log" value="FALSE"/>
            <param name="visualisation" value="yes"/>
            <param name="seed" value="49"/>
            <param name="perp" value="10"/>
            <param name="theta" value="1" />
            <param name="tsne_labels" value="yes" />
            <output name="output" file="none.tab" ftype="tabular" />
            <output name="tsne_out" file="none_tsne.pdf" ftype="pdf" compare="sim_size" delta="1000" />
            <output name="pca_out" file="none_pca.pdf" ftype="pdf" />
        </test>
        <test expect_num_outputs="3">
            <param name="input" value="counts.tab" ftype="tabular"/>
            <param name="type_transfo" value="cpm"/>
            <param name="log" value="TRUE"/>
            <param name="visualisation" value="yes"/>
            <param name="seed" value="49"/>
            <param name="perp" value="2"/>
            <param name="theta" value="1" />
            <param name="tsne_labels" value="yes" />
            <output name="output_log" file="logcpm.tab" ftype="tabular" />
            <output name="tsne_out" file="tsne.pdf" ftype="pdf" compare="sim_size" delta="1000" />
            <output name="pca_out" file="pca.pdf" ftype="pdf" />
        </test>
        <test expect_num_outputs="3">
            <param name="input" value="counts.tab" ftype="tabular"/>
            <param name="type_transfo" value="tpm"/>
            <param name="gene_file" value="gene_length.tab" ftype="tabular"/>
            <param name="log" value="TRUE"/>
            <param name="visualisation" value="yes"/>
            <param name="seed" value="49"/>
            <param name="perp" value="2"/>
            <param name="theta" value="1" />
            <param name="tsne_labels" value="no" />
            <output name="output_log" file="logtpm.tab" ftype="tabular" />
            <output name="tsne_out" file="tsne.nolab.pdf" ftype="pdf" compare="sim_size" delta="1000" />
            <output name="pca_out" file="pca.nolab.pdf" ftype="pdf" />
        </test>
        <!-- test without t-SNE -->
        <test expect_num_outputs="1">
            <param name="input" value="counts.tab" ftype="tabular"/>
            <param name="type_transfo" value="cpm"/>
            <output name="output" file="cpm.tab" ftype="tabular"/>
        </test>
        <test expect_num_outputs="1">
            <param name="input" value="counts.tab" ftype="tabular"/>
            <param name="type_transfo" value="cpm"/>
            <param name="log" value="TRUE"/>
            <output name="output_log" file="logcpm.tab" ftype="tabular" />
        </test>
        <test expect_num_outputs="1">
            <param name="input" value="counts.tab" ftype="tabular"/>
            <param name="type_transfo" value="tpm"/>
            <param name="gene_file" value="gene_length.tab" ftype="tabular"/>
            <param name="gene_header" value="TRUE"/>
            <output name="output" file="tpm.tab" ftype="tabular"/>
        </test>
        <test expect_num_outputs="1">
            <param name="input" value="counts.tab" ftype="tabular"/>
            <param name="type_transfo" value="tpm"/>
            <param name="gene_file" value="gene_length.tab" ftype="tabular"/>
            <param name="gene_header" value="TRUE"/>
            <param name="log" value="TRUE"/>
            <output name="output_log" file="logtpm.tab" ftype="tabular" />
        </test>
        <test expect_num_outputs="1">
            <param name="input" value="counts.tab" ftype="tabular"/>
            <param name="type_transfo" value="rpk"/>
            <param name="gene_file" value="gene_length.tab" ftype="tabular"/>
            <param name="gene_header" value="TRUE"/>
            <output name="output" file="rpk.tab" ftype="tabular" />
        </test>
        <test expect_num_outputs="1">
            <param name="input" value="counts.tab" ftype="tabular"/>
            <param name="type_transfo" value="rpk"/>
            <param name="gene_file" value="gene_length.tab" ftype="tabular"/>
            <param name="gene_header" value="TRUE"/>
            <param name="log" value="TRUE"/>
            <output name="output_log" file="logrpk.tab" ftype="tabular" />
        </test>
    </tests>
    <help>

**What it does**

Takes a raw count expression matrix and returns a table of normalized expression values.

Normalization can be:

- CPM (Counts Per Million) are obtained by dividing counts by the library counts sum and multiplying the results by a million.
- RPK (Reads Per Kilobases) are obtained by dividing read counts by gene lengths (expressed in kilo-nucleotides).
- TPM (Transcripts Per Million) are obtained by dividing RPK values by the sum of all RPK values in a sample and multiplying the results by 1 million.

RPK and TPM require a two-column correspondence table gene_name - gene length where the length is specified in nucleotides. Both these metrics are relevant only for sequencing of full length RNAs.

Note: do not comment your header line if you have a header line in your tabular input (i.e. do not start the header line with a '#' character).

Computed values may be log-transformed (log2([CPM or RPK or TPM]+1))

    </help>
    <citations>
        <citation type="bibtex">@Article{,
            title = {Visualizing High-Dimensional Data Using t-SNE},
            volume = {9},
            pages = {2579-2605},
            year = {2008},
            author = {L.J.P. {van der Maaten} and G.E. Hinton},
            journal = {Journal of Machine Learning Research},
        }
        </citation>
        <citation type="bibtex">@Manual{,
            title = {{ClusterR}: Gaussian Mixture Models, K-Means, Mini-Batch-Kmeans, K-Medoids and Affinity Propagation Clustering},
            author = {Lampros Mouselimis},
            year = {2019},
            note = {R package version 1.1.9},
            url = {https://github.com/mlampros/ClusterR},
        }
        </citation>
    </citations>
</tool>