Mercurial > repos > malex > gait_gm
diff split_wide_dataset.xml @ 1:ec9ee8edb84d draft
Initial upload of 21.6.10 release.
author | malex |
---|---|
date | Fri, 18 Jun 2021 20:23:19 +0000 |
parents | |
children | 2c218a253d56 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/split_wide_dataset.xml Fri Jun 18 20:23:19 2021 +0000
@@ -0,0 +1,220 @@
+<tool id="secimtools_split_input_wide_dataset" name="Create: Design, Wide, and Annotation datasets" version="@WRAPPER_VERSION@">
+    <!--
+        Galaxy wrapper around split_wide_dataset.py. The command passes the script one
+        tabular input, the sample column list and three tabular output paths (wide, design, annot).
+    -->
+    <description>from an Input wide dataset</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <!-- NOTE(review): error handling is declared twice (this stdio block and detect_errors on the command element); confirm which one Galaxy applies. -->
+    <stdio>
+        <exit_code range="1" level="fatal" description="Repeated Unique IDs"/>
+    </stdio>
+    <!-- Every substituted value is single-quoted so that column names, prefixes and paths containing spaces or shell metacharacters reach the script as one argument. -->
+    <command detect_errors="exit_code"><![CDATA[
+        split_wide_dataset.py
+            -i='$input'
+            #if $cond_UniqID.hasUniqID == "y":
+                -id='$cond_UniqID.uniqID'
+                #if $cond_UniqID.only_numbers.hasOnlyNumbers == "y":
+                    -p2='$cond_UniqID.only_numbers.prefix2'
+                #end if
+            #else:
+                -p='$cond_UniqID.prefix'
+            #end if
+            -s='$samples'
+            -w='$wide'
+            -d='$design'
+            -a='$annot'
+    ]]></command>
+    <inputs>
+        <param name="input" type="data" format="tabular" label="Input Wide Dataset" help="Input tab separated Wide Dataset. If input is not tab separated, see TIP below."/>
+        <conditional name="cond_UniqID">
+            <param name="hasUniqID" type="select" display="radio" label="Does your Wide Dataset have a unique FeatureID column?">
+                <option value="y">Yes</option>
+                <option value="n">No</option>
+            </param>
+            <when value="y">
+                <param name="uniqID" type="text" size="30" value="" label="Unique FeatureID" help="Name of the column in your Wide Dataset that contains the unique FeatureIDs."/>
+                <conditional name="only_numbers">
+                    <param name="hasOnlyNumbers" type="select" display="radio" label="Are your unique FeatureIDs ONLY Numbers?">
+                        <option value="y">Yes</option>
+                        <option value="n">No</option>
+                    </param>
+                    <when value="y">
+                        <param name="prefix2" type="text" size="30" value="" label="Prefix to use during generation of unique IDs" help="This prefix will be prepended to your NUMERIC unique FeatureID, with an underbar in between."/>
+                    </when>
+                    <!-- Explicit empty branch: the "No" option adds no parameters. -->
+                    <when value="n"/>
+                </conditional>
+            </when>
+            <when value="n">
+                <param name="prefix" type="text" size="30" value="" label="Prefix to use during generation of unique IDs" help="Unique IDs are required. You can input a prefix for the tool to use when creating a unique identifier (Optional). If you choose not to use a prefix, the tool-created uniqueID will be an underbar followed by a number."/>
+            </when>
+        </conditional>
+        <param name="samples" label="Sample Columns" type="text" help="Enter the numbers (1-based) of the columns in your Wide Dataset that contain sample data. E.g. if your sample data is in columns 2-4 then enter '2,3,4' (no spaces). Columns that are not selected are treated as descriptor annotation columns. NOTE: annotation columns are expected to ALL be left of the data columns. "/>
+    </inputs>
+    <outputs>
+        <data format="tabular" name="wide" label="${tool.name} on ${on_string}: Wide Dataset"/>
+        <data format="tabular" name="design" label="${tool.name} on ${on_string}: Design Dataset"/>
+        <data format="tabular" name="annot" label="${tool.name} on ${on_string}: Annotation Dataset"/>
+    </outputs>
+    <tests>
+        <!-- The prefix parameter only exists in the hasUniqID == "n" branch, so each test selects that branch explicitly instead of relying on the selector default ("y"). -->
+        <test>
+            <param name="input" value="gene_input_dataset_01fhl.tsv"/>
+            <conditional name="cond_UniqID">
+                <param name="hasUniqID" value="n"/>
+                <param name="prefix" value="Gene"/>
+            </conditional>
+            <param name="samples" value="2,3,4,5,6,7,8,9,10,11"/>
+            <output name="wide" file="gene_wide_dataset_01fhl.tsv"/>
+            <output name="design" file="gene_design_file_01fhl.tsv"/>
+            <output name="annot" file="gene_annot_file_01fhl.tsv"/>
+        </test>
+        <test>
+            <param name="input" value="metabolite_input_dataset_01fhl.tsv"/>
+            <conditional name="cond_UniqID">
+                <param name="hasUniqID" value="n"/>
+                <param name="prefix" value="Met"/>
+            </conditional>
+            <param name="samples" value="2,3,4,5,6,7,8,9,10,11"/>
+            <output name="wide" file="metabolite_wide_dataset_01fhl.tsv"/>
+            <output name="design" file="met_design_file_01fhl.tsv"/>
+            <output name="annot" file="met_annot_file_01fhl.tsv"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+**Tool Description**
+
+    This tool can be used to perform two tasks 1) convert a single file that contains
+    data and annotation in wide format to two files in wide format, one with data and
+    one with annotation 2) create a design file template that will be compatible with
+    the wide data and annotation files. The tool will automatically check for a column
+    containing unique feature identifiers (FeatureIDs). If no unique FeatureID is located
+    the tool will generate one. The user can specify a prefix for the unique FeatureID
+    (e.g. 'met' for metabolite data). The Design Dataset is a template with an exact
+    match to the columns with a single column called 'SampleID' that contains the names
+    of the samples in the input Wide Dataset. This Design Dataset can be modified by
+    the user to include metadata columns. The tool also creates a separate Annotation
+    Dataset containing the unique FeatureIDs (user-specified or generated by the tool)
+    and any non-sample descriptor columns that were present in the input wide dataset
+    (such as m/z ratio, retention time, compound name, etc.). Finally, the tool creates
+    a 'clean' Wide Dataset containing only samples in columns and features in rows.
+
+--------------------------------------------------------------------------------
+
+**INPUT**
+
+**Example - Wide Format Input Dataset**
+
+    +---------+-----------+---------+---------+-----+
+    | rowID   | m/z ratio | sample1 | sample2 | ... |
+    +=========+===========+=========+=========+=====+
+    | 1       | 8.845     | 20      | 10      | ... |
+    +---------+-----------+---------+---------+-----+
+    | 2       | 0.258     | 22      | 30      | ... |
+    +---------+-----------+---------+---------+-----+
+    | 3       | 10.54     | 27      | 2       | ... |
+    +---------+-----------+---------+---------+-----+
+    | 4       | 8.594     | 17      | 8       | ... |
+    +---------+-----------+---------+---------+-----+
+    | ...     | ...       | ...     | ...     | ... |
+    +---------+-----------+---------+---------+-----+
+
+    **NOTE:** The input dataset has features in rows and samples in columns. Any descriptor columns that are present will be used to populate the Annotation File.
+
+**Unique FeatureID**
+
+    If the Input Dataset has a column with unique FeatureIDs, the user can specify the name of this column. If the Input Dataset does not have a column with unique FeatureIDs, the tool will create a numeric one.
+
+**Prefix**
+
+    The user can add a prefix to the tool-generated unique FeatureID, if desired. Example: If met is input then the unique FeatureID column will consist of met\_ followed by a number.
+
+**Sample Columns**
+
+    Numbers (1-based) of the columns in the Input Dataset that contain sample information, separated by commas with no spaces (e.g. 2,3,4). All columns not specified as samples will be used to populate the Annotation File.
+
+--------------------------------------------------------------------------------
+
+**OUTPUT**
+
+
+**A Wide Dataset containing the FeatureID column and all columns selected as samples**
+
+    +------------+---------+---------+---------+-----+
+    | FeatureID  | sample1 | sample2 | sample3 | ... |
+    +============+=========+=========+=========+=====+
+    | met_1      | 10      | 20      | 10      | ... |
+    +------------+---------+---------+---------+-----+
+    | met_2      | 5       | 22      | 30      | ... |
+    +------------+---------+---------+---------+-----+
+    | met_3      | 30      | 27      | 2       | ... |
+    +------------+---------+---------+---------+-----+
+    | met_4      | 32      | 17      | 8       | ... |
+    +------------+---------+---------+---------+-----+
+    | ...        | ...     | ...     | ...     | ... |
+    +------------+---------+---------+---------+-----+
+
+    In the above example, *met* was input for Prefix
+
+
+**A Design Dataset template containing a column called SampleID with the column headers from the input dataset that were chosen as samples**
+
+    +----------+---------+
+    | SampleID |         |
+    +==========+=========+
+    | sample1  |         |
+    +----------+---------+
+    | sample2  |         |
+    +----------+---------+
+    | sample3  |         |
+    +----------+---------+
+    | sample4  |         |
+    +----------+---------+
+    | ...      |         |
+    +----------+---------+
+
+
+**An Annotation Dataset containing the unique FeatureID column and any non-sample descriptor columns**
+
+    +-------------+------------+-----+
+    | FeatureID   | m/z ratio  | ... |
+    +=============+============+=====+
+    | FeatureID_1 | 8.845      | ... |
+    +-------------+------------+-----+
+    | FeatureID_2 | 0.258      | ... |
+    +-------------+------------+-----+
+    | FeatureID_3 | 10.54      | ... |
+    +-------------+------------+-----+
+    | FeatureID_4 | 8.594      | ... |
+    +-------------+------------+-----+
+    | ...         | ...        | ... |
+    +-------------+------------+-----+
+
+
+    ]]>
+    </help>
+    <citations>
+        <citation type="bibtex">@ARTICLE{Kirpich17secimtools,
+        author = {Alexander S. Kirpich and Miguel Ibarra and Oleksandr Moskalenko and Justin M. Fear and Joseph Gerken and Xinlei Mi and Ali Ashrafi and Alison M. Morse and Lauren M. McIntyre},
+        title = {SECIMTools: A suite of Metabolomics Data Analysis Tools},
+        journal = {BMC Bioinformatics},
+        year = {in press}
+        }</citation>
+        <citation type="bibtex">@article{garcia2010paintomics,
+        title={Paintomics: a web based tool for the joint visualization of transcriptomics and metabolomics data},
+        author={Garc{\'\i}a-Alcalde, Fernando and Garc{\'\i}a-L{\'o}pez, Federico and Dopazo, Joaqu{\'\i}n and Conesa, Ana},
+        journal={Bioinformatics},
+        volume={27},
+        number={1},
+        pages={137--139},
+        year={2010},
+        publisher={Oxford University Press}
+        }</citation>
+    </citations>
+</tool>