comparison retention_time_flags.xml @ 1:2e7d47c0b027 draft

"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
author malex
date Mon, 08 Mar 2021 22:04:06 +0000
parents
children
comparison
equal deleted inserted replaced
0:b54326490b4d 1:2e7d47c0b027
1 <tool id="secimtools_retention_time_flags" name="Retention Time (RT) Flags" version="@WRAPPER_VERSION@"> <!-- Galaxy wrapper around retention_time_flags.py. The @WRAPPER_VERSION@ token is presumably defined in macros.xml (TODO confirm). -->
2 <description>- Flag features with discrepancies in retention time.</description>
3 <macros> <!-- Shared definitions are imported from macros.xml; the "requirements" and "citations" expansions used in this file presumably come from there (TODO confirm). -->
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements" />
7 <stdio> <!-- Exit codes of 1 and above are reported at warning level with the description "RuntimeWarning". NOTE(review): the command tag also sets detect_errors="exit_code"; confirm how the two settings interact. -->
8 <exit_code range="1:" level="warning" description="RuntimeWarning"/>
9 </stdio>
10 <!-- Builds the retention_time_flags.py command line. Dataset paths and the free-text ID column are single-quoted so whitespace cannot split them into several shell words. The CVcutoff option is emitted whenever the field is non-empty (so an explicit 0 is passed through), and the pctl switch only when the box is checked. --> <command detect_errors="exit_code"><![CDATA[
11 retention_time_flags.py
12 --input '$input'
13 --design '$design'
14 --ID '$uniqID'
15 --figure '$RTplot'
16 --flag '$RTflag'
17 --minutes $minutes
18 #if str($CVcutoff) != ''
19 --CVcutoff $CVcutoff
20 #end if
21 #if $pctl
22 --pctl
23 #end if
24 ]]></command>
25 <inputs> <!-- Two tabular datasets, the name of the unique-ID column, and three tuning options: an optional CV cutoff, the RT cutoff, and a checkbox selecting the 90th/10th percentiles. The size attribute is kept only on the text field; the boolean default is stated explicitly. -->
26 <param name="input" type="data" format="tabular" label="Wide Dataset" help="Input your tab-separated wide format dataset. If file is not tab separated see TIP below."/>
27 <param name="design" type="data" format="tabular" label="Design File" help="Input your design file(tab-separated). Note you need a 'sampleID' column. If not tab separated see TIP below."/>
28 <param name="uniqID" type="text" size="30" value="" label="Unique Feature ID" help="Name of the column in your Wide Dataset that has unique identifiers."/>
29 <param name="CVcutoff" optional="true" type="float" value="0.1" label="Coefficient of Variation (CV) Cutoff" help="Coefficient of variation (CV) cutoff (in decimals) that specifies the proportion of features to flag. Default CV cutoff = 0.1, which implies that 10% of the features with the largest CVs will be flagged." />
30 <param name="minutes" type="float" value="0.2" label="Retention Time Cutoff Value" help="If the difference in the retention time between the 95th and 5th percentiles (or 90th and 10th) is greater than this specified RT Cutoff value, features are flagged. (A default value of 0.2 assumes data units are in minutes)" />
31 <param name="pctl" type="boolean" checked="false" label="90th and 10th percentiles [Optional]" help="See RT Cutoff Value above. Check this box to use the 90th and 10th percentiles instead of the default 95th and 5th percentiles."/>
32 </inputs>
33 <outputs> <!-- Two result datasets: the PDF plot (RTplot) and the tabular flag file (RTflag), which the command passes as the figure and flag arguments. -->
34 <data name="RTplot" format="pdf" label="${tool.name} on ${on_string}: plot"/>
35 <data name="RTflag" format="tabular" label="${tool.name} on ${on_string}: flag"/>
36 </outputs>
37 <tests> <!-- Single functional test using the TEST0000 fixture files. -->
38 <test> <!-- Only input, design and uniqID are supplied; CVcutoff, minutes and pctl are not set in this test. -->
39 <param name="input" value="TEST0000_rt.tsv"/>
40 <param name="design" value="TEST0000_design.tsv"/>
41 <param name="uniqID" value="rowID" />
42 <output name="RTplot" file="TEST0000_retention_time_flags_figure.pdf" compare="sim_size" delta="10000"/> <!-- The PDF is compared by size only (sim_size) with a delta of 10000, not by content. -->
43 <output name="RTflag" file="TEST0000_retention_time_flags_flag.tsv" /> <!-- The flag table is compared against the reference file with no compare mode specified. -->
44 </test>
45 </tests>
46 <help><![CDATA[
47
48 @TIP_AND_WARNING@
49
50 **Tool Description**
51
52 **NOTE:** This tool is primarily intended for flagging features with variation in retention times in mass spectrometry data analysis.
53 The goal of the tool is to identify potential problems with the instrument or with data processing and pre-processing.
54
55 The retention time for a given feature is predicted to be relatively consistent across samples. This tool identifies potential abnormalities or shifts in the retention time for a feature.
56
57 --------------------------------------------------------------------------------
58
59 **Input**
60
61 - Two input datasets are required.
62
63 @WIDE@
64
65 **NOTE:** The sample IDs must match the sample IDs in the Design File (below).
66 Extra columns will automatically be ignored.
67
68 @METADATA@
69
70 @UNIQID@
71
72 **Coefficient of Variation (CV) Cutoff**
73
74 - The coefficient of variation (CV) cutoff (in decimals) specifies the proportion of features to flag. Default CV cutoff = 0.1, implying that 10% of the features with the largest CV will be flagged.
75
76
77 **Retention Time Cutoff Value**
78
79 - A user-specified value (the default of 0.2 assumes the data units are in minutes) used together with the percentile checkbox below. Features where the difference in the RT between the 95th and 5th percentiles (or the 90th and 10th percentiles, if the box is checked) is greater than the given Retention Time Cutoff Value are flagged.
80
81
82 **90th and 10th percentiles [Optional]**
83
84 - See Retention Time Cutoff Value above. Check this box to use the 90th and 10th percentiles instead of the default 95th and 5th percentiles.
85
86 --------------------------------------------------------------------------------
87
88 **Output**
89
90 The tool outputs two files:
91
92 (1) a TSV file with flags for each feature, where the results from each flagging method are saved in a separate column
93
94 - flag_RT_Q95Q05_outlier: 0/1 flag where the value “1” is for features where the difference in the retention time between the 95th and 5th percentile (or 90th and 10th percentiles) is greater than the user specified Retention Time Cutoff Value (default is 0.2 minutes).
95
96 - flag_RT_max_gt_threshold: 0/1 flag where the value “1” is for features where the difference between the retention time maximum and median is greater than the Retention Time Cutoff Value divided by 2.
97
98 - flag_RT_min_lt_threshold: 0/1 flag where the value “1” is for features where the difference between the retention time minimum and median is greater than the Retention Time Cutoff Value divided by 2.
99
100 - flag_RT_min_max_outlier: 0/1 flag where the value “1” is for features where the difference between the retention time minimum and maximum is greater than 3 times the standard deviation from the mean.
101
102 - flag_RT_big_CV: 0/1 flag where the value “1” is for features where the coefficient of variation (CV) in retention time is greater than the CV Cutoff. The default value is 0.1 which corresponds to flagging the 10% of the features with the largest CV.
103
104 (2) a PDF file containing a density plot of the coefficients of variation (CV) for the retention time. The vertical red dotted line shows the CV cutoff for the top XX% of the data, as specified by the CV cutoff value.
105
106
107
108 ]]></help>
109 <expand macro="citations"/>
110 </tool>