comparison qiime2/qiime_vsearch_cluster-features-open-reference.xml @ 0:51b9b6b57732 draft

Uploaded
author florianbegusch
date Thu, 24 May 2018 05:21:07 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:51b9b6b57732
1 <?xml version="1.0" ?>
2 <tool id="qiime_vsearch_cluster-features-open-reference" name="qiime vsearch cluster-features-open-reference" version="2018.4">
3 <description> - Open-reference clustering of features.</description>
4 <requirements> <!-- Packages Galaxy must resolve before running the command below. -->
5 <requirement version="2018.4" type="package">qiime2</requirement>
6 </requirements>
7 <command>
8 <![CDATA[
9 qiime vsearch cluster-features-open-reference --p-perc-identity='$ppercidentity' --i-table='$itable' --i-sequences='$isequences' --i-reference-sequences='$ireferencesequences'
10 ## Optional arguments are emitted only when the user supplied a value (unset renders as 'None').
11 #if str($cmdconfig) != 'None':
12 --cmd-config='$cmdconfig'
13 #end if
14 --o-clustered-table=oclusteredtable
15 #if str($pstrand) != 'None':
16 --p-strand='$pstrand'
17 #end if
18 --o-clustered-sequences=oclusteredsequences --o-new-reference-sequences=onewreferencesequences
19 #set $pthreads = '${GALAXY_SLOTS:-4}'
20 ## The thread count is a shell expansion (4 when GALAXY_SLOTS is unset), so it stays in double quotes.
21 #if str($pthreads):
22 --p-threads="$pthreads"
23 #end if
24 ## Chain with logical AND so the copies run only after qiime succeeds; paths are single-quoted for the shell.
25 &&
26 cp oclusteredtable.qza '$oclusteredtable' &&
27 cp oclusteredsequences.qza '$oclusteredsequences' &&
28 cp onewreferencesequences.qza '$onewreferencesequences'
29 ]]>
30 </command>
31 <inputs>
32 <!-- Required input datasets; each is passed to the matching i-* option in the command section. -->
33 <param format="qza,no_unzip.zip" label="--i-sequences: FeatureData[Sequence] The sequences corresponding to the features in table. [required]" name="isequences" optional="False" type="data"/>
34 <param format="qza,no_unzip.zip" label="--i-table: FeatureTable[Frequency] The feature table to be clustered. [required]" name="itable" optional="False" type="data"/>
35 <param format="qza,no_unzip.zip" label="--i-reference-sequences: FeatureData[Sequence] The sequences to use as cluster centroids. [required]" name="ireferencesequences" optional="False" type="data"/>
36 <!-- Free-text field; the command section forwards the value unchanged as the perc-identity option. -->
37 <param label="--p-perc-identity: The percent identity at which clustering should be performed. This parameter maps to vsearch's --id parameter. [required]" name="ppercidentity" optional="False" type="text"/>
38 <!-- The "None" option is a sentinel: the command section omits the strand option when it is selected. -->
39 <param label="--p-strand: Search plus (i.e., forward) or both (i.e., forward and reverse complement) strands. [default: plus]" name="pstrand" optional="True" type="select">
40 <option selected="True" value="None">Selection is Optional</option>
41 <option value="both">both</option>
42 <option value="plus">plus</option>
43 </param>
44 <!-- Optional dataset; the command section adds the cmd-config option only when one is supplied. -->
45 <param label="--cmd-config: Use config file for command options" name="cmdconfig" optional="True" type="data"/>
46
47 </inputs>
48 <outputs> <!-- One qza dataset per qiime output; the command section copies each result file into place. -->
49 <data name="oclusteredtable" format="qza" label="${tool.name} on ${on_string}: clustered-table.qza"/>
50 <data name="oclusteredsequences" format="qza" label="${tool.name} on ${on_string}: clustered-sequences.qza"/>
51 <data name="onewreferencesequences" format="qza" label="${tool.name} on ${on_string}: new-reference-sequences.qza"/>
52 </outputs>
53 <help>
54 <![CDATA[
55 Open-reference clustering of features.
56 ---------------------------------------
57
58 Given a feature table and the associated feature sequences, cluster the
59 features against a reference database based on a user-specified percent
60 identity threshold of their sequences. Any sequences that don't match are
61 then clustered de novo. This is not a general-purpose clustering method,
62 but rather is intended to be used for clustering the results of
63 quality-filtering/dereplication methods, such as DADA2, or for re-clustering a
64 FeatureTable at a lower percent identity than it was originally clustered
65 at. When a group of features in the input table are clustered into a single
66 feature, the frequency of that single feature in a given sample is the sum
67 of the frequencies of the features that were clustered in that sample.
68 Feature identifiers will be inherited from the centroid feature of each
69 cluster. For features that match a reference sequence, the centroid feature
70 is that reference sequence, so its identifier will become the feature
71 identifier. The clustered_sequences result will contain feature
72 representative sequences that are derived from the sequences input for all
73 features in clustered_table. This will always be the most abundant sequence
74 in the cluster. The new_reference_sequences result will contain the entire
75 reference database, plus feature representative sequences for any de novo
76 features. This is intended to be used as a reference database in subsequent
77 iterations of cluster_features_open_reference, if applicable. See the
78 vsearch documentation for details on how sequence clustering is performed.
79
80 Parameters
81 ----------
82 sequences : FeatureData[Sequence]
83 The sequences corresponding to the features in table.
84 table : FeatureTable[Frequency]
85 The feature table to be clustered.
86 reference_sequences : FeatureData[Sequence]
87 The sequences to use as cluster centroids.
88 perc_identity : Float % Range(0, 1, inclusive_start=False, inclusive_end=True)
89 The percent identity at which clustering should be performed. This
90 parameter maps to vsearch's --id parameter.
91 strand : Str % Choices({'both', 'plus'}), optional
92 Search plus (i.e., forward) or both (i.e., forward and reverse
93 complement) strands.
94
95 Returns
96 -------
97 clustered_table : FeatureTable[Frequency]
98 The table following clustering of features.
99 clustered_sequences : FeatureData[Sequence]
100 Sequences representing clustered features.
101 new_reference_sequences : FeatureData[Sequence]
102 The new reference sequences. This can be used for subsequent runs of
103 open-reference clustering for consistent definitions of features across
104 open-reference feature tables.
105 ]]>
106 </help>
107 <macros> <!-- Pulls in macro definitions from the sibling file named below. -->
108 <import>qiime_citation.xml</import>
109 </macros>
110 <expand macro="qiime_citation"/> <!-- Inserts the imported qiime_citation macro here (presumably the citation block; confirm in qiime_citation.xml). -->
111 </tool>