comparison qiime2/qiime_vsearch_cluster-features-open-reference.xml @ 29:3ba9833030c1 draft

Uploaded
author florianbegusch
date Fri, 04 Sep 2020 13:12:49 +0000
parents
children
comparison
equal deleted inserted replaced
28:c28331a63dfd 29:3ba9833030c1
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Galaxy wrapper for the QIIME 2 action "qiime vsearch cluster-features-open-reference".

  Inputs : feature sequences, feature table and reference sequences (QIIME 2 artifacts),
           a percent identity threshold and an optional strand selection.
  Outputs: the clustered table, the clustered sequences and the new reference sequences.

  Review notes:
    * The thread count is taken from the job's allocated slots; the command previously
      referenced a "pthreads" variable that no input parameter defined.
    * All three artifacts written by QIIME 2 are copied to their Galaxy datasets;
      previously only the new reference sequences were copied.
    * The "examples" flag (show usage examples and exit) is no longer exposed: it was
      declared as a required dataset input and always passed on, so the tool could
      never reach the actual clustering step.
-->
<tool id="qiime_vsearch_cluster-features-open-reference" name="qiime vsearch cluster-features-open-reference" version="2020.8">
    <description>Open-reference clustering of features.</description>
    <!-- Shared macros; "qiime_citation" is expanded at the end of this tool. -->
    <macros>
        <import>qiime_citation.xml</import>
    </macros>
    <requirements>
        <requirement type="package" version="2020.8">qiime2</requirement>
    </requirements>
    <command><![CDATA[
## Output names are given without an extension; the copy steps below rely on
## the artifacts being written as "<name>.qza".
qiime vsearch cluster-features-open-reference

--i-sequences='$isequences'

--i-table='$itable'

--i-reference-sequences='$ireferencesequences'

--p-perc-identity='$ppercidentity'

## The select uses the literal value 'None' to mean "not chosen".
#if str($pstrand) != 'None':
--p-strand=$pstrand
#end if

## Use the slots Galaxy allocated to this job; fall back to a single thread.
--p-threads=\${GALAXY_SLOTS:-1}

--o-clustered-table=oclusteredtable

--o-clustered-sequences=oclusteredsequences

--o-new-reference-sequences=onewreferencesequences

## Chain with && so a failed clustering run is not masked by the copy steps.
&&
cp oclusteredtable.qza '$oclusteredtable'
&&
cp oclusteredsequences.qza '$oclusteredsequences'
&&
cp onewreferencesequences.qza '$onewreferencesequences'
    ]]></command>
    <inputs>
        <param format="qza,no_unzip.zip" label="--i-sequences: ARTIFACT FeatureData[Sequence] The sequences corresponding to the features in table. [required]" name="isequences" optional="False" type="data" />
        <param format="qza,no_unzip.zip" label="--i-table: ARTIFACT FeatureTable[Frequency] The feature table to be clustered. [required]" name="itable" optional="False" type="data" />
        <param format="qza,no_unzip.zip" label="--i-reference-sequences: ARTIFACT FeatureData[Sequence] The sequences to use as cluster centroids. [required]" name="ireferencesequences" optional="False" type="data" />
        <!-- Kept as free text (no default exists upstream); the validator enforces the documented range (0, 1]. -->
        <param label="--p-perc-identity: PROPORTION Range(0, 1, inclusive_start=False, inclusive_end=True) The percent identity at which clustering should be performed. This parameter maps to vsearch's --id parameter. [required]" name="ppercidentity" optional="False" type="text">
            <validator type="regex" message="Enter a number greater than 0 and at most 1, e.g. 0.97">^(0?\.[0-9]*[1-9][0-9]*|1(\.0*)?)$</validator>
        </param>
        <param label="--p-strand: Search plus (i.e., forward) or both (i.e., forward and reverse complement) strands." name="pstrand" optional="True" type="select">
            <option selected="True" value="None">Selection is Optional</option>
            <option value="plus">plus</option>
            <option value="both">both</option>
        </param>
    </inputs>

    <outputs>
        <data format="qza" label="${tool.name} on ${on_string}: clusteredtable.qza" name="oclusteredtable" />
        <data format="qza" label="${tool.name} on ${on_string}: clusteredsequences.qza" name="oclusteredsequences" />
        <data format="qza" label="${tool.name} on ${on_string}: newreferencesequences.qza" name="onewreferencesequences" />
    </outputs>

    <help><![CDATA[
Open-reference clustering of features.
###############################################################

Given a feature table and the associated feature sequences, cluster the
features against a reference database based on user-specified percent
identity threshold of their sequences. Any sequences that don't match are
then clustered de novo. This is not a general-purpose clustering method,
but rather is intended to be used for clustering the results of quality-
filtering/dereplication methods, such as DADA2, or for re-clustering a
FeatureTable at a lower percent identity than it was originally clustered
at. When a group of features in the input table are clustered into a single
feature, the frequency of that single feature in a given sample is the sum
of the frequencies of the features that were clustered in that sample.
Feature identifiers will be inherited from the centroid feature of each
cluster. For features that match a reference sequence, the centroid feature
is that reference sequence, so its identifier will become the feature
identifier. The clustered_sequences result will contain feature
representative sequences that are derived from the sequences input for all
features in clustered_table. This will always be the most abundant sequence
in the cluster. The new_reference_sequences result will contain the entire
reference database, plus feature representative sequences for any de novo
features. This is intended to be used as a reference database in subsequent
iterations of cluster_features_open_reference, if applicable. See the
vsearch documentation for details on how sequence clustering is performed.

Parameters
----------
sequences : FeatureData[Sequence]
    The sequences corresponding to the features in table.
table : FeatureTable[Frequency]
    The feature table to be clustered.
reference_sequences : FeatureData[Sequence]
    The sequences to use as cluster centroids.
perc_identity : Float % Range(0, 1, inclusive_start=False, inclusive_end=True)
    The percent identity at which clustering should be performed. This
    parameter maps to vsearch's --id parameter.
strand : Str % Choices('plus', 'both'), optional
    Search plus (i.e., forward) or both (i.e., forward and reverse
    complement) strands.
threads : Int % Range(0, 256, inclusive_end=True), optional
    The number of threads to use for computation. Passing 0 will launch one
    thread per CPU core. In Galaxy this value is not chosen by the user; it
    is set from the number of slots allocated to the job.

Returns
-------
clustered_table : FeatureTable[Frequency]
    The table following clustering of features.
clustered_sequences : FeatureData[Sequence]
    Sequences representing clustered features.
new_reference_sequences : FeatureData[Sequence]
    The new reference sequences. This can be used for subsequent runs of
    open-reference clustering for consistent definitions of features across
    open-reference feature tables.
    ]]></help>
    <expand macro="qiime_citation"/>
</tool>