comparison qiime2/qiime_vsearch_cluster-features-open-reference.xml @ 0:370e0b6e9826 draft

Uploaded
author florianbegusch
date Wed, 17 Jul 2019 03:05:17 -0400
parents
children f190567fe3f6
comparison
equal deleted inserted replaced
-1:000000000000 0:370e0b6e9826
1 <?xml version="1.0" ?>
2 <tool id="qiime_vsearch_cluster-features-open-reference" name="qiime vsearch cluster-features-open-reference" version="2019.4">
3 <description> - Open-reference clustering of features.</description>
4 <requirements> <!-- Single package dependency; its version (2019.4) matches the version attribute on the tool element. -->
5 <requirement type="package" version="2019.4">qiime2</requirement>
6 </requirements>
7 <command><![CDATA[
8 qiime vsearch cluster-features-open-reference
9 ## Input artifacts and the identity threshold are always passed; dataset paths are single-quoted for the shell.
10 --i-sequences='$isequences'
11 --i-table='$itable'
12 --i-reference-sequences='$ireferencesequences'
13 --p-perc-identity="$ppercidentity"
14 ## The placeholder option of the strand select has the value 'None'; the flag is emitted only for a real choice.
15 #if str($pstrand) != 'None':
16 --p-strand=$pstrand
17 #end if
18 ## The thread count is the shell expression for GALAXY_SLOTS with a fallback of 4.
19 #set $pthreads = '${GALAXY_SLOTS:-4}'
20 #if str($pthreads):
21 --p-threads="$pthreads"
22 #end if
23 ## qiime gets extension-less output names; the cp steps below read the matching .qza files.
24 --o-clustered-table=oclusteredtable
25 --o-clustered-sequences=oclusteredsequences
26 --o-new-reference-sequences=onewreferencesequences
27 &&
28 cp oclusteredtable.qza '$oclusteredtable' &&
29 cp oclusteredsequences.qza '$oclusteredsequences' &&
30 cp onewreferencesequences.qza '$onewreferencesequences'
31 ]]></command>
32 <inputs>
33 <param format="qza,no_unzip.zip" label="--i-sequences: ARTIFACT FeatureData[Sequence] The sequences corresponding to the features in table. [required]" name="isequences" optional="False" type="data"/>
34 <param format="qza,no_unzip.zip" label="--i-table: ARTIFACT FeatureTable[Frequency] The feature table to be clustered. [required]" name="itable" optional="False" type="data"/>
35 <param format="qza,no_unzip.zip" label="--i-reference-sequences: ARTIFACT FeatureData[Sequence] The sequences to use as cluster centroids. [required]" name="ireferencesequences" optional="False" type="data"/>
36 <!-- Non-artifact parameters: the clustering threshold and the optional strand choice. -->
37 <param label="--p-perc-identity: PROPORTION Range(0, 1, inclusive_start=False, inclusive_end=True) The percent identity at which clustering should be performed. This parameter maps to vsearch's --id parameter. [required]" name="ppercidentity" optional="False" min="0" max="1" exclude_min="True" exclude_max="False" value="" type="float"/>
38 <param label="--p-strand: Search plus (i.e., forward) or both (i.e., forward and reverse complement) strands. [optional]" name="pstrand" optional="True" type="select">
39 <option selected="True" value="None">Selection is Optional</option>
40 <option value="plus">plus</option>
41 <option value="both">both</option>
42 </param>
43 </inputs>
44 <outputs> <!-- Three qza datasets; each name matches the $o... destination of a cp step in the command section. -->
45 <data format="qza" label="${tool.name} on ${on_string}: clusteredtable.qza" name="oclusteredtable"/>
46 <data format="qza" label="${tool.name} on ${on_string}: clusteredsequences.qza" name="oclusteredsequences"/>
47 <data format="qza" label="${tool.name} on ${on_string}: newreferencesequences.qza" name="onewreferencesequences"/>
48 </outputs>
49 <help><![CDATA[
50 Open-reference clustering of features.
51 ######################################
52
53 Given a feature table and the associated feature sequences, cluster the
54 features against a reference database based on a user-specified percent
55 identity threshold of their sequences. Any sequences that don't match are
56 then clustered de novo. This is not a general-purpose clustering method,
57 but rather is intended to be used for clustering the results of
58 quality-filtering/dereplication methods, such as DADA2, or for re-clustering a
59 FeatureTable at a lower percent identity than it was originally clustered
60 at. When a group of features in the input table are clustered into a single
61 feature, the frequency of that single feature in a given sample is the sum
62 of the frequencies of the features that were clustered in that sample.
63 Feature identifiers will be inherited from the centroid feature of each
64 cluster. For features that match a reference sequence, the centroid feature
65 is that reference sequence, so its identifier will become the feature
66 identifier. The clustered_sequences result will contain feature
67 representative sequences that are derived from the sequences input for all
68 features in clustered_table. This will always be the most abundant sequence
69 in the cluster. The new_reference_sequences result will contain the entire
70 reference database, plus feature representative sequences for any de novo
71 features. This is intended to be used as a reference database in subsequent
72 iterations of cluster_features_open_reference, if applicable. See the
73 vsearch documentation for details on how sequence clustering is performed.
74
75 Parameters
76 ----------
77 sequences : FeatureData[Sequence]
78 The sequences corresponding to the features in table.
79 table : FeatureTable[Frequency]
80 The feature table to be clustered.
81 reference_sequences : FeatureData[Sequence]
82 The sequences to use as cluster centroids.
83 perc_identity : Float % Range(0, 1, inclusive_start=False, inclusive_end=True)
84 The percent identity at which clustering should be performed. This
85 parameter maps to vsearch's --id parameter.
86 strand : Str % Choices('plus', 'both'), optional
87 Search plus (i.e., forward) or both (i.e., forward and reverse
88 complement) strands.
89
90 Returns
91 -------
92 clustered_table : FeatureTable[Frequency]
93 The table following clustering of features.
94 clustered_sequences : FeatureData[Sequence]
95 Sequences representing clustered features.
96 new_reference_sequences : FeatureData[Sequence]
97 The new reference sequences. This can be used for subsequent runs of
98 open-reference clustering for consistent definitions of features across
99 open-reference feature tables.
100 ]]></help>
101 <macros> <!-- Imports qiime_citation.xml; the qiime_citation macro expanded below is presumably defined there (confirm against that file). -->
102 <import>qiime_citation.xml</import>
103 </macros>
104 <expand macro="qiime_citation"/>
105 </tool>