comparison filter_by_fasta_ids.xml @ 2:1bd985f14938 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/filter_by_fasta_ids commit 2bc87e917c91a3b7a43996a0f3752b8992c0c749
author galaxyp
date Sat, 28 Apr 2018 03:49:28 -0400
parents 8d15aebf55fd
children 3c623e81be77
comparison
equal deleted inserted replaced
1:8d15aebf55fd 2:1bd985f14938
1 <tool id="filter_by_fasta_ids" version="1.0" name="Filter by FASTA IDs"> 1 <tool id="filter_by_fasta_ids" name="Filter FASTA" version="2.0">
2 <description>Extract sequences from a FASTA file based on a list of IDs</description> 2 <description>on the headers and/or the sequences</description>
3 <command> 3 <macros>
4 <![CDATA[ 4 <xml name="regexp_macro" token_label="Regular expression pattern">
5 python $__tool_directory__/filter_by_fasta_ids.py 5 <param name="regexp" type="text" value="" label="@LABEL@" help="Use the Python regular expression syntax as specified in https://docs.python.org/3/library/re.html">
6 $dedup 6 <validator type="empty_field" />
7 '$identifiers' 7 <sanitizer>
8 '$input' 8 <valid initial="string.printable">
9 '$output' 9 <remove value="'"/>
10 ]]> 10 </valid>
11 </command> 11 <mapping initial="none">
12 <add source="'" target="'&quot;'&quot;'" />
13 </mapping>
14 </sanitizer>
15 </param>
16 </xml>
17 </macros>
18 <requirements>
19 <requirement type="package" version="3.6.5">python</requirement>
20 </requirements>
21 <command><![CDATA[
22 python '$__tool_directory__/filter_by_fasta_ids.py'
23 -i '$input'
24 #if $header_criteria.header_criteria_select == 'id_list'
25 --id_list '$header_criteria.identifiers'
26 #elif $header_criteria.header_criteria_select == 'regexp'
27 --header_regexp '$header_criteria.regexp'
28 #end if
29 #if $sequence_criteria.sequence_criteria_select == 'seq_length'
30 --min_length $sequence_criteria.min_length
31 #if str($sequence_criteria.max_length)
32 --max_length $sequence_criteria.max_length
33 #end if
34 #elif $sequence_criteria.sequence_criteria_select == 'regexp'
35 --sequence_regexp '$sequence_criteria.regexp'
36 #end if
37 $dedup
38 -o '$output'
39 #if $output_discarded
40 -d '$discarded'
41 #end if
42 ]]></command>
12 <inputs> 43 <inputs>
13 <param format="fasta" name="input" type="data" label="FASTA sequences"/> 44 <param name="input" type="data" format="fasta" label="FASTA sequences"/>
14 <param format="txt" name="identifiers" type="data" label="List of IDs to extract sequences for"/> 45 <conditional name="header_criteria">
15 <param name="dedup" type="boolean" truevalue="--dedup" falsevalue="" checked="true" label="Remove duplicate sequences" /> 46 <param name="header_criteria_select" type="select" label="Criteria for filtering on the headers">
47 <option value="">No filtering</option>
48 <option value="id_list">List of IDs</option>
49 <option value="regexp">Regular expression on the headers</option>
50 </param>
51 <when value="" />
52 <when value="id_list">
53 <param name="identifiers" type="data" format="txt" label="List of IDs to extract sequences for"/>
54 </when>
55 <when value="regexp">
56 <expand macro="regexp_macro" label="Regular expression pattern the header should match" />
57 </when>
58 </conditional>
59 <conditional name="sequence_criteria">
60 <param name="sequence_criteria_select" type="select" label="Criteria for filtering on the sequences">
61 <option value="">No filtering</option>
62 <option value="seq_length">Sequence length</option>
63 <option value="regexp">Regular expression on the sequences</option>
64 </param>
65 <when value="" />
66 <when value="seq_length">
67 <param name="min_length" type="integer" value="0" label="Minimum length" />
68 <param name="max_length" type="integer" min="1" value="" optional="true" label="Maximum length" />
69 </when>
70 <when value="regexp">
71 <expand macro="regexp_macro" label="Regular expression pattern the sequence should match" />
72 </when>
73 </conditional>
74 <param name="dedup" type="boolean" truevalue="--dedup" falsevalue="" label="Remove duplicate sequences" />
75 <param name="output_discarded" type="boolean" label="Output discarded FASTA entries" />
16 </inputs> 76 </inputs>
17 <outputs> 77 <outputs>
18 <data format="fasta" name="output" label="FASTA sequences for ${identifiers.name}"/> 78 <data name="output" format="fasta" label="${tool.name} on ${on_string}: FASTA sequences"/>
79 <data name="discarded" format="fasta" label="${tool.name} on ${on_string}: discarded entries">
80 <filter>output_discarded</filter>
81 </data>
19 </outputs> 82 </outputs>
20 <tests> 83 <tests>
21 <test> 84 <test expect_num_outputs="1">
22 <param name="input" ftype="fasta" value="input.fasta" /> 85 <param name="input" ftype="fasta" value="input.fasta" />
86 <param name="header_criteria_select" value="id_list" />
23 <param name="identifiers" ftype="txt" value="ids.txt" /> 87 <param name="identifiers" ftype="txt" value="ids.txt" />
88 <param name="dedup" value="True" />
24 <output name="output" file="output_dedup.fasta" /> 89 <output name="output" file="output_dedup.fasta" />
25 </test> 90 </test>
26 <test> 91 <test expect_num_outputs="2">
27 <param name="input" ftype="fasta" value="input.fasta" /> 92 <param name="input" ftype="fasta" value="input.fasta" />
93 <param name="header_criteria_select" value="id_list" />
28 <param name="identifiers" ftype="txt" value="ids.txt" /> 94 <param name="identifiers" ftype="txt" value="ids.txt" />
29 <param name="dedup" value="False" /> 95 <param name="dedup" value="False" />
96 <param name="output_discarded" value="True" />
30 <output name="output" file="output_not_dedup.fasta" /> 97 <output name="output" file="output_not_dedup.fasta" />
98 <output name="discarded" file="discarded_not_dedup.fasta" />
99 </test>
100 <test expect_num_outputs="2">
101 <param name="input" ftype="fasta" value="input.fasta" />
102 <param name="header_criteria_select" value="regexp" />
103 <param name="regexp" value="2" />
104 <param name="dedup" value="False" />
105 <param name="output_discarded" value="True" />
106 <output name="output" file="output_header_regexp.fasta" />
107 <output name="discarded" file="discarded_header_regexp.fasta" />
108 </test>
109 <test expect_num_outputs="2">
110 <param name="input" ftype="fasta" value="input.fasta" />
111 <param name="sequence_criteria_select" value="seq_length" />
112 <param name="min_length" value="5" />
113 <param name="dedup" value="False" />
114 <param name="output_discarded" value="True" />
115 <output name="output" file="output_min_length5.fasta" />
116 <output name="discarded" file="discarded_min_length5.fasta" />
117 </test>
118 <test expect_num_outputs="2">
119 <param name="input" ftype="fasta" value="input.fasta" />
120 <param name="sequence_criteria_select" value="seq_length" />
121 <param name="max_length" value="4" />
122 <param name="dedup" value="False" />
123 <param name="output_discarded" value="True" />
124 <output name="output" file="output_max_length4.fasta" />
125 <output name="discarded" file="discarded_max_length4.fasta" />
126 </test>
127 <test expect_num_outputs="2">
128 <param name="input" ftype="fasta" value="input.fasta" />
129 <param name="sequence_criteria_select" value="regexp" />
130 <param name="regexp" value="T{2,}" />
131 <param name="dedup" value="False" />
132 <param name="output_discarded" value="True" />
133 <output name="output" file="output_sequence_regexp.fasta" />
134 <output name="discarded" file="discarded_sequence_regexp.fasta" />
31 </test> 135 </test>
32 </tests> 136 </tests>
33 <help> 137 <help><![CDATA[
34 <![CDATA[
35 **What it does** 138 **What it does**
36 139
37 Extract sequences from a FASTA file based on a list of IDs. 140 Filter entries of a FASTA file on the headers and/or the sequences based on various criteria.
38 ]]> 141 ]]></help>
39 </help>
40 </tool> 142 </tool>