comparison biopython-convert.xml @ 0:869e206b3ca1 draft

"planemo upload for repository https://github.com/brinkmanlab/galaxy-tools/tree/master/biopython-convert commit 2f8c5be3bf3117a8d3296a6b82655ff08edd2f34"
author brinkmanlab
date Fri, 24 Jan 2020 18:52:04 -0500
parents
children ff7e1478030d
comparison
equal deleted inserted replaced
-1:000000000000 0:869e206b3ca1
1 <tool id="biopython-convert" name="BioPython SeqIO Converter" version="1.0" profile="16.04">
2 <description>Interconvert between the various sequence file formats that BioPython supports</description>
3 <edam_topics>
4 <edam_topic>topic_0091</edam_topic>
5 </edam_topics>
6 <edam_operations>
7 <edam_operation>operation_3434</edam_operation>
8 <edam_operation>operation_0335</edam_operation>
9 <edam_operation>operation_3359</edam_operation>
10 <edam_operation>operation_0224</edam_operation>
11 <edam_operation>operation_3695</edam_operation>
12 </edam_operations>
13 <requirements>
14 <requirement type="package" version="3.7">python</requirement>
15 <requirement type="package" version="1.0">biopython.convert</requirement>
16 </requirements>
17 <version_command><![CDATA[ biopython.convert -v ]]></version_command>
18 <command detect_errors="aggressive"><![CDATA[
19 ## Pick the executable: the bundled bioperl_compat.py wrapper when requested, otherwise biopython.convert.
20 #if $bioperl
21 #set $script = "python '" + $__tool_directory__ + "/bioperl_compat.py'"
22 #else
23 #set $script = 'biopython.convert'
24 #end if
25 ## Split mode creates ./output and passes output/record as the output path; otherwise the output dataset is the target.
26 #if $split
27 mkdir -p output &&
28 #set $target = 'output/record'
29 #else
30 #set $target = $output
31 #end if
32 $script $split $info
33 #if $query
34 -q '$query'
35 #end if
36 '$input' $input.ext '$target' $output_type
37 ## When info is enabled, stdout of the command is redirected into the info_output dataset.
38 #if $info
39 > '$info_output'
40 #end if
41 ]]></command>
42 <inputs>
43 <param name="input" type="data" format="abi,abi-trim,ace,cif-atom,cif-seqres,clustal,embl,fasta,fasta-2line,fastq-sanger,fastq,fastq-solexa,fastq-illumina,genbank,gb,ig,imgt,nexus,pdb-seqres,pdb-atom,phd,phylip,pir,seqxml,sff,sff-trim,stockholm,swiss,tab,qual,uniprot-xml,gff3" label="Input" />
44 <param name="output_type" type="select" label="Output Format">
45 <option value="clustal">clustal</option>
46 <option value="embl">embl</option>
47 <option value="fasta">fasta</option>
48 <option value="fasta-2line">fasta-2line</option>
49 <option value="fastq-sanger">fastq-sanger</option>
50 <option value="fastq">fastq</option>
51 <option value="fastq-solexa">fastq-solexa</option>
52 <option value="fastq-illumina">fastq-illumina</option>
53 <option value="genbank">genbank</option>
54 <option value="gb">gb</option>
55 <option value="imgt">imgt</option>
56 <option value="nexus">nexus</option>
57 <option value="phd">phd</option>
58 <option value="phylip">phylip</option>
59 <option value="pir">pir</option>
60 <option value="seqxml">seqxml</option>
61 <option value="sff">sff</option>
62 <option value="stockholm">stockholm</option>
63 <option value="tab">tab</option>
64 <option value="qual">qual</option>
65 <option value="gff3">gff3</option>
66 </param>
67 <param name="split" type="boolean" truevalue="-s" falsevalue="" checked="false" label="Split each record into a different file" />
68 <param name="info" type="boolean" truevalue="-i" falsevalue="" checked="false" label="Output record information while converting" />
69 <param name="query" type="text" label="Query records, keeping only what matches path" help="Provide a JMESPath selecting records to keep. The root is the list of records. The path must return a list of records.">
70 <sanitizer>
71 <valid initial="string.printable">
72 <remove value="&apos;" />
73 </valid>
74 </sanitizer>
75 </param>
76 <param name="bioperl" type="boolean" checked="false" label="Modify biopython to generate files similar to bioperl" />
77 </inputs>
78 <outputs>
79 <data name="info_output" format="gff3" label="Record Info from ${tool.name} on ${on_string}">
80 <filter>info</filter>
81 </data>
82 <data name="output" format="txt" label="Output from ${tool.name} on ${on_string}">
83 <filter>not split</filter>
84 <change_format>
85 <when input="output_type" value="clustal" format="clustal" />
86 <when input="output_type" value="embl" format="embl" />
87 <when input="output_type" value="fasta" format="fasta" />
88 <when input="output_type" value="fasta-2line" format="fasta-2line" />
89 <when input="output_type" value="fastq-sanger" format="fastq-sanger" />
90 <when input="output_type" value="fastq" format="fastq" />
91 <when input="output_type" value="fastq-solexa" format="fastq-solexa" />
92 <when input="output_type" value="fastq-illumina" format="fastq-illumina" />
93 <when input="output_type" value="genbank" format="genbank" />
94 <when input="output_type" value="gb" format="genbank" />
95 <when input="output_type" value="imgt" format="imgt" />
96 <when input="output_type" value="nexus" format="nexus" />
97 <when input="output_type" value="phd" format="phd" />
98 <when input="output_type" value="phylip" format="phylip" />
99 <when input="output_type" value="pir" format="pir" />
100 <when input="output_type" value="seqxml" format="seqxml" />
101 <when input="output_type" value="sff" format="sff" />
102 <when input="output_type" value="stockholm" format="stockholm" />
103 <when input="output_type" value="tab" format="tabular" />
104 <when input="output_type" value="qual" format="qual" />
105 <when input="output_type" value="gff3" format="gff3" />
106 </change_format>
107 </data>
108 <collection name="split_output" type="list" label="Output split per record from ${tool.name} on ${on_string}"> <!-- Only produced when split is enabled; one element per file found in ./output -->
109 <filter>split</filter>
110 <discover_datasets pattern="(?P&lt;designation&gt;.+)" directory="output" format="$output_type" />
111 </collection>
112 </outputs>
113 <tests>
114 <test expect_num_outputs="1">
115 <!-- Basic conversion to the same format: GenBank in, GenBank out, no optional flags set -->
116 <param name="input" value="BioPython-Convert/test-data/has_plasmids.gbff" ftype="genbank" />
117 <param name="output_type" value="genbank" />
118 <output name="output" checksum="sha256:2808187bb8e2231545e4d2d7a27dc802df4d1f7c0e953a8399300b2df6b0c737" ftype="genbank" />
119 </test>
120 <test expect_num_outputs="2">
121 <!-- Same-format conversion with record info enabled: expects the converted dataset plus the GFF3 info dataset -->
122 <param name="input" value="BioPython-Convert/test-data/has_plasmids.gbff" ftype="genbank" />
123 <param name="info" value="true" />
124 <param name="output_type" value="genbank" />
125 <output name="output" checksum="sha256:2808187bb8e2231545e4d2d7a27dc802df4d1f7c0e953a8399300b2df6b0c737" ftype="genbank" />
126 <output name="info_output" checksum="sha256:a611656c5a7e7f719c3d64f6b348b67c1abcb8ed56fa82f51fc90cbe2125e5f0" ftype="gff3" />
127 </test>
128 <test expect_num_outputs="1">
129 <!-- Basic conversion to a different format: GenBank in, EMBL out -->
130 <param name="input" value="BioPython-Convert/test-data/has_plasmids.gbff" ftype="genbank" />
131 <param name="output_type" value="embl" />
132 <output name="output" checksum="sha256:5598cb679f5f6c31349968ddde3646fe97296da42ee528ed3f46dec3f5490cbd" ftype="embl" />
133 </test>
134 <test expect_num_outputs="1">
135 <!-- Same-format conversion with a JMESPath query applied; the query is the plasmid-removal example from the help text -->
136 <param name="input" value="BioPython-Convert/test-data/has_plasmids.gbff" ftype="genbank" />
137 <param name="query" value="[?!(features[?type==`source`].qualifiers.plasmid)]" />
138 <param name="output_type" value="genbank" />
139 <output name="output" checksum="sha256:e142d7e1fbd103c96e3b728e3b75f7af6955c97cdbddb87c3202f2c1e2f133d4" ftype="genbank" />
140 </test>
141 <test expect_num_outputs="1">
142 <!-- Test split: split must be enabled here, otherwise the split_output collection is filtered out -->
143 <param name="input" value="BioPython-Convert/test-data/has_plasmids.gbff" ftype="genbank" />
144 <param name="output_type" value="genbank" /> <param name="split" value="true" />
145 <output_collection name="split_output" type="list" count="3">
146 <element name="record.0" ftype="genbank" checksum="sha256:8d02b2087c4cea42da7c5f0a69b7a40d544d953c1a9d611b97bd116cc1f8cd7f" />
147 <element name="record.1" ftype="genbank" checksum="sha256:e37ecc4288ae8b2c3bea25484326a69ced9679fa791162ed593064fdf535944d" />
148 <element name="record.2" ftype="genbank" checksum="sha256:e142d7e1fbd103c96e3b728e3b75f7af6955c97cdbddb87c3202f2c1e2f133d4" />
149 </output_collection>
150 </test>
151 </tests>
152 <help><![CDATA[
153 Interconvert between different file formats that BioPython SeqIO supports. Support for GFF3 has also been included.
154
155 Included features are:
156
157 - Split: Output a collection of datasets, one for each record in the input.
158 This is useful when a tool only accepts single records.
159 The resulting output collection can then be mapped over the receiving tool.
160 - Info: Output an additional GFF3 dataset that contains a summary record for each record in the output dataset.
161 This is useful for extracting sequence IDs, counting how many records are in the output dataset, and various
162 diagnostic processes.
163 - Query: Datasets can be queried or filtered using JMESPath query language.
164 For example ``[?!(features[?type==`source`].qualifiers.plasmid)]`` will remove any plasmid records sometimes
165 found in prokaryotic Genbank or EMBL reference genomes.
166 See http://jmespath.org/ for documentation, and https://biopython.org/DIST/docs/api/Bio.SeqFeature.SeqFeature-class.html
167 for the data structure. Single quotes ``'`` are not permitted in the query.
168
169 ]]></help>
170 <citations>
171 <citation type="doi">10.5281/zenodo.3364782</citation>
172 <citation type="doi">10.5281/zenodo.3364789</citation>
173 </citations>
174 </tool>