comparison get_annotated_regions_from_gb.xml @ 2:06bcf65179fb draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/getorganelle commit b7f743ebc67b5ffabac0eddf3b20960a6444365f
author iuc
date Thu, 23 Feb 2023 17:06:37 +0000
parents
children 7348b69e5109
comparison
equal deleted inserted replaced
1:8b330a577046 2:06bcf65179fb
1 <tool id="get_annotated_regions_from_gb" name="Get annotated regions from genbank files (getorganelle)" version="0.1.0" python_template_version="3.5">
2 <macros> <!-- shared definitions; presumably supplies the version tokens used in the requirements (macros.xml is not shown here - confirm) -->
3 <import>macros.xml</import>
4 </macros>
5 <requirements> <!-- packages the command section relies on: the getorganelle package and biopython -->
6 <requirement type="package" version="@TOOL_VERSION@">getorganelle</requirement>
7 <requirement type="package" version="@BIOPYTHON_VERSION@">biopython</requirement>
8 </requirements>
9 <command detect_errors="exit_code"><![CDATA[
10 #import re
11
12 ## Link every input into the working directory under a sanitised name:
13 ## characters other than word characters, '-' and whitespace become '_', and every '_gb' is turned back into '.gb'.
14 ## Datasets that share a name get a numeric prefix, otherwise the second "ln -s" would fail on the existing link.
15
16 #set $file_names = []
17 #for $input in $inputs
18 #set $base_name = re.sub('[^\w\-\s]', '_', str($input.name)).replace('_gb', '.gb')
19 #set $input_name = $base_name
20 #set $copy = 0
21 #while $input_name in $file_names
22 #set $copy += 1
23 #set $input_name = str($copy) + '_' + $base_name
24 #end while
25 #silent $file_names.append($input_name)
26 ln -s '$input' '$input_name' &&
27 #end for
28
29 ## Run the script on all linked files and write one mixed fasta below results_directory.
30 get_annotated_regions_from_gb.py
31 #for $file_name in $file_names
32 '$file_name'
33 #end for
34 -o results_directory
35 ## The select parameter only offers CDS, tRNA and rRNA, so its value is passed straight through.
36 -t '$gene_type_selector'
37 --mix
38
39 ]]></command>
40 <inputs> <!-- one or more genbank datasets plus the single annotation type to extract -->
41 <param name="inputs" type="data" format="gb" multiple="true" label="Annotated genbank file(s)" help="Genbank files with annotated regions to extract. Multiple files can be selected." />
42 <param name="gene_type_selector" type="select" label="Gene type">
43 <option value="CDS" selected="true">CDS</option>
44 <option value="tRNA">tRNA</option>
45 <option value="rRNA">rRNA</option>
46 </param>
47 </inputs>
48 <outputs> <!-- the fasta file collected from the directory passed to -o in the command -->
49 <data name="output_fasta" format="fasta" label="${tool.name} on ${on_string}: Annotated genes" from_work_dir="results_directory/gene/gene.fasta"/>
50 </outputs>
51 <tests>
52 <test> <!-- CDS: stdout must mention "Time cost" and the fasta must contain a matK header for each of the three inputs -->
53 <param name="inputs" value="NC_047059.gb,NC_047060.gb,NC_047400.gb"/>
54 <param name="gene_type_selector" value="CDS"/>
55 <assert_stdout>
56 <has_text text="Time cost" />
57 </assert_stdout>
58 <output name="output_fasta">
59 <assert_contents>
60 <has_line line=">matK CDS - NC_047059--Styphnolobium_japonicum_voucher_Yi15212-KUN_plastid__complete_genome" />
61 <has_line line=">matK CDS - NC_047060--Haematoxylum_brasiletto_voucher_N._Zamora6857-Costa_Rica_plastid__complete_genome" />
62 <has_line line=">matK CDS - NC_047400--Chamaecrista_mimosoides_voucher_Yi15441-KUN_plastid__complete_genome" />
63 </assert_contents>
64 </output>
65 </test>
66 <test> <!-- tRNA: same three inputs, the fasta must contain a trnA-UGC header for each of them -->
67 <param name="inputs" value="NC_047059.gb,NC_047060.gb,NC_047400.gb"/>
68 <param name="gene_type_selector" value="tRNA"/>
69 <assert_stdout>
70 <has_text text="Time cost" />
71 </assert_stdout>
72 <output name="output_fasta">
73 <assert_contents>
74 <has_line line=">trnA-UGC tRNA - NC_047059--Styphnolobium_japonicum_voucher_Yi15212-KUN_plastid__complete_genome" />
75 <has_line line=">trnA-UGC tRNA - NC_047060--Haematoxylum_brasiletto_voucher_N._Zamora6857-Costa_Rica_plastid__complete_genome" />
76 <has_line line=">trnA-UGC tRNA - NC_047400--Chamaecrista_mimosoides_voucher_Yi15441-KUN_plastid__complete_genome" />
77 </assert_contents>
78 </output>
79 </test>
80 <test> <!-- rRNA: same three inputs, the fasta must contain an rrn16 header for each of them -->
81 <param name="inputs" value="NC_047059.gb,NC_047060.gb,NC_047400.gb"/>
82 <param name="gene_type_selector" value="rRNA"/>
83 <assert_stdout>
84 <has_text text="Time cost" />
85 </assert_stdout>
86 <output name="output_fasta">
87 <assert_contents>
88 <has_line line=">rrn16 rRNA - NC_047059--Styphnolobium_japonicum_voucher_Yi15212-KUN_plastid__complete_genome" />
89 <has_line line=">rrn16 rRNA - NC_047060--Haematoxylum_brasiletto_voucher_N._Zamora6857-Costa_Rica_plastid__complete_genome" />
90 <has_line line=">rrn16 rRNA - NC_047400--Chamaecrista_mimosoides_voucher_Yi15441-KUN_plastid__complete_genome" />
91 </assert_contents>
92 </output>
93 </test>
94 </tests>
95 <help><![CDATA[
96
97 Python script to extract annotated genes from genbank files. The annotated genes are in the correct format to be used as seed sequences in GetOrganelle.
98 Help information from the python script is below. Only the options for the input genbank files and the gene type are exposed by this tool; --mix is always applied.
99
100 By jinjianjun@mail.kib.ac.cn 2017
101 Usage: get_annotated_regions_from_gb.py gb_files -o out_dir
102
103 Options:
104 -h, --help show this help message and exit
105 -o OUT_PUT Output.
106 -t GENE_TYPES Annotation type taken as gene. Default: CDS,tRNA,rRNA
107 --separate-copy By default, only keep one copy (see '--copy-mode' for
108 more) if there are several regions with the same name.
109 Exception: if there are one copy with intron(s) and
110 another copy without intron, they would be both kept.
111 This exception was specially made for the convenience
112 of commonly-incorrectly-annotated rps12 gene of
113 plastome.
114 --copy-mode=COPY_MODE
115 first|longest|leastN|leastN_longest (default).
116 --separate-exon By default, combining exons.
117 --keys=GENE_KEYS The key to the gene name: gene, label, product or
118 other keys in the qualifiers region.Default:
119 gene,label,product,note.
120 --mix Mix different genes into a single fasta file. In this
121 mode, the sequence header will be gene_name - gb_info
122 --case-mode=CASE_TREATMENT
123 first: Gene name case-non-sensitive. Consistent to the
124 first appearance. lower: Gene name case-non-
125 sensitive. All gene name set to lower case. upper:
126 Gene name case-non-sensitive. All gene name set to
127 Upper case. raw: Gene name case-sensitive.
128 --ignore-format-error
129 Skip the Error: key "*" not found in annotation. Not
130 suggested.
131 --translate-to-product
132 Translate the tRNA gene name to the form of their
133 product. Default: False
134 --overwrite Choose to overwrite previous result.
135
136 ]]></help>
137 <citations>
138 <citation type="doi">10.1093/sysbio/syaa047</citation> <!-- NOTE(review): confirm this DOI is the intended reference for the wrapped script -->
139 </citations>
140 </tool>