Mercurial > repos > iuc > getorganelle
view get_annotated_regions_from_gb.xml @ 5:ba88bc5a6446 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/getorganelle commit 5a871a3fb322bf6d69ff0198111d3ce53b802d94
author | iuc |
---|---|
date | Mon, 15 Apr 2024 06:27:37 +0000 |
parents | 7348b69e5109 |
children |
line wrap: on
line source
<!--
    Galaxy wrapper for the GetOrganelle helper script get_annotated_regions_from_gb.py.
    It takes one or more annotated GenBank datasets, extracts the regions of one
    annotation type (CDS, tRNA or rRNA) and collects the mixed-mode FASTA file the
    script writes to results_directory/gene/gene.fasta.
-->
<tool id="get_annotated_regions_from_gb" name="Get annotated regions from genbank files (getorganelle)" version="0.1.0" python_template_version="3.5">
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- Both version tokens are expected to be defined in macros.xml. -->
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">getorganelle</requirement>
        <requirement type="package" version="@BIOPYTHON_VERSION@">biopython</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
        #import re
        ## Link every input into the working directory under a shell-friendly name:
        ## characters other than word characters, hyphens and whitespace become "_",
        ## and the resulting "_gb" is turned back into a ".gb" suffix.
        #set $file_names = []
        #for $input in $inputs
            #set $base_name = re.sub('[^\w\-\s]', '_', str($input.name)).replace('_gb', '.gb')
            ## Datasets in a history may share a name. Reusing a link name would make
            ## "ln -s" fail, so repeated names get a numeric prefix; unique names are
            ## left exactly as before.
            #set $input_name = $base_name
            #set $copy = 1
            #while $input_name in $file_names
                #set $input_name = '%d_%s' % ($copy, $base_name)
                #set $copy += 1
            #end while
            ln -s '$input' '$input_name' &&
            ## silent: record the name without emitting anything into the command line
            #silent $file_names.append($input_name)
        #end for

        ## Run the extraction script on all linked files.
        get_annotated_regions_from_gb.py
        #for $file_name in $file_names
            '$file_name'
        #end for
        -o results_directory
        ## The select parameter only offers CDS, tRNA and rRNA, so its value is passed through.
        -t '$gene_type_selector'
        ## Mixed mode writes all genes to one FASTA file, which is the file collected as output.
        --mix
    ]]></command>
    <inputs>
        <param name="inputs" type="data" format="genbank" multiple="true"
               label="Annotated genbank file(s)"
               help="Genbank files with annotated regions to extract. Multiple files can be selected."/>
        <param name="gene_type_selector" type="select" label="Gene type">
            <option value="CDS" selected="true">CDS</option>
            <option value="tRNA">tRNA</option>
            <option value="rRNA">rRNA</option>
        </param>
    </inputs>
    <outputs>
        <data name="output_fasta" format="fasta" from_work_dir="results_directory/gene/gene.fasta" label="${tool.name} on ${on_string}: Annotated genes"/>
    </outputs>
    <!-- One test per gene type; each checks for one expected FASTA header per input file. -->
    <tests>
        <test>
            <param name="inputs" value="NC_047059.gb,NC_047060.gb,NC_047400.gb"/>
            <param name="gene_type_selector" value="CDS"/>
            <assert_stdout>
                <has_text text="Time cost"/>
            </assert_stdout>
            <output name="output_fasta">
                <assert_contents>
                    <has_line line=">matK CDS - NC_047059--Styphnolobium_japonicum_voucher_Yi15212-KUN_plastid__complete_genome"/>
                    <has_line line=">matK CDS - NC_047060--Haematoxylum_brasiletto_voucher_N._Zamora6857-Costa_Rica_plastid__complete_genome"/>
                    <has_line line=">matK CDS - NC_047400--Chamaecrista_mimosoides_voucher_Yi15441-KUN_plastid__complete_genome"/>
                </assert_contents>
            </output>
        </test>
        <test>
            <param name="inputs" value="NC_047059.gb,NC_047060.gb,NC_047400.gb"/>
            <param name="gene_type_selector" value="tRNA"/>
            <assert_stdout>
                <has_text text="Time cost"/>
            </assert_stdout>
            <output name="output_fasta">
                <assert_contents>
                    <has_line line=">trnA-UGC tRNA - NC_047059--Styphnolobium_japonicum_voucher_Yi15212-KUN_plastid__complete_genome"/>
                    <has_line line=">trnA-UGC tRNA - NC_047060--Haematoxylum_brasiletto_voucher_N._Zamora6857-Costa_Rica_plastid__complete_genome"/>
                    <has_line line=">trnA-UGC tRNA - NC_047400--Chamaecrista_mimosoides_voucher_Yi15441-KUN_plastid__complete_genome"/>
                </assert_contents>
            </output>
        </test>
        <test>
            <param name="inputs" value="NC_047059.gb,NC_047060.gb,NC_047400.gb"/>
            <param name="gene_type_selector" value="rRNA"/>
            <assert_stdout>
                <has_text text="Time cost"/>
            </assert_stdout>
            <output name="output_fasta">
                <assert_contents>
                    <has_line line=">rrn16 rRNA - NC_047059--Styphnolobium_japonicum_voucher_Yi15212-KUN_plastid__complete_genome"/>
                    <has_line line=">rrn16 rRNA - NC_047060--Haematoxylum_brasiletto_voucher_N._Zamora6857-Costa_Rica_plastid__complete_genome"/>
                    <has_line line=">rrn16 rRNA - NC_047400--Chamaecrista_mimosoides_voucher_Yi15441-KUN_plastid__complete_genome"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <!-- The help is reStructuredText; the script's own option listing sits in a literal block so it is shown verbatim. -->
    <help><![CDATA[
Python script to extract annotated genes from genbank files. The annotated genes are in the correct format to be used as seed sequences in GetOrganelle.

Help information from the python script is below. Only options for input genbank files and gene type are included in this tool.

By jinjianjun@mail.kib.ac.cn 2017

::

    Usage: get_annotated_regions_from_gb.py gb_files -o out_dir

    Options:
      -h, --help            show this help message and exit
      -o OUT_PUT            Output.
      -t GENE_TYPES         Annotation type taken as gene. Default: CDS,tRNA,rRNA
      --separate-copy       By default, only keep one copy (see '--copy-mode' for
                            more) if there are several regions with the same name.
                            Exception: if there are one copy with intron(s) and
                            another copy without intron, they would be both kept.
                            This exception was specially made for the convenience
                            of commonly-incorrectly-annotated rps12 gene of
                            plastome.
      --copy-mode=COPY_MODE
                            first|longest|leastN|leastN_longest (default).
      --separate-exon       By default, combining exons.
      --keys=GENE_KEYS      The key to the gene name: gene, label, product or
                            other keys in the qualifiers region.Default:
                            gene,label,product,note.
      --mix                 Mix different genes into a single fasta file. In this
                            mode, the sequence header will be gene_name - gb_info
      --case-mode=CASE_TREATMENT
                            first: Gene name case-non-sensitive. Consistent to the
                            first appearance. lower: Gene name case-non-
                            sensitive. All gene name set to lower case. upper:
                            Gene name case-non-sensitive. All gene name set to
                            Upper case. raw: Gene name case-sensitive.
      --ignore-format-error
                            Skip the Error: key "*" not found in annotation. Not
                            suggested.
      --translate-to-product
                            Translate the tRNA gene name to the form of their
                            product. Default: False
      --overwrite           Choose to overwrite previous result.
    ]]></help>
    <citations>
        <citation type="doi">10.1093/sysbio/syaa047</citation>
    </citations>
</tool>