Mercurial > repos > iuc > data_manager_salmon_index_builder
changeset 5:4d92281e3b30 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_salmon_index_builder commit aed25572a6ac6a1f8acc72bb25ed3c337a623696
author | iuc |
---|---|
date | Thu, 16 Oct 2025 20:11:44 +0000 |
parents | 566207ae614c |
children | |
files | data_manager/salmon_index_builder.py data_manager/salmon_index_builder.xml test-data/phiX174_transcripts.fasta test-data/transcriptomes.loc tool_data_table_conf.xml.sample tool_data_table_conf.xml.test |
diffstat | 6 files changed, 217 insertions(+), 104 deletions(-) [+] |
line wrap: on
line diff
--- a/data_manager/salmon_index_builder.py Sun Apr 16 08:31:17 2023 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,84 +0,0 @@ -#!/usr/bin/env python -# Based heavily on the kallisto data manager wrapper script by iuc -from __future__ import print_function - -import argparse -import json -import os -import subprocess -import sys - -DEFAULT_DATA_TABLE_NAME = "salmon_indexes_versioned" - - -def get_id_name(params, dbkey, fasta_description=None): - # TODO: ensure sequence_id is unique and does not already appear in location file - sequence_id = params['param_dict']['sequence_id'] - if not sequence_id: - sequence_id = dbkey - - sequence_name = params['param_dict']['sequence_name'] - if not sequence_name: - sequence_name = fasta_description - if not sequence_name: - sequence_name = dbkey - return sequence_id, sequence_name - - -def build_salmon_index(data_manager_dict, options, params, sequence_id, sequence_name): - data_table_name = options.data_table_name or DEFAULT_DATA_TABLE_NAME - target_directory = params['output_data'][0]['extra_files_path'] - if not os.path.exists(target_directory): - os.mkdir(target_directory) - args = ['salmon', 'index'] - if options.kmer_size != '': - args.append('-k') - args.append(options.kmer_size) - args.extend(['-t', options.fasta_filename, '-i', target_directory]) - return_code = subprocess.call(args=args, shell=False) - if return_code: - print("Error building index.", file=sys.stderr) - sys.exit(return_code) - data_table_entry = dict(value=sequence_id, dbkey=options.fasta_dbkey, name=sequence_name, path=sequence_id, version=options.index_version) - _add_data_table_entry(data_manager_dict, data_table_name, data_table_entry) - - -def _add_data_table_entry(data_manager_dict, data_table_name, data_table_entry): - data_manager_dict['data_tables'] = data_manager_dict.get('data_tables', {}) - data_manager_dict['data_tables'][data_table_name] = data_manager_dict['data_tables'].get(data_table_name, []) - 
data_manager_dict['data_tables'][data_table_name].append(data_table_entry) - return data_manager_dict - - -def main(): - # Parse Command Line - parser = argparse.ArgumentParser() - parser.add_argument('--output', dest='output', action='store', type=str, default=None) - parser.add_argument('--fasta_filename', dest='fasta_filename', action='store', type=str, default=None) - parser.add_argument('--fasta_dbkey', dest='fasta_dbkey', action='store', type=str, default=None) - parser.add_argument('--fasta_description', dest='fasta_description', action='store', type=str, default=None) - parser.add_argument('--data_table_name', dest='data_table_name', action='store', type=str, default='salmon_indexes') - parser.add_argument('-v', '--index_version', dest='index_version', action='store', type=str, help='Use IndexVersion attribute from header.json') - parser.add_argument('-k', '--kmer_size', dest='kmer_size', action='store', type=str, help='kmer_size') - options = parser.parse_args() - - filename = options.output - - with open(filename) as fh: - params = json.load(fh) - data_manager_dict = {} - - if options.fasta_dbkey in [None, '', '?']: - raise Exception('"%s" is not a valid dbkey. You must specify a valid dbkey.' % options.fasta_dbkey) - - sequence_id, sequence_name = get_id_name(params, dbkey=options.fasta_dbkey, fasta_description=options.fasta_description) - # build the index - build_salmon_index(data_manager_dict, options, params, sequence_id, sequence_name) - - # save info to json file - with open(filename, 'w') as out: - json.dump(data_manager_dict, out, sort_keys=True) - - -if __name__ == "__main__": - main()
--- a/data_manager/salmon_index_builder.xml Sun Apr 16 08:31:17 2023 +0000 +++ b/data_manager/salmon_index_builder.xml Thu Oct 16 20:11:44 2025 +0000 @@ -1,42 +1,108 @@ -<tool id="salmon_index_builder_data_manager" name="Salmon" tool_type="manage_data" version="1.3.0" profile="19.01"> +<tool id="salmon_index_builder_data_manager" name="Salmon" tool_type="manage_data" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="24.0"> <description>index builder</description> + <macros> + <token name="@TOOL_VERSION@">1.3.0</token> + <token name="@VERSION_SUFFIX@">1</token> + <token name="@PROFILE@">24.0</token> + <token name="@IDX_VERSION@">q7</token> + </macros> <requirements> - <requirement type="package" version="1.3.0">salmon</requirement> - <requirement type="package" version="3.7">python</requirement> + <requirement type="package" version="@TOOL_VERSION@">salmon</requirement> </requirements> - <macros> - <token name="@IDX_VERSION@">q7</token> - </macros> <command detect_errors="exit_code"><![CDATA[ - python '$__tool_directory__/salmon_index_builder.py' --output '${out_file}' - --fasta_filename '${all_fasta_source.fields.path}' - --fasta_dbkey '${all_fasta_source.fields.dbkey}' - --fasta_description '${all_fasta_source.fields.name}' - --kmer_size "${kmer_size}" - --data_table_name salmon_indexes_versioned - --index_version @IDX_VERSION@ - ]]> - </command> + ## https://combine-lab.github.io/alevin-tutorial/2019/selective-alignment/ + ## https://salmon.readthedocs.io/en/latest/salmon.html#preparing-transcriptome-indices-mapping-based-mode + + #for $transcripts in $transcriptome.fields.path.split(",") + (zcat '$transcripts' 2>/dev/null || cat '$transcripts') >> gentrome.fa && + #end for + (zcat '$all_fasta_source.fields.path' 2>/dev/null || cat '$all_fasta_source.fields.path') >> gentrome.fa && + + (zcat '$all_fasta_source.fields.path' 2>/dev/null || cat '$all_fasta_source.fields.path') | awk '{if($1 ~ /^>/) print $1}' | cut -c2- | tr -d " " > decoys.txt && + + mkdir 
'$out_file.extra_files_path' && + + salmon index + -k $kmer_size + -t gentrome.fa + -d decoys.txt + -i '$out_file.extra_files_path' + -p "\${GALAXY_SLOTS:-12}" + $gencode + && + + cp '$dmjson' '$out_file' + ]]></command> + <configfiles> + <configfile name="dmjson"><![CDATA[{ +#if str($sequence_id).strip() == "" + #set sequence_id = $all_fasta_source.fields.dbkey +#end if +#if str($sequence_name).strip() == "" + #set sequence_name = $all_fasta_source.fields.dbkey +#end if + + "data_tables":{ + "salmon_indexes_versioned":[ + { + "value": "$sequence_id", + "dbkey": "$all_fasta_source.fields.dbkey", + "name": "$sequence_name", + "path": "$out_file.extra_files_path", + "version": "@IDX_VERSION@" + } + ] + } +}]]></configfile> + </configfiles> <inputs> - <param label="Source FASTA Sequence" name="all_fasta_source" type="select"> + <param label="Transcriptome sequences" name="transcriptome" optional="false" type="select"> + <options from_data_table="transcriptomes" /> + </param> + <param label="Genome" name="all_fasta_source" optional="false" type="select"> <options from_data_table="all_fasta" /> </param> <param name="sequence_name" type="text" value="" label="Name of sequence" /> <param name="sequence_id" type="text" value="" label="ID for sequence" /> - <param name="kmer_size" type="integer" optional='true' value="21" max="32" label="The size of the k-mer on which the index is built" - help="There is a tradeoff here between the distinctiveness of the k-mers and their robustness to errors. The shorter the k-mers, the more robust they will be to errors in the reads, but the longer the k-mers, the more distinct they will be. We generally recommend using a k-mer size of at least 20. MUST BE AN ODD VALUE "/> + <param name="kmer_size" type="integer" optional='true' value="31" max="32" label="The size of the k-mer on which the index is built" + help="There is a tradeoff here between the distinctiveness of the k-mers and their robustness to errors. 
The shorter the k-mers, the more robust they will be to errors in the reads, but the longer the k-mers, the more distinct they will be. We generally recommend using a k-mer size of at least 20. MUST BE AN ODD VALUE "> + <validator type="expression" message="Only odd values">value % 2 == 1</validator> + </param> + <param name="gencode" type="boolean" label="Transcript sequences are in gencode format" truevalue="--gencode" falsevalue="" checked="false" help="Will split the transcript name at the first '|' character. These reduced names will be used in the output and when looking for these transcripts in a gene to transcript GTF."/> </inputs> <outputs> <data name="out_file" format="data_manager_json" /> </outputs> <tests> <test> + <param name="transcriptome" value="phiX174"/> <param name="all_fasta_source" value="phiX174"/> <param name="sequence_name" value="sequence_name"/> <param name="sequence_id" value="sequence_id"/> <output name="out_file"> <assert_contents> - <has_line line='{"data_tables": {"salmon_indexes_versioned": [{"dbkey": "phiX174", "name": "sequence_name", "path": "sequence_id", "value": "sequence_id", "version": "q7"}]}}' /> + <has_text text='"salmon_indexes_versioned"' /> + <has_text text='"dbkey": "phiX174"' /> + <has_text text='"name": "sequence_name"' /> + <has_text text='"value": "sequence_id"' /> + <has_text text='"version": "q7"' /> + <has_text text='"path":' /> + </assert_contents> + </output> + </test> + <test> + <param name="transcriptome" value="phiX174"/> + <param name="all_fasta_source" value="phiX174"/> + <param name="sequence_name" value=""/> + <param name="sequence_id" value=""/> + <output name="out_file"> + <assert_contents> + <has_text text='"salmon_indexes_versioned"' /> + <has_text text='"dbkey": "phiX174"' /> + <has_text text='"name": "phiX174"' /> + <has_text text='"value": "phiX174"' /> + <has_text text='"version": "q7"' /> + <has_text text='"path":' /> </assert_contents> </output> </test> @@ -45,7 +111,11 @@ <![CDATA[ .. 
class:: infomark -**Notice:** If you leave name, description, or id blank, it will be generated automatically. +Indices are constructed as described here: https://combine-lab.github.io/alevin-tutorial/2019/selective-alignment/ + +See also https://salmon.readthedocs.io/en/latest/salmon.html#preparing-transcriptome-indices-mapping-based-mode + +**Notice:** If you leave name, description, or id blank, the dbkey of the genome will be used. ]]> </help> <citations>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/phiX174_transcripts.fasta Thu Oct 16 20:11:44 2025 +0000 @@ -0,0 +1,118 @@ +>lcl|NC_001422.1_cds_NP_040703.1_1 [locus_tag=phiX174p01] [db_xref=GeneID:2546398] [protein=DNA replication initiation] [protein_id=NP_040703.1] [location=join(3981..5386,1..136)] [gbkey=CDS] +ATGGTTCGTTCTTATTACCCTTCTGAATGTCACGCTGATTATTTTGACTTTGAGCGTATCGAGGCTCTTA +AACCTGCTATTGAGGCTTGTGGCATTTCTACTCTTTCTCAATCCCCAATGCTTGGCTTCCATAAGCAGAT +GGATAACCGCATCAAGCTCTTGGAAGAGATTCTGTCTTTTCGTATGCAGGGCGTTGAGTTCGATAATGGT +GATATGTATGTTGACGGCCATAAGGCTGCTTCTGACGTTCGTGATGAGTTTGTATCTGTTACTGAGAAGT +TAATGGATGAATTGGCACAATGCTACAATGTGCTCCCCCAACTTGATATTAATAACACTATAGACCACCG +CCCCGAAGGGGACGAAAAATGGTTTTTAGAGAACGAGAAGACGGTTACGCAGTTTTGCCGCAAGCTGGCT +GCTGAACGCCCTCTTAAGGATATTCGCGATGAGTATAATTACCCCAAAAAGAAAGGTATTAAGGATGAGT +GTTCAAGATTGCTGGAGGCCTCCACTATGAAATCGCGTAGAGGCTTTGCTATTCAGCGTTTGATGAATGC +AATGCGACAGGCTCATGCTGATGGTTGGTTTATCGTTTTTGACACTCTCACGTTGGCTGACGACCGATTA +GAGGCGTTTTATGATAATCCCAATGCTTTGCGTGACTATTTTCGTGATATTGGTCGTATGGTTCTTGCTG +CCGAGGGTCGCAAGGCTAATGATTCACACGCCGACTGCTATCAGTATTTTTGTGTGCCTGAGTATGGTAC +AGCTAATGGCCGTCTTCATTTCCATGCGGTGCACTTTATGCGGACACTTCCTACAGGTAGCGTTGACCCT +AATTTTGGTCGTCGGGTACGCAATCGCCGCCAGTTAAATAGCTTGCAAAATACGTGGCCTTATGGTTACA +GTATGCCCATCGCAGTTCGCTACACGCAGGACGCTTTTTCACGTTCTGGTTGGTTGTGGCCTGTTGATGC +TAAAGGTGAGCCGCTTAAAGCTACCAGTTATATGGCTGTTGGTTTCTATGTGGCTAAATACGTTAACAAA +AAGTCAGATATGGACCTTGCTGCTAAAGGTCTAGGAGCTAAAGAATGGAACAACTCACTAAAAACCAAGC +TGTCGCTACTTCCCAAGAAGCTGTTCAGAATCAGAATGAGCCGCAACTTCGGGATGAAAATGCTCACAAT +GACAAATCTGTCCACGGAGTGCTTAATCCAACTTACCAAGCTGGGTTACGACGCGACGCCGTTCAACCAG +ATATTGAAGCAGAACGCAAAAAGAGAGATGAGATTGAGGCTGGGAAAAGTTACTGTAGCCGACGTTTTGG +CGGCGCAACCTGTGACGACAAATCTGCTCAAATTTATGCGCGCTTCGATAAAAATGATTGGCGTATCCAA +CCTGCAGAGTTTTATCGCTTCCATGACGCAGAAGTTAACACTTTCGGATATTTCTGATGAGTCGAAAAAT +TATCTTGATAAAGCAGGAATTACTACTGCTTGTTTACGAATTAAATCGAAGTGGACTGCTGGCGGAAAAT +GA +>lcl|NC_001422.1_cds_NP_040704.1_2 [locus_tag=phiX174p02] 
[db_xref=GeneID:2546406] [protein=DNA replication initiation] [protein_id=NP_040704.1] [location=join(4497..5386,1..136)] [gbkey=CDS] +ATGAAATCGCGTAGAGGCTTTGCTATTCAGCGTTTGATGAATGCAATGCGACAGGCTCATGCTGATGGTT +GGTTTATCGTTTTTGACACTCTCACGTTGGCTGACGACCGATTAGAGGCGTTTTATGATAATCCCAATGC +TTTGCGTGACTATTTTCGTGATATTGGTCGTATGGTTCTTGCTGCCGAGGGTCGCAAGGCTAATGATTCA +CACGCCGACTGCTATCAGTATTTTTGTGTGCCTGAGTATGGTACAGCTAATGGCCGTCTTCATTTCCATG +CGGTGCACTTTATGCGGACACTTCCTACAGGTAGCGTTGACCCTAATTTTGGTCGTCGGGTACGCAATCG +CCGCCAGTTAAATAGCTTGCAAAATACGTGGCCTTATGGTTACAGTATGCCCATCGCAGTTCGCTACACG +CAGGACGCTTTTTCACGTTCTGGTTGGTTGTGGCCTGTTGATGCTAAAGGTGAGCCGCTTAAAGCTACCA +GTTATATGGCTGTTGGTTTCTATGTGGCTAAATACGTTAACAAAAAGTCAGATATGGACCTTGCTGCTAA +AGGTCTAGGAGCTAAAGAATGGAACAACTCACTAAAAACCAAGCTGTCGCTACTTCCCAAGAAGCTGTTC +AGAATCAGAATGAGCCGCAACTTCGGGATGAAAATGCTCACAATGACAAATCTGTCCACGGAGTGCTTAA +TCCAACTTACCAAGCTGGGTTACGACGCGACGCCGTTCAACCAGATATTGAAGCAGAACGCAAAAAGAGA +GATGAGATTGAGGCTGGGAAAAGTTACTGTAGCCGACGTTTTGGCGGCGCAACCTGTGACGACAAATCTG +CTCAAATTTATGCGCGCTTCGATAAAAATGATTGGCGTATCCAACCTGCAGAGTTTTATCGCTTCCATGA +CGCAGAAGTTAACACTTTCGGATATTTCTGATGAGTCGAAAAATTATCTTGATAAAGCAGGAATTACTAC +TGCTTGTTTACGAATTAAATCGAAGTGGACTGCTGGCGGAAAATGA +>lcl|NC_001422.1_cds_NP_040705.1_3 [locus_tag=phiX174p03] [db_xref=GeneID:2546405] [protein=head morphogenesis] [protein_id=NP_040705.1] [location=join(5075..5386,1..51)] [gbkey=CDS] +ATGGAACAACTCACTAAAAACCAAGCTGTCGCTACTTCCCAAGAAGCTGTTCAGAATCAGAATGAGCCGC +AACTTCGGGATGAAAATGCTCACAATGACAAATCTGTCCACGGAGTGCTTAATCCAACTTACCAAGCTGG +GTTACGACGCGACGCCGTTCAACCAGATATTGAAGCAGAACGCAAAAAGAGAGATGAGATTGAGGCTGGG +AAAAGTTACTGTAGCCGACGTTTTGGCGGCGCAACCTGTGACGACAAATCTGCTCAAATTTATGCGCGCT +TCGATAAAAATGATTGGCGTATCCAACCTGCAGAGTTTTATCGCTTCCATGACGCAGAAGTTAACACTTT +CGGATATTTCTGA +>lcl|NC_001422.1_cds_NP_040706.1_4 [locus_tag=phiX174p04] [db_xref=GeneID:2546403] [protein=K] [protein_id=NP_040706.1] [location=51..221] [gbkey=CDS] +ATGAGTCGAAAAATTATCTTGATAAAGCAGGAATTACTACTGCTTGTTTACGAATTAAATCGAAGTGGAC 
+TGCTGGCGGAAAATGAGAAAATTCGACCTATCCTTGCGCAGCTCGAGAAGCTCTTACTTTGCGACCTTTC +GCCATCAACTAACGATTCTGTCAAAAACTGA +>lcl|NC_001422.1_cds_NP_040707.1_5 [locus_tag=phiX174p05] [db_xref=GeneID:2546402] [protein=terminase] [protein_id=NP_040707.1] [location=133..393] [gbkey=CDS] +ATGAGAAAATTCGACCTATCCTTGCGCAGCTCGAGAAGCTCTTACTTTGCGACCTTTCGCCATCAACTAA +CGATTCTGTCAAAAACTGACGCGTTGGATGAGGAGAAGTGGCTTAATATGCTTGGCACGTTCGTCAAGGA +CTGGTTTAGATATGAGTCACATTTTGTTCATGGTAGAGATTCTCTTGTTGACATTTTAAAAGAGCGTGGA +TTACTATCTGAGTCCGATGCTGTTCAACCACTAATAGGTAAGAAATCATGA +>lcl|NC_001422.1_cds_NP_040708.1_6 [locus_tag=phiX174p07] [db_xref=GeneID:2546399] [protein=head morphogenesis] [protein_id=NP_040708.1] [location=390..848] [gbkey=CDS] +ATGAGTCAAGTTACTGAACAATCCGTACGTTTCCAGACCGCTTTGGCCTCTATTAAGCTCATTCAGGCTT +CTGCCGTTTTGGATTTAACCGAAGATGATTTCGATTTTCTGACGAGTAACAAAGTTTGGATTGCTACTGA +CCGCTCTCGTGCTCGTCGCTGCGTTGAGGCTTGCGTTTATGGTACGCTGGACTTTGTGGGATACCCTCGC +TTTCCTGCTCCTGTTGAGTTTATTGCTGCCGTCATTGCTTATTATGTTCATCCCGTCAACATTCAAACGG +CCTGTCTCATCATGGAAGGCGCTGAATTTACGGAAAACATTATTAATGGCGTCGAGCGTCCGGTTAAAGC +CGCTGAATTGTTCGCGTTTACCTTGCGTGTACGCGCAGGAAACACTGACGTTCTTACTGACGCAGAAGAA +AACGTGCGTCAAAAATTACGTGCGGAAGGAGTGATGTAA +>lcl|NC_001422.1_cds_NP_040709.1_7 [locus_tag=phiX174p08] [db_xref=GeneID:2546400] [protein=endolysin] [protein_id=NP_040709.1] [location=568..843] [gbkey=CDS] +ATGGTACGCTGGACTTTGTGGGATACCCTCGCTTTCCTGCTCCTGTTGAGTTTATTGCTGCCGTCATTGC +TTATTATGTTCATCCCGTCAACATTCAAACGGCCTGTCTCATCATGGAAGGCGCTGAATTTACGGAAAAC +ATTATTAATGGCGTCGAGCGTCCGGTTAAAGCCGCTGAATTGTTCGCGTTTACCTTGCGTGTACGCGCAG +GAAACACTGACGTTCTTACTGACGCAGAAGAAAACGTGCGTCAAAAATTACGTGCGGAAGGAGTGA +>lcl|NC_001422.1_cds_NP_040710.1_8 [locus_tag=phiX174p09] [db_xref=GeneID:2546404] [protein=DNA condensation] [protein_id=NP_040710.1] [location=848..964] [gbkey=CDS] +ATGTCTAAAGGTAAAAAACGTTCTGGCGCTCGCCCTGGTCGTCCGCAGCCGTTGCGAGGTACTAAAGGCA +AGCGTAAAGGCGCTCGTCTTTGGTATGTAGGTGGTCAACAATTTTAA +>lcl|NC_001422.1_cds_NP_040711.1_9 [locus_tag=phiX174p06] [db_xref=GeneID:2546408] 
[protein=major head protein] [protein_id=NP_040711.1] [location=1001..2284] [gbkey=CDS] +ATGTCTAATATTCAAACTGGCGCCGAGCGTATGCCGCATGACCTTTCCCATCTTGGCTTCCTTGCTGGTC +AGATTGGTCGTCTTATTACCATTTCAACTACTCCGGTTATCGCTGGCGACTCCTTCGAGATGGACGCCGT +TGGCGCTCTCCGTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTACTGTAGACATTTTTACTTTT +TATGTCCCTCATCGTCACGTTTATGGTGAACAGTGGATTAAGTTCATGAAGGATGGTGTTAATGCCACTC +CTCTCCCGACTGTTAACACTACTGGTTATATTGACCATGCCGCTTTTCTTGGCACGATTAACCCTGATAC +CAATAAAATCCCTAAGCATTTGTTTCAGGGTTATTTGAATATCTATAACAACTATTTTAAAGCGCCGTGG +ATGCCTGACCGTACCGAGGCTAACCCTAATGAGCTTAATCAAGATGATGCTCGTTATGGTTTCCGTTGCT +GCCATCTCAAAAACATTTGGACTGCTCCGCTTCCTCCTGAGACTGAGCTTTCTCGCCAAATGACGACTTC +TACCACATCTATTGACATTATGGGTCTGCAAGCTGCTTATGCTAATTTGCATACTGACCAAGAACGTGAT +TACTTCATGCAGCGTTACCATGATGTTATTTCTTCATTTGGAGGTAAAACCTCTTATGACGCTGACAACC +GTCCTTTACTTGTCATGCGCTCTAATCTCTGGGCATCTGGCTATGATGTTGATGGAACTGACCAAACGTC +GTTAGGCCAGTTTTCTGGTCGTGTTCAACAGACCTATAAACATTCTGTGCCGCGTTTCTTTGTTCCTGAG +CATGGCACTATGTTTACTCTTGCGCTTGTTCGTTTTCCGCCTACTGCGACTAAAGAGATTCAGTACCTTA +ACGCTAAAGGTGCTTTGACTTATACCGATATTGCTGGCGACCCTGTTTTGTATGGCAACTTGCCGCCGCG +TGAAATTTCTATGAAGGATGTTTTCCGTTCTGGTGATTCGTCTAAGAAGTTTAAGATTGCTGAGGGTCAG +TGGTATCGTTATGCGCCTTCGTATGTTTCTCCTGCTTATCACCTTCTTGAAGGCTTCCCATTCATTCAGG +AACCGCCTTCTGGTGATTTGCAAGAACGCGTACTTATTCGCCACCATGATTATGACCAGTGTTTCCAGTC +CGTTCAGTTGTTGCAGTGGAATAGTCAGGTTAAATTTAATGTGACCGTTTATCGCAATCTGCCGACCACT +CGCGATTCAATCATGACTTCGTGA +>lcl|NC_001422.1_cds_NP_040712.1_10 [locus_tag=phiX174p06] [db_xref=GeneID:2546408] [protein=major spike protein] [protein_id=NP_040712.1] [location=2395..2922] [gbkey=CDS] +ATGTTTCAGACTTTTATTTCTCGCCATAATTCAAACTTTTTTTCTGATAAGCTGGTTCTCACTTCTGTTA +CTCCAGCTTCTTCGGCACCTGTTTTACAGACACCTAAAGCTACATCGTCAACGTTATATTTTGATAGTTT +GACGGTTAATGCTGGTAATGGTGGTTTTCTTCATTGCATTCAGATGGATACATCTGTCAACGCCGCTAAT +CAGGTTGTTTCTGTTGGTGCTGATATTGCTTTTGATGCCGACCCTAAATTTTTTGCCTGTTTGGTTCGCT +TTGAGTCTTCTTCGGTTCCGACTACCCTCCCGACTGCCTATGATGTTTATCCTTTGAATGGTCGCCATGA 
+TGGTGGTTATTATACCGTCAAGGACTGTGTGACTATTGACGTCCTTCCCCGTACGCCGGGCAATAACGTT +TATGTTGGTTTCATGGTTTGGTCTAACTTTACCGCTACTAAATGCCGCGGATTGGTTTCGCTGAATCAGG +TTATTAAAGAGATTATTTGTCTCCAGCCACTTAAGTGA +>lcl|NC_001422.1_cds_NP_040713.1_11 [locus_tag=phiX174p06] [db_xref=GeneID:2546408] [protein=pilot protein for DNA ejection] [protein_id=NP_040713.1] [location=2931..3917] [gbkey=CDS] +ATGTTTGGTGCTATTGCTGGCGGTATTGCTTCTGCTCTTGCTGGTGGCGCCATGTCTAAATTGTTTGGAG +GCGGTCAAAAAGCCGCCTCCGGTGGCATTCAAGGTGATGTGCTTGCTACCGATAACAATACTGTAGGCAT +GGGTGATGCTGGTATTAAATCTGCCATTCAAGGCTCTAATGTTCCTAACCCTGATGAGGCCGCCCCTAGT +TTTGTTTCTGGTGCTATGGCTAAAGCTGGTAAAGGACTTCTTGAAGGTACGTTGCAGGCTGGCACTTCTG +CCGTTTCTGATAAGTTGCTTGATTTGGTTGGACTTGGTGGCAAGTCTGCCGCTGATAAAGGAAAGGATAC +TCGTGATTATCTTGCTGCTGCATTTCCTGAGCTTAATGCTTGGGAGCGTGCTGGTGCTGATGCTTCCTCT +GCTGGTATGGTTGACGCCGGATTTGAGAATCAAAAAGAGCTTACTAAAATGCAACTGGACAATCAGAAAG +AGATTGCCGAGATGCAAAATGAGACTCAAAAAGAGATTGCTGGCATTCAGTCGGCGACTTCACGCCAGAA +TACGAAAGACCAGGTATATGCACAAAATGAGATGCTTGCTTATCAACAGAAGGAGTCTACTGCTCGCGTT +GCGTCTATTATGGAAAACACCAATCTTTCCAAGCAACAGCAGGTTTCCGAGATTATGCGCCAAATGCTTA +CTCAAGCTCAAACGGCTGGTCAGTATTTTACCAATGACCAAATCAAAGAAATGACTCGCAAGGTTAGTGC +TGAGGTTGACTTAGTTCATCAGCAAACGCAGAATCAGCGGTATGGCTCTTCTCATATTGGCGCTACTGCA +AAGGATATTTCTAATGTCGTCACTGATGCTGCTTCTGGTGTGGTTGATATTTTTCATGGTATTGATAAAG +CTGTTGCCGATACTTGGAACAATTTCTGGAAAGACGGTAAAGCTGATGGTATTGGCTCTAATTTGTCTAG +GAAATAA +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/transcriptomes.loc Thu Oct 16 20:11:44 2025 +0000 @@ -0,0 +1,1 @@ +phiX174 phiX174 phiX174 ${__HERE__}/phiX174_transcripts.fasta \ No newline at end of file
--- a/tool_data_table_conf.xml.sample Sun Apr 16 08:31:17 2023 +0000 +++ b/tool_data_table_conf.xml.sample Thu Oct 16 20:11:44 2025 +0000 @@ -8,4 +8,8 @@ <columns>value, dbkey, name, path</columns> <file path="tool-data/all_fasta.loc" /> </table> + <table name="transcriptomes" comment_char="#"> + <columns>value, dbkey, name, path</columns> + <file path="tool-data/transcriptomes.loc" /> + </table> </tables>
--- a/tool_data_table_conf.xml.test Sun Apr 16 08:31:17 2023 +0000 +++ b/tool_data_table_conf.xml.test Thu Oct 16 20:11:44 2025 +0000 @@ -8,4 +8,8 @@ <columns>value, dbkey, name, path</columns> <file path="${__HERE__}/test-data/all_fasta.loc" /> </table> + <table name="transcriptomes" comment_char="#"> + <columns>value, dbkey, name, path</columns> + <file path="${__HERE__}/test-data/transcriptomes.loc" /> + </table> </tables>