Changeset 9:8c4c97fd0555 (2021-10-13)

Previous changeset 8:1a56888ddb7d (2021-10-11) Next changeset 10:c58d1774c762 (2022-02-11)

Commit message:
"planemo upload for repository https://github.com/jj-umn/tools-iuc/tree/arriba/tools/arriba commit bd2c6bea7cb7dc30ca57f9d69ad49460ddf7f14b"

modified:
arriba.xml
arriba_download_reference.xml
arriba_draw_fusions.xml
macros.xml

added:
static/images/draw-fusions-example.png
test-data/arriba_indexes.loc
test-data/test-cache/genome.fasta
test-data/test-cache/genome.gtf
tool-data/arriba_indexes.loc.sample
tool_data_table_conf.xml.sample
tool_data_table_conf.xml.test

diff -r 1a56888ddb7d -r 8c4c97fd0555 arriba.xml
--- a/arriba.xml Mon Oct 11 19:00:45 2021 +0000
+++ b/arriba.xml Wed Oct 13 18:45:16 2021 +0000

[

b'@@ -6,23 +6,38 @@\n <expand macro="requirements" />\n <expand macro="version_command" />\n <command detect_errors="exit_code"><![CDATA[\n+@GENOME_SOURCE@\n #if str($input_params.input_source) == "use_fastq"\n #set $readFilesCommand = \'\'\n- #if $input_params.left_fq.is_of_type("fastq.gz"):\n- #set read1 = \'input_1.fastq.gz\'\n- #set $readFilesCommand = \'--readFilesCommand zcat\'\n- #else:\n- #set read1 = \'input_1.fastq\'\n+ #set $read2 = \'\'\n+ #if str($input_params.singlePaired.sPaired) == \'paired_collection\':\n+ #if $input_params.singlePaired.input.forward.is_of_type(\'fastq.gz\', \'fastqsanger.gz\'):\n+ #set $readFilesCommand = \'--readFilesCommand zcat\'\n+ #set read1 = \'input_1.fastq.gz\'\n+ #set read2 = \'input_2.fastq.gz\'\n+ #else\n+ #set read1 = \'input_1.fastq\'\n+ #set read2 = \'input_2.fastq\'\n+ #end if\n+ ln -sf \'${$input_params.singlePaired.input.forward}\' ${read1} &&\n+ ln -sf \'${$input_params.singlePaired.input.reverse}\' ${read2} &&\n+ #else\n+ #if $input_params.singlePaired.input1.is_of_type(\'fastq.gz\', \'fastqsanger.gz\'):\n+ #set $readFilesCommand = \'--readFilesCommand zcat\'\n+ #set read1 = \'input_1.fastq.gz\'\n+ #else\n+ #set read1 = \'input_1.fastq\'\n+ #end if\n+ ln -sf \'$input_params.singlePaired.input1\' ${read1} &&\n+ #if str($input_params.singlePaired.sPaired) == \'paired\':\n+ #set $read2 = $read1.replace(\'1\',\'2\')\n+ ln -sf \'$input_params.singlePaired.input2\' ${read2} &&\n+ #end if\n #end if\n- ln -f -s \'${input_params.left_fq}\' ${read1} &&\n- #if $input_params.right_fq.is_of_type("fastq.gz"):\n- #set read2 = \'input_2.fastq.gz\'\n- #else:\n- #set read2 = \'input_2.fastq\'\n- #end if\n- ln -f -s \'${input_params.right_fq}\' ${read2} &&\n #if str($input_params.index.index_source) == "history"\n #set $star_index_dir = $input_params.index.star_index.extra_files_path\n+ #else \n+ #set $star_index_dir = $input_params.index.arriba_ref.fields.star_index\n #end if\n STAR \n --runThreadN \\${GALAXY_SLOTS:-1} \n@@ -57,7 
+72,7 @@\n #end if\n #end if\n -a \'$genome_assembly\'\n- -g \'$annotation\'\n+ -g \'$genome_annotation\'\n #if $blacklist\n -b \'$blacklist\'\n #else\n@@ -174,29 +189,43 @@\n </param>\n </when>\n <when value="use_fastq">\n- <param name="left_fq"\n- type="data"\n- format="fastqsanger,fastqsanger.gz"\n- argument="--left_fq"\n- label="left.fq file"/>\n- <param name="right_fq"\n- type="data"\n- format="fastqsanger,fastqsanger.gz"\n- argument="--right_fq"\n- label="right.fq file"/>\n+ <conditional name="singlePaired">\n+ <param name="sPaired" type="select" label="Single-end or paired-end reads">\n+ <option value="single" selected="true">Single-end</option>\n+ <option value="paired">Paired-end (as individual datasets)</option>\n+ <option value="paired_collection">Paired-end (as collection)</option>\n+ </param>\n+ <when value="single">\n+ <param format="fastq,fasta,fastq.gz,fastqsanger.gz" name="input1" type="data" label="RNA-Seq FASTQ/FASTA file"/>\n+ </when>\n+ <when value="paired">\n+ <param format="fastq,fasta,fastq.gz,fastqsanger.gz" name="input1" type="data" label="RNA-Seq FASTQ/FASTA file, forward reads"/>\n+ <param format="fastq,fasta,fastq.gz,fastqsanger.gz" name="input2" type="data" label="RNA-Seq FASTQ/FASTA file, reverse reads"/>\n+ </when>\n+ <when value="pair'..b'="genome_source" value="history"/>\n+ <param name="assembly" ftype="fasta" value="genome.fasta"/>\n+ <param name="annotation" ftype="gtf" value="genome.gtf"/>\n+ </conditional>\n <param name="protein_domains" ftype="gff3" value="protein_domains.gff3"/>\n <conditional name="visualization">\n <param name="do_viz" value="no"/>\n@@ -425,13 +457,17 @@\n </output>\n </test>\n \n+\n <test> \n <conditional name="input_params">\n <param name="input_source" value="use_star"/>\n <param name="input" ftype="sam" value="Aligned.out.sam"/>\n </conditional>\n- <param name="genome_assembly" ftype="fasta" value="genome.fasta"/>\n- <param name="annotation" ftype="gtf" value="genome.gtf"/>\n+ <conditional 
name="genome">\n+ <param name="genome_source" value="history"/>\n+ <param name="assembly" ftype="fasta" value="genome.fasta"/>\n+ <param name="annotation" ftype="gtf" value="genome.gtf"/>\n+ </conditional>\n <param name="protein_domains" ftype="gff3" value="protein_domains.gff3"/>\n <conditional name="visualization">\n <param name="do_viz" value="yes"/>\n@@ -448,6 +484,27 @@\n </assert_contents>\n </output>\n </test>\n+ \n+ <test> \n+ <conditional name="input_params">\n+ <param name="input_source" value="use_star"/>\n+ <param name="input" ftype="sam" value="Aligned.out.sam"/>\n+ </conditional>\n+ <conditional name="genome">\n+ <param name="genome_source" value="cached"/>\n+ <param name="arriba_ref" value="GRCh38+ENSEMBL93"/>\n+ </conditional>\n+ <param name="protein_domains" ftype="gff3" value="protein_domains.gff3"/>\n+ <conditional name="visualization">\n+ <param name="do_viz" value="no"/>\n+ <param name="cytobands" ftype="tabular" value="cytobands.tsv"/>\n+ </conditional>\n+ <output name="fusions_tsv">\n+ <assert_contents>\n+ <has_text_matching expression="BCR\\tABL1"/>\n+ </assert_contents>\n+ </output>\n+ </test>\n \n </tests>\n <help><![CDATA[\n@@ -601,6 +658,13 @@\n NOTE: Arriba was designed for alignments from RNA-Seq data. It should not be run on WGS data directly. Many assumptions made by Arriba about the data (statistical models, blacklist, etc.) only apply to RNA-Seq data and are not valid for DNA-Seq data. For such data, a structural variant calling algorithm should be used and the results should be passed to Arriba.\n \n \n+**OPTIONS**\n+\n+ - Arriba: https://arriba.readthedocs.io/en/latest/command-line-options/#arriba\n+ - Visualization: https://arriba.readthedocs.io/en/latest/command-line-options/#draw_fusionsr\n+ - RNA STAR: https://arriba.readthedocs.io/en/latest/workflow/\n+\n+\n **OUTPUTS**\n \n See: https://arriba.readthedocs.io/en/latest/output-files/\n@@ -662,12 +726,9 @@\n \n A PDF file with one page for each predicted fusion. 
Each page depicts the fusion partners, their orientation, the retained exons in the fusion transcript, statistics about the number of supporting reads, and if the column fusion_transcript has a value an excerpt of the sequence around the breakpoint.\n \n-\n-**OPTIONS**\n-\n- - Arriba: https://arriba.readthedocs.io/en/latest/command-line-options/#arriba\n- - Visualization: https://arriba.readthedocs.io/en/latest/command-line-options/#draw_fusionsr\n- - RNA STAR: https://arriba.readthedocs.io/en/latest/workflow/\n+.. image:: draw-fusions-example.png\n+ :width: 800\n+ :height: 467\n \n \n .. _Arriba: https://arriba.readthedocs.io/en/latest/\n'

diff -r 1a56888ddb7d -r 8c4c97fd0555 arriba_download_reference.xml
--- a/arriba_download_reference.xml Mon Oct 11 19:00:45 2021 +0000
+++ b/arriba_download_reference.xml Wed Oct 13 18:45:16 2021 +0000

[

@@ -21,17 +21,12 @@
     <inputs>
         <param name="is_test" type="hidden" value="no"/>
         <param name="arriba_reference_name" type="select" label="Select reference">
-
             <option value="GRCh38+ENSEMBL93">GRCh38+ENSEMBL93</option>
             <option value="GRCh38+GENCODE28">GRCh38+GENCODE28</option>
             <option value="GRCh38+RefSeq">GRCh38+RefSeq</option>
             <option value="GRCh38viral+ENSEMBL93">GRCh38viral+ENSEMBL93</option>
             <option value="GRCh38viral+GENCODE28">GRCh38viral+GENCODE28</option>
             <option value="GRCh38viral+RefSeq">GRCh38viral+RefSeq</option>
-            <option value="GRCm38+GENCODEM25">GRCm38+GENCODEM25</option>
-            <option value="GRCm38+RefSeq">GRCm38+RefSeq</option>
-            <option value="GRCm38viral+GENCODEM25">GRCm38viral+GENCODEM25</option>
-            <option value="GRCm38viral+RefSeq">GRCm38viral+RefSeq</option>
             <option value="hg38+ENSEMBL93">hg38+ENSEMBL93</option>
             <option value="hg38+GENCODE28">hg38+GENCODE28</option>
             <option value="hg38+RefSeq">hg38+RefSeq</option>
@@ -56,6 +51,10 @@
             <option value="hs37d5viral+ENSEMBL87">hs37d5viral+ENSEMBL87</option>
             <option value="hs37d5viral+GENCODE19">hs37d5viral+GENCODE19</option>
             <option value="hs37d5viral+RefSeq">hs37d5viral+RefSeq</option>
+            <option value="GRCm38+GENCODEM25">GRCm38+GENCODEM25</option>
+            <option value="GRCm38+RefSeq">GRCm38+RefSeq</option>
+            <option value="GRCm38viral+GENCODEM25">GRCm38viral+GENCODEM25</option>
+            <option value="GRCm38viral+RefSeq">GRCm38viral+RefSeq</option>
             <option value="mm10+GENCODEM25">mm10+GENCODEM25</option>
             <option value="mm10+RefSeq">mm10+RefSeq</option>
             <option value="mm10viral+GENCODEM25">mm10viral+GENCODEM25</option>
@@ -78,7 +77,6 @@
                     <has_text text="GRCh38+ENSEMBL93"/>
                 </assert_contents>
             </output>
-
         </test>
     </tests>
     <help><![CDATA[
@@ -87,7 +85,7 @@
Arriba_ is a fast tool to search for aberrant transcripts such as gene fusions.
It is based on chimeric alignments found by the STAR RNA-Seq aligner.

-**Arriba Reference** downloads a genome sequence fasta and its related annoation GTF, and then build a STAR index for the RNA STAR aligner.
+**Arriba Reference** downloads a genome sequence fasta and its related annotation GTF, and then builds a STAR index for the RNA STAR aligner.

These datasets will be added to your Galaxy history:

@@ -101,7 +99,7 @@
   - https://arriba.readthedocs.io/en/latest/input-files/

-**NOTE:** This is a resource intensive process, so the results should be copies to new histories as needed rather than running this in each workflow.
+**NOTE:** This is a resource intensive process, so the results should be copied to new histories as needed rather than running this in each workflow.

.. _Arriba: https://arriba.readthedocs.io/en/latest/

diff -r 1a56888ddb7d -r 8c4c97fd0555 arriba_draw_fusions.xml
--- a/arriba_draw_fusions.xml Mon Oct 11 19:00:45 2021 +0000
+++ b/arriba_draw_fusions.xml Wed Oct 13 18:45:16 2021 +0000

[

b'@@ -7,7 +7,7 @@\n <expand macro="version_command" />\n <command detect_errors="exit_code"><![CDATA[\n #if $alignments.extension == \'sam\'\n- ln -sf \'$assembly\' input.fa &&\n+ ln -sf \'$genome.assembly\' input.fa &&\n samtools faidx input.fa &&\n samtools view -b -@ \\${GALAXY_SLOTS:-1} -t input.fa.fai \'$alignments\' | \n samtools sort -O bam -@ \\${GALAXY_SLOTS:-1} -T "\\${TMPDIR:-.}" -o Aligned.sortedByCoord.out.bam &&\n@@ -21,8 +21,7 @@\n <inputs>\n <param argument="--fusions" type="data" format="tabular" label="Arriba fusions.tsv"/>\n <param argument="--alignments" type="data" format="sam,bam" label="STAR Aligned.out.bam"/>\n- <param name="assembly" type="data" format="fasta" optional="true" label="Genome assembly fasta (required when alignments are not sorted bam format)"/>\n- <param argument="--annotation" type="data" format="gtf" label="GTF file with gene annotation"/>\n+ <expand macro="genome_source" assembly_optional="true"/>\n <param name="protein_domains" argument="-p" type="data" format="gff3" optional="true" label="File containing protein domains"/>\n <section name="visualization" expanded="true" title="Visualization Options">\n <expand macro="visualization_options" />\n@@ -38,8 +37,11 @@\n <test> \n <param name="fusions" ftype="tabular" value="fusions.tsv"/>\n <param name="alignments" ftype="sam" value="Aligned.out.sam"/>\n- <param name="assembly" ftype="fasta" value="genome.fasta"/>\n- <param name="annotation" ftype="gtf" value="genome.gtf"/>\n+ <conditional name="genome">\n+ <param name="genome_source" value="history"/>\n+ <param name="assembly" ftype="fasta" value="genome.fasta"/>\n+ <param name="annotation" ftype="gtf" value="genome.gtf"/>\n+ </conditional>\n <param name="protein_domains" ftype="gff3" value="protein_domains.gff3"/>\n <section name="visualization">\n <param name="cytobands" ftype="tabular" value="cytobands.tsv"/>\n@@ -52,230 +54,66 @@\n </test>\n </tests>\n <help><![CDATA[\n-**Arriba**\n-\n+**Arriba Draw Fusions**\n \n-Arriba_ 
is a fast tool to search for aberrant transcripts such as gene fusions.\n-It is based on chimeric alignments found by the STAR RNA-Seq aligner.\n+Arriba_Draw_Fusions_ (draw_fusions.R) renders publication-quality visualizations of the transcripts involved in predicted fusions. It generates a PDF file with one page for each predicted fusion. Each page depicts the fusion partners, their orientation, the retained exons in the fusion transcript, statistics about the number of supporting reads, and - if the column fusion_transcript has a value - an excerpt of the sequence around the breakpoint.\n \n \n **INPUTS**\n \n-See: https://arriba.readthedocs.io/en/latest/input-files/\n+See: https://arriba.readthedocs.io/en/latest/command-line-options/#draw_fusionsr\n+\n+ - Fusions \n+\n+ File containing fusion predictions from Arriba_ (fusions.tsv) or STAR-Fusion (star-fusion.fusion_predictions.tsv or star-fusion.fusion_predictions.abridged.coding_effect.tsv).\n+\n+ - Annotation\n+\n+ Gene annotation in GTF format that was used by the STAR aligner. \n \n - Alignments\n \n- Arriba takes the main output file of STAR (Aligned.out.bam) as input (parameter -x). If STAR was run with the parameter --chimOutType WithinBAM, then this file contains all the information needed by Arriba to find fusions. When STAR was run with the parameter --chimOutType SeparateSAMold, the main output file lacks chimeric alignments. Instead, STAR writes them to a separate output file named Chimeric.out.sam. In this case, the file needs to be passed to Arriba via the parameter -c in addition to the main output file Aligned.out.bam.\n-\n- Arriba extracts three types of reads from the alignment file(s):\n-\n- * Split-reads, i.e., reads composed of segments which map in a no'..b'equence rather than the supporting reads is wrapped in parentheses (( and )). In addition, when -I is used, the sequence is trimmed to the boundaries of the fused transcripts. 
The coordinate of the fusion breakpoint relative to the start of the transcript can thus easily be inferred by counting the bases from the beginning of the fusion transcript to the breakpoint character (|). In case the full sequence could be constructed from the combined information of supporting reads and assembly sequence, the start of the fusion transcript is marked by a caret sign (^) and the end by a dollar sign ($). If the full sequence could not be constructed, these signs are missing.\n-\n- * peptide_sequence : This column contains the fusion peptide sequence. The sequence is translated from the fusion transcript given in the column fusion_transcript and determines the reading frame of the fused genes according to the transcript isoforms given in the columns transcript_id1 and transcript_id2. Translation starts at the start of the assembled fusion transcript or when the start codon is encountered in the 5\' gene. Translation ends when either the end of the assembled fusion transcript is reached or when a stop codon is encountered. If the fusion transcript contains an ellipsis (...), the sequence beyond the ellipsis is trimmed before translation, because the reading frame cannot be determined reliably. The column contains a dot (.), when the transcript sequence could not be predicted or when the precise breakpoints are unknown due to lack of split reads or when the fusion transcript does not overlap any coding exons in the 5\' gene or when no start codon could be found in the 5\' gene or when there is a stop codon prior to the fusion junction (in which case the column reading_frame contains the value stop-codon). The breakpoint is represented as a pipe symbol (|). If a codon spans the breakpoint, the amino acid is placed on the side of the breakpoint where two of the three bases reside. Codons resulting from non-template bases are flanked by two pipes. 
Amino acids are written as lowercase characters in the following situations: non-silent SNVs/SNPs, insertions, frameshifts, codons spanning the breakpoint, non-coding regions (introns/intergenic regions/UTRs), and non-template bases. Codons which cannot be translated to amino acids, such as those having invalid characters, are represented as ?.\n-\n- * read_identifiers : This column contains the names of the supporting reads separated by commas.\n-\n- - fusions.discarded.tsv\n-\n- The file fusions.discarded.tsv (as specified by the parameter -O) contains all events that Arriba classified as an artifact or that are also observed in healthy tissue. It has the same format as the file fusions.tsv. \n-\n-\n-**VISUALIZATION**\n-\n See: https://arriba.readthedocs.io/en/latest/visualization/\n \n - fusions.pdf\n \n A PDF file with one page for each predicted fusion. Each page depicts the fusion partners, their orientation, the retained exons in the fusion transcript, statistics about the number of supporting reads, and if the column fusion_transcript has a value an excerpt of the sequence around the breakpoint.\n \n-\n-**OPTIONS**\n-\n- - Arriba: https://arriba.readthedocs.io/en/latest/command-line-options/#arriba\n- - Visualization: https://arriba.readthedocs.io/en/latest/command-line-options/#draw_fusionsr\n- - RNA STAR: https://arriba.readthedocs.io/en/latest/workflow/\n+.. image:: draw-fusions-example.png\n+ :width: 800\n+ :height: 467\n \n \n+\n+\n+.. _Arriba_Draw_Fusions: https://arriba.readthedocs.io/en/latest/visualization/\n .. _Arriba: https://arriba.readthedocs.io/en/latest/\n-.. _INPUTS: https://arriba.readthedocs.io/en/latest/input-files/\n-.. _OUTPUTS: https://arriba.readthedocs.io/en/latest/output-files/\n-.. _VISUALIZATION: https://arriba.readthedocs.io/en/latest/visualization/\n-.. _OPTIONS: https://arriba.readthedocs.io/en/latest/command-line-options/\n \n ]]></help>\n <expand macro="citations" />\n'

diff -r 1a56888ddb7d -r 8c4c97fd0555 macros.xml
--- a/macros.xml Mon Oct 11 19:00:45 2021 +0000
+++ b/macros.xml Wed Oct 13 18:45:16 2021 +0000

@@ -1,6 +1,6 @@
<macros>
     <token name="@TOOL_VERSION@">2.1.0</token>
-    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@VERSION_SUFFIX@">1</token>
     <xml name="requirements">
         <requirements>
         <requirement type="package" version="@TOOL_VERSION@">arriba</requirement>
@@ -16,6 +16,36 @@
     <xml name="version_command">
         <version_command>arriba -h | grep Version | sed 's/^.* //'</version_command>
     </xml>
+    <xml name="genome_source" token_assembly_optional="false" >
+        <conditional name="genome">
+            <param name="genome_source" type="select" label="Arriba Genome assembly and annotation source">
+                <option value="history">From your history</option>
+                <option value="cached">Use built-in Arriba</option>
+            </param>
+            <when value="history">
+                <param name="assembly" argument="-a" type="data" format="fasta" optional="@ASSEMBLY_OPTIONAL@" label="Genome assembly fasta"/>
+                <param name="annotation" argument="-g" type="data" format="gtf" label="Gene annotation in GTF format"/>
+            </when>
+            <when value="cached">
+                <param name="arriba_ref" type="select" label="Arriba Genome assembly and annotation">
+                    <options from_data_table="arriba_indexes">
+                    </options>
+                </param>
+            </when>
+        </conditional>
+    </xml>
+    <token name="@GENOME_SOURCE@">
+#if str($genome.genome_source) == "history"
+    #if $genome.assembly
+        #set $genome_assembly = $genome.assembly
+    #end if
+    #set $genome_annotation = $genome.annotation
+#else
+    #set $genome_assembly = $genome.arriba_ref.fields.fasta
+    #set $genome_annotation = $genome.arriba_ref.fields.gtf
+#end if
+</token>
+
     <xml name="visualization_options">
                 <param name="cytobands" argument="--cytobands" type="data" format="tabular" optional="true" label="Cytobands"/>
                 <section name="options" expanded="false" title="Draw Fusion Options">
@@ -127,7 +157,7 @@
draw_fusions.R
     --fusions='$fusions'
     --alignments='Aligned.sortedByCoord.out.bam'
-    --annotation='$annotation'
+    --annotation='$genome.annotation'
     --output=fusions.pdf
     #if $visualization.cytobands
     --cytobands='$visualization.cytobands'

diff -r 1a56888ddb7d -r 8c4c97fd0555 static/images/draw-fusions-example.png

Binary file static/images/draw-fusions-example.png has changed

diff -r 1a56888ddb7d -r 8c4c97fd0555 test-data/arriba_indexes.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/arriba_indexes.loc Wed Oct 13 18:45:16 2021 +0000

@@ -0,0 +1,18 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of Arriba data files.
+#The Arriba script download_references.sh retrieves a genome assembly fasta
+#and a related GTF annotation file, then builds a STAR index.
+#You will need to create these data files and then create an
+#arriba_indexes.loc similar to this one (store it in this
+#directory) that points to the directories in which those files are stored.
+#The arriba_indexes.loc file has this format (longer white space
+#characters are TAB characters):
+#
+#<unique_build_id> <display_name> <genome_fasta_path> <genome_gtf_path> <STAR_index_path>
+#
+#Note that STAR indices can become quite large.
+#
+#<unique_build_id> <display_name> <genome_fasta_path> <genome_gtf_path> <STAR_index_path>
+#GRCh38+ENSEMBL93 GRCh38+ENSEMBL93 /depot/GRCh38+ENSEMBL93/genome.fa /depot/GRCh38+ENSEMBL93/genome.gtf /depot/GRCh38+ENSEMBL93/STAR_index/
+GRCh38+ENSEMBL93 GRCh38+ENSEMBL93 ${__HERE__}/test-cache/genome.fasta ${__HERE__}/test-cache/genome.gtf ${__HERE__}/test-cache/STAR_index/
+

diff -r 1a56888ddb7d -r 8c4c97fd0555 test-data/test-cache/genome.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-cache/genome.fasta Wed Oct 13 18:45:16 2021 +0000

b'@@ -0,0 +1,4 @@\n+>22\n+TAAAGAGACTTAGCACATTTATTCACTCACAGAGGTGAATGAAGGGCTCAGGGTTTGAACTCGATGACACTGATGGCGATCCGGGCTGCCCGCTGTAAGGCTTCGGCCACTTGAGGCTTTTCGTAAGTCTCCACCTCCATGGCACGGAAAGTGGGCACGTGCGTCTGCAGGGCCTTGCGGCCCTCGGGGGCCTCTGCCAGCATGGTAAGGGCCTTGGTGGCATTCAGGCGCGCTATGGTCATGGGGGAGTGCAGCAGCTCCAGGAGCAGGCCGATGGCTTGTGCCTCCAGGGCCGCATACTTCCCTGCAGGCCACCAACACAAGGCGTTCCAAACAGCCCAAGGGGCCCTCTTCTCACCCCCTCTCACCCTAGCCCTCTCCTGACCCTCCTCCTTGTGCAGTCTCAAGGGGTTTATGCCTGGTTTATGCCCCACCAACATGCCCTCACCCTCAGGGCTCTTGCACCTTCGCTGAGTCCGGCCTGGGCCTGGCCTCTTCATCTCCTGCCTTTCCGGCCTCAACTGGCTCCTCTCCATCCACCCTCGTATCCTGCCCAGGAGTTCAAGACCAGCCTGGGCAACATAATGAGATTCCATCTCTACAAAAATTTAAAAATTATCCAGATGTAGTGACAGGTGCCTACAGTCCCAGCTACTCAGGAGACTGAGCGGGGAGGATCACTAGAGCCTGGGAGTTTGAGGCTGCAGTGATCGTGACATCGTACTCTAGCCTGGGTGACACACTGAGACACTGGCTCTTTAAAAAAAGTAAGCCCAGCTGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCTGGTGGATCACGAGGTCAGGAGATTGTGACCATCCTGGCTAACACAGTGAAACCCCGTCTCTATTAAAAATACAAAAAATTAGCCGGGCATGGTGGCGGGCGCCTGTAGTCCCAGCTACTCAGGAGGCTGAGGCAGGAGAATGCGTGAACCTGGGAGGCGGAGCTTGCAGTGAGCCAAGATGGTGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCATCTCAAAAAAAAAAAAAAAGTAAACCCAATAGTTCATATATGTTGCCAATATTTCTTGAGCACCTACTCTGCACAAAACACACATGGTTCCTGAGAGATGCCCTCCTCAACCATTCGCTGCCCCTTTCTGCTGTCCTTGCCGGCCCTATGCTCCTGCAGTGCCCAGTGGACTCTCCCCTTTGCTGGTGGAACCTCTCAGCCTGGGCCCTGCCAACTCCCATTGTCCTTGGGATTGATCCCAAGCCCTTCCCAGAGGCGACTTGTCCCCTGTGTTTGCCCCACCCATGCTGGGCTTCTCCTCCACATCCCTCATCACTGCCTCCCAGGGCCTGGCACGTGGCAGGGGCTTCAGGACCCTCAGCTGGAGTGCCAGCTTGGGAATAAGTGGCCTCATCTCCCTGGGTCTCAGCCTCTTGTGTCTCGAGCCCCGTGGCCCTCCAAGCACTCCTACAGAAAGCCTGGACCGAGAAGGACACTGGGCTGAGGCTGCCCCAAGATAAACACGCAGGGGGAGCCCCTGCCACCGTGCAGTCAGCATCCTGACTACACCAGTAAAAGGAGGGGCTGTGGACTCCAAGAAAGCCCCAACCTCTCCCATCTGCATACCTCAAGCAGCATTAGGGTCAGGTGTGCAGGGCCTGGGAGGACCTGTCTTTGGGAGGCTGATTGCAAATCCAAGCCCAGATGGTCCTCCATGGAACTCAAAGGAAGCACAGGGCCCCAGGCTGGTGGGTGTGTAGGGGCCAGAGGGAAAGTCAAATGCTTTCAGGGGTCCTGGGAGTGCCACGCCCCAGTCTGCCAGGTGTGGAAACAGCCCAGGGTTCTTCCATGAGCCCAGGGTTGGGTGACTTAGAGGGGAAGCCTGACTGGCTGGGAGTCCCAAGGTAGGGGGCTCGGGGGAGATAATTCTGAATTTGCCCCCT
CCTCCAGTTTAAGAAAAATAATTAGGATGAATAAACACCATAGAGTCAAGGGCTCCAGGGTTTGCTTAGACCTCAGGGTAACAGAATCATGGATGGCAAACTAATCAACCCTTGAGTCACAGCTCACAAGGGGCCTGGGTGGGAGGCGGGAATTCCAGGCCCAGGGAACAGACTGGTGCGAAGGCACAGAGGTCCTAATGTGAGTGACTGGTGAGGGGCCACTGTGCTGGGGAGAAGAGGGAGGGCTGGGCCTTGGACCCAACCTTGAAAGGACATCAGTGATAGGCCAAGGGGAGGTTTTTTTTTTTTTTGCAAAGTGTGGAGTTTGGGGGACGATACCCAGCCCCTGAGTACAGAAAGCTCATCACGGCTGCTAGCCTGCTAGGGTCTCCCTCCCTGCGGCACCCCCCACCGCCTCACCTTCAGTGATCACTGTGGCGAACATCAGGGCACCGGCAGCGTTAGACTTCACATGCTCCACTGGGTCTTTCAGCAGATGGACCAGGATGGGGATGACGTCAAAATGACACACCTGTTTCTTGCCCTCTCGAGATATGCTGGGGCAGAGAGGGAACAGAGAGGGGCTCTGCTTGGAAGGGAGAAGAGGCGCAAGGCCCAGGAGTGCCCCAGGGAGACACAAAAGAAATAATTGTGTGGGGGCTACCCCAGGTAGTCCCAGGACTGGTCCCCATGATCCAGGACTGGTCATGGCAGGGATGCTGGGCTGCAGAGGAGCATTGGTGTGAACGCGGCCCACTGCCAGTGAAGACCACGGGGTGACAGGGCCTCACTCTGAGCCAGCACACAGAAAAGAAATAGGGCATGTTCTCCGTGGCCCCAAAATGGGGAACAGCAGGACCCAGGGGTGTCAACCACAGGGAGACCTGTTTTGACTTTGCCTAAGAGCCAGAGCTGACAGTGATGGGCCAAGCTGCCCCCAACAGCAGCAGCTGCCGGGCACTGGGCAACTGCAGCTAAGGACTCAGACCAGGTAACTTTTAGGCCACTTACTCTGGCACTGAATGAAGTTGTGGAAATGAGGATGATGAACTCCCCAAAAGTTTCCTGAGAGAGGGTCAATTCTGTCCCCTAAAAAGAAAGGGCTCCCTGGGCCAGGAAACACTGTTCACAAGCCCCACAGGAATGCCACAGTGCAAAGACCCCTGTGTCACTTAGTTTAATCAACCTTTCCCAAATGTAGCCACCAAAGAACCCTCATTTTTGTAGAGCTCTTAATGACCTCAAAGACACAGGTGTTCCCTGGGAACACAGATTGGGAAATGAGGTTGGCCAGCCACTAGCTGTGACACTGGGAGCCTTTCAGGGCAGTGGGCCTCAACCCCGAAGCCCATGATCAAAGTTGCAGAGTCCTGGGCCCCACATGCAGGGATTCCCTGATTCGTTCACACAGCAAGACACTGAGACAAGAGCGATGCAGGGATTCCCCGATTTGTTCACAAGAGTGGGGACAAGAGCAGGGTTTCTCCAGCTCAGTGTTCTTTAGGGCCAGATAATTCTCAACTGGGAGCCAAAGCTCCCCCGCTGCAAACCCTGGGTTTAGGGCAGCAAACAGAGGAAAGCCCTGTCCTCAGGAGCTCCCCTCGGGGATGACACACAAAGTAACGGCAGGGATGGTGGTCACAGTGGACATGGGAGGACAGGGGACAGCAGGGAGGTTACAGTTTCAGAGAGAGAGAGCTCTCTCCTGGAAAGCCACTCTCATGGTCCCACAGAAAGGGGACCTCTGAGCAGACCTAAGAATGATGGGGCTGGGACAGGGCCCAGGAATCTGCATTTTCACAAACCTCCCGGTGGCTTGGGTCCGGGGCCCCACTTGCAGAAATACTGGCCTGAGCCTCTAGTGAGACTCTCAGAAGAGAAGCCAGGACTTAGGGAAGGCAAGAGGGTCTCCACTAAAGAGACATAGCCACAGCTAAGGCAGAGACTCTGGGGTTCGGTGACCAGCCTGTCCTCGTGAAAAGGGGCCCAGTGTGAGAACA
CAC'..b'GCAGGGCCTTGGCTCCCCTCTGCCTTCTCTTGCACACTGGCTCAGTGCTTGCAGCTGCTTGCCCAGGAAATCCAGGGCCTCGGGGATCCCAGGGGCCCAGTGGAATCCTGTGGGGTTAGAAGCAGCCACTTGGGGCTGGGCATGGTGGCTCATGCCTGCAATCCCAGCACTTTGGGAGGCCAAGGCGGGTGGATCACTGGAGGTCAGGAGTTTGAGACCAGCCTGACCAACATGGTGAAACCCTGTCTCTACAAAAAATACAAAAATTAGGCAAGGCGTGGTGGCTCACACCTGTAATCCCAGCACTTTGGGAGGCTGAGGCGGGCAGATCACCTGAGGTCAGGAGTTTGAGACCAGCCTGGCCAACGTGGTGAAACCCTGTCTCTACTAAAAATACAAAAAAATTAGCGGAGCATGGTGGCAGGCACCTGTAATCCCAGATACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCTGAGGCCACAGTAAGCTGAGATCGTACCACTGCACTCCAGCCTGGACAACAGAGCGAGACTCCGTCTCAATAAATAAATAAATGCATCCATCCATCCATACATACATACAAAAATTAGCTGGGCATGGTGGTGTGGGCCTGTGGTTTACCCCAGTGATTGCCCACATCCAGGCCCACAGGCGGCATCTGTAAACGTGTGCTGTCAGTGAATTGAGCCAGCATGTTCAAGCTGGCAAAAATCACAGGGGCCTTTCTCCCACACTGTTTTGTAAATGAGTAAACTAAGACTCAGAGAGGACTCACCTGAGGCCTTCTTTTTTAACAATAATAGTAAAACCATCAACAAAAATGACATTTGTCAAGTGCCACCCAGGAGGATGTTCTAGAGTCTGCCCATGGGCCCGTTGGGATGCTATGGGTCCTCACCCACCTCCCAGCCCTTCTGCTCCCCATCTTTCCCTGGTGGATGGTGGGCCATTGGAATCCTGGGAGGCCTGAGCTGGGGAGAGGCTCAGGGGCTGGGAGGTGCCCTGGGGCCTGCAGGGCTGCTGGCTCACAGTGAGGCTGTTTGTGCCCCACCACAGGGTCGCTGGACACCCATCAAGCCCCAGCCCAGGCCCTGAACGAGACTCAGTGGGCACTAGAACGCCTGAGGCTGCAGCTGGGCTCCCCGGGGTCCTTGCAGAGGAAACTCAGTCTGCTGGAGCAGGAATCCCAGCAGCAGGAGCTGCAGATCCAGGGCTTCGAGAGTGACCTCGCCGAGATCCGCGCCGACAAACAGAACCTGGAGGCCATTCTGCACAGCCTGCCCGAGAACTGTGCCAGCTGGCAGTGAGGGCTGCCCAGATCCCCGGCACACACTCCCCCACCTGCTGTTTACATGACCCAGGGGGTGCACACTACCCCACAGGTGTGCCCATACAGACATTCCCCGGAGCCGGCTGCTGTGAACTCGCCCCCGTGTGGATAGTCACTCCCTGCCGATTCTGTCTGTGGCTTCTTCCCTGCCAGCAGGACTGAGTGTGCGTACCCAGTTCACCTGGACATGAGTGCACACTCTCACCCCTGCACATGCATAAACGGGCACACCCCAGTGTCAATAACATACACACGTGAGGGTGCATGTCTGTGTGTATGACCCACACGTGTTCAAGTCTAATCCATCCAGTCAGCAGCTTACGGTCCACACACATTACAGTCCACAGCTGTTGTGAGAGCCACCTGTGTGCTGGACACCCTCTGGATGTTGGGCAAGTTGTTACATGAGATGCCCTGGGGTGCTACATCCACTCACTCCAGATAGCAGGGAGGTCTCAGCAGATCTGCAGAGATCAAGGGGGTCAGCAACAGCCAAAGCCCCTAGTCCCAGAGCTGGCTGCCCTCTGTTTCACAGCAGCTCCCTGACCTGTGTTGCTGCGTGCACTCCCTACAGCTCGACACAGCCAGGGGACCAACAGGCCAAGAAATGCAAGATCCCGGGAGGGTTCTTAGCAGCAGAATCTGAGGCCCAGAGAC
CCTGAGGCCGTGGCCAGGCCTGCTAGTCTGGCTAGAGCAAGGCCCATTCCTGGCGGGGGTGTCATTGCCTTCACCGGACGCTTCCCTCTCAGGGTCCTGGGACTGCACCAGATGCCCTGAGGGAATGGCCCACCCTGGCCTGTACCCACTTCAGCCTGTGATCTATCCAAAGAGCCAGGCCCAAAAGCGCCTAGGTCAGGGTGCTCAGGCTACCAGGAGCACGCCTCTGTGCCCCCGGCAACCCAGTTGACCTTTAATTGACGCTTTCCAGACCAGCCCTGCGGCACCACTTGCCATGCGGGAGGCCACCAGGGTGTGCAAGCCTGGCTGCCATTCCAGTCTGTCCTGTCTGGGAATCGCCCTGTGGCCAGGCCCGCATGCTGGCCTCTGCCCAGGACTCCTCAGCATTTCCTCTTGGCATCCCTCCCCTCTCCCAGACCCTCTTCCAGCAGATGGCAAGGCCTCGGCATTGGGAAGTCAGGCACCTCTGCGGGCCCAGCCCCCTCCCGTGGCTCCCCTGACAGGGGCAGGGGTAGGGCAGCAGCACAGACCAATTCCGTTGAACGTGGAAATAAAGGACCCTTTCACTGGGCAGGGTGGTGTGCCTCACCCTCCCCGGCTGGTGGGCAGCCAGGGCCCTGGCTGTGGGTGTGCATATGACACACCTAGTAGGTGGCCAGCATGTGGACCGGACGTTGGTAGGAAGGTGGCAAAAGCCGAGCTCGTGGCTGGGCCAGTACCTCCCATTAGAGGGCTTTGCTGGGGTTGTGTGATCACAGGTACCTACCCTGTCCTCTCAGGCACTTACCACGTAAAGCCTAGGAGCTGGTGAGTTGGAGGGGTGGGGTGCGGAGAGGCCCTCAGCTGACCTCTGGTTCAGGCTCGAGACGAACTCACAGCCAAGTGTCCGAGGATGGTGAGGAGCAGGGAGGGGCGCCATCCAGGAGGGGGATGGTGTGGGTGGGGCCTTGAAGGGTGGGGAGGCAGAGAAGGAAGCATTCCAGGCAAGAGGGTGGACAACAGTCCGGGGCCCGCAGGGTTGGGGCTCGGCCAGCTTGCATCACTCCAGGACCCCAGGTTGAATGGGGTGGGATGTTGGAGCTGCTCAGTCAGGGCTCTTGGCCGCAGGCCTCAAACCCCTCCTGAGGTGGTTTCAGCAGAAAAGGGGTGTTGGGAGGGTCGCTTGGAACCCTGGAGTAAAAACGGCTGCCACGTGTTGGAGATAGCCTAGGGAGGGGAGCCTGAGGCTTCCGGGATAGGTTGGCTTCCCTCTTCCCCCCTCCCGCCTCTCTTCTTGGTCTGTGTCTCTGCTCTCCTCTCCTGTATCTGCTTCGTTCTTTTCTCTTTATTTATTGATTTTTTTTGAGATGGAGTCTCGCTCTGTTGCCCAGGCTGGAGTGCAGTGGCATGATCTCTGCCCACTGCATCCTCCACCTCCCAGGTTTAAGCAATTCTCCTGCCTCAGCCTCCCGAGTAGCTAGGACTACAGGTGCTTGCCACCATGCCCAGCTAATTTTGGTATTTTTAGTAGAGACAGGGTTTCACCACGTTGGTCAGGCTGGTCTCAAACTCCTGACCTCAGATGATATACCTGCATTGGCCTCTTAAAGTTCTGGGATTTCAGGCATGAGCCACTGCAACCGGCCCATTCTTTTCTCTTTGCAGAGTGGCTTTCTTTGTTTTTCTTGTGCCTGATAGGAGAGGACACCCACCCCTACCGCCATCCCCCATAATGGCCCCAGGTGTACATGTCATCAGGTCCAGTGCTTGCAAGAGACAAGCTGGTGACTCTGTCCTGATTCCAGCTTCTCAGCTTAGGTGAAGTCCCACCAACCCCCGTTCAGGATAATGAGGATCTCTGGATCTAAGGCCAATAATGGATGACCGGTGCCACCCCCCAACCTAATGGGAGATGGTGTTCAGAGAAGAGGTGTGCTCCTCCACAGAAAACTGTAAAATCAAGGCTACGGTGGGGGATTGACATGATTAAACTGAGCTAG
GAGTGA\n'

diff -r 1a56888ddb7d -r 8c4c97fd0555 test-data/test-cache/genome.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-cache/genome.gtf Wed Oct 13 18:45:16 2021 +0000

b'@@ -0,0 +1,860 @@\n+#!genome-build GRCh38.p12\n+#!genome-version GRCh38\n+#!genome-date 2013-12\n+#!genome-build-accession NCBI:GCA_000001405.27\n+#!genebuild-last-updated 2018-01\n+9\tensembl_havana\tgene\t1\t59388\t.\t+\t.\tgene_id "ENSG00000107164"; gene_version "15"; gene_name "FUBP3"; gene_source "ensembl_havana"; gene_biotype "protein_coding";\n+9\thavana\ttranscript\t1\t3122\t.\t+\t.\tgene_id "ENSG00000107164"; gene_version "15"; transcript_id "ENST00000467100"; transcript_version "1"; gene_name "FUBP3"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "FUBP3-203"; transcript_source "havana"; transcript_biotype "processed_transcript"; transcript_support_level "2";\n+9\thavana\texon\t1\t800\t.\t+\t.\tgene_id "ENSG00000107164"; gene_version "15"; transcript_id "ENST00000467100"; transcript_version "1"; exon_number "1"; gene_name "FUBP3"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "FUBP3-203"; transcript_source "havana"; transcript_biotype "processed_transcript"; exon_id "ENSE00001817525"; exon_version "1"; transcript_support_level "2";\n+9\thavana\texon\t1695\t3122\t.\t+\t.\tgene_id "ENSG00000107164"; gene_version "15"; transcript_id "ENST00000467100"; transcript_version "1"; exon_number "2"; gene_name "FUBP3"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "FUBP3-203"; transcript_source "havana"; transcript_biotype "processed_transcript"; exon_id "ENSE00001916817"; exon_version "1"; transcript_support_level "2";\n+9\tensembl_havana\ttranscript\t642\t59388\t.\t+\t.\tgene_id "ENSG00000107164"; gene_version "15"; transcript_id "ENST00000319725"; transcript_version "9"; gene_name "FUBP3"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "FUBP3-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS43893"; tag "basic"; transcript_support_level 
"1";\n+9\tensembl_havana\texon\t642\t800\t.\t+\t.\tgene_id "ENSG00000107164"; gene_version "15"; transcript_id "ENST00000319725"; transcript_version "9"; exon_number "1"; gene_name "FUBP3"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "FUBP3-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS43893"; exon_id "ENSE00001809698"; exon_version "1"; tag "basic"; transcript_support_level "1";\n+9\tensembl_havana\tCDS\t717\t800\t.\t+\t0\tgene_id "ENSG00000107164"; gene_version "15"; transcript_id "ENST00000319725"; transcript_version "9"; exon_number "1"; gene_name "FUBP3"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "FUBP3-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS43893"; protein_id "ENSP00000318177"; protein_version "9"; tag "basic"; transcript_support_level "1";\n+9\tensembl_havana\tstart_codon\t717\t719\t.\t+\t0\tgene_id "ENSG00000107164"; gene_version "15"; transcript_id "ENST00000319725"; transcript_version "9"; exon_number "1"; gene_name "FUBP3"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "FUBP3-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS43893"; tag "basic"; transcript_support_level "1";\n+9\tensembl_havana\texon\t16519\t16624\t.\t+\t.\tgene_id "ENSG00000107164"; gene_version "15"; transcript_id "ENST00000319725"; transcript_version "9"; exon_number "2"; gene_name "FUBP3"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "FUBP3-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS43893"; exon_id "ENSE00003666938"; exon_version "1"; tag "basic"; transcript_support_level "1";\n+9\tensembl_havana\tCDS\t16519\t16624\t.\t+\t0\tgene_id "ENSG00000107164"; gene_version "15"; transcript_id "ENST00000319725"; transcript_version 
"9"; exon_number "2"; gene_name "FUBP3"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "FUBP3-201"; transcript_source "e'..b'NA";\n+22\tensembl\ttranscript\t202368\t202657\t.\t-\t.\tgene_id "ENSG00000240160"; gene_version "3"; transcript_id "ENST00000467969"; transcript_version "3"; gene_name "RN7SL263P"; gene_source "ensembl"; gene_biotype "misc_RNA"; transcript_name "RN7SL263P-201"; transcript_source "ensembl"; transcript_biotype "misc_RNA"; tag "basic"; transcript_support_level "NA";\n+22\tensembl\texon\t202368\t202657\t.\t-\t.\tgene_id "ENSG00000240160"; gene_version "3"; transcript_id "ENST00000467969"; transcript_version "3"; exon_number "1"; gene_name "RN7SL263P"; gene_source "ensembl"; gene_biotype "misc_RNA"; transcript_name "RN7SL263P-201"; transcript_source "ensembl"; transcript_biotype "misc_RNA"; exon_id "ENSE00001875334"; exon_version "3"; tag "basic"; transcript_support_level "NA";\n+22\thavana\tgene\t203353\t205591\t.\t+\t.\tgene_id "ENSG00000230701"; gene_version "2"; gene_name "FBXW4P1"; gene_source "havana"; gene_biotype "processed_pseudogene";\n+22\thavana\ttranscript\t203353\t205591\t.\t+\t.\tgene_id "ENSG00000230701"; gene_version "2"; transcript_id "ENST00000426721"; transcript_version "2"; gene_name "FBXW4P1"; gene_source "havana"; gene_biotype "processed_pseudogene"; transcript_name "FBXW4P1-201"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; tag "basic"; transcript_support_level "NA";\n+22\thavana\texon\t203353\t205591\t.\t+\t.\tgene_id "ENSG00000230701"; gene_version "2"; transcript_id "ENST00000426721"; transcript_version "2"; exon_number "1"; gene_name "FBXW4P1"; gene_source "havana"; gene_biotype "processed_pseudogene"; transcript_name "FBXW4P1-201"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; exon_id "ENSE00001782951"; exon_version "2"; tag "basic"; transcript_support_level "NA";\n+22\thavana\tgene\t263129\t263513\t.\t-\t.\tgene_id "ENSG00000214526"; 
gene_version "3"; gene_name "AP000343.1"; gene_source "havana"; gene_biotype "processed_pseudogene";\n+22\thavana\ttranscript\t263129\t263513\t.\t-\t.\tgene_id "ENSG00000214526"; gene_version "3"; transcript_id "ENST00000440602"; transcript_version "1"; gene_name "AP000343.1"; gene_source "havana"; gene_biotype "processed_pseudogene"; transcript_name "AP000343.1-201"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; tag "basic"; transcript_support_level "NA";\n+22\thavana\texon\t263129\t263513\t.\t-\t.\tgene_id "ENSG00000214526"; gene_version "3"; transcript_id "ENST00000440602"; transcript_version "1"; exon_number "1"; gene_name "AP000343.1"; gene_source "havana"; gene_biotype "processed_pseudogene"; transcript_name "AP000343.1-201"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; exon_id "ENSE00001526946"; exon_version "2"; tag "basic"; transcript_support_level "NA";\n+22\thavana\tgene\t267202\t269079\t.\t+\t.\tgene_id "ENSG00000236611"; gene_version "1"; gene_name "LINC02556"; gene_source "havana"; gene_biotype "lincRNA";\n+22\thavana\ttranscript\t267202\t269079\t.\t+\t.\tgene_id "ENSG00000236611"; gene_version "1"; transcript_id "ENST00000450776"; transcript_version "1"; gene_name "LINC02556"; gene_source "havana"; gene_biotype "lincRNA"; transcript_name "LINC02556-201"; transcript_source "havana"; transcript_biotype "lincRNA"; tag "basic"; transcript_support_level "3";\n+22\thavana\texon\t267202\t267377\t.\t+\t.\tgene_id "ENSG00000236611"; gene_version "1"; transcript_id "ENST00000450776"; transcript_version "1"; exon_number "1"; gene_name "LINC02556"; gene_source "havana"; gene_biotype "lincRNA"; transcript_name "LINC02556-201"; transcript_source "havana"; transcript_biotype "lincRNA"; exon_id "ENSE00001785308"; exon_version "1"; tag "basic"; transcript_support_level "3";\n+22\thavana\texon\t268910\t269079\t.\t+\t.\tgene_id "ENSG00000236611"; gene_version "1"; transcript_id "ENST00000450776"; transcript_version 
"1"; exon_number "2"; gene_name "LINC02556"; gene_source "havana"; gene_biotype "lincRNA"; transcript_name "LINC02556-201"; transcript_source "havana"; transcript_biotype "lincRNA"; exon_id "ENSE00001710203"; exon_version "1"; tag "basic"; transcript_support_level "3";\n'

diff -r 1a56888ddb7d -r 8c4c97fd0555 tool-data/arriba_indexes.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/arriba_indexes.loc.sample Wed Oct 13 18:45:16 2021 +0000

@@ -0,0 +1,17 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of Arriba data files.
+#The Arriba script download_references.sh retrieves a genome assembly fasta
+#and a related GTF annotation file, then builds a STAR index.
+#You will need to create these data files and then create an
+#arriba_indexes.loc file similar to this one (store it in this
+#directory) that points to the directories in which those files are stored.
+#The arriba_indexes.loc file has this format (the fields on each line
+#are separated by TAB characters):
+#
+#<unique_build_id> <display_name> <genome_fasta_path> <genome_gtf_path> <STAR_index_path>
+#
+#Note that STAR indices can become quite large.
+#
+#<unique_build_id> <display_name> <genome_fasta_path> <genome_gtf_path> <STAR_index_path>
+#GRCh38+ENSEMBL93 GRCh38+ENSEMBL93 /depot/GRCh38+ENSEMBL93/genome.fa /depot/GRCh38+ENSEMBL93/genome.gtf /depot/GRCh38+ENSEMBL93/STAR_index/
+

diff -r 1a56888ddb7d -r 8c4c97fd0555 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Wed Oct 13 18:45:16 2021 +0000

@@ -0,0 +1,6 @@
+<tables> <!-- Sample data-table registration for the Arriba reference sets listed in arriba_indexes.loc -->
+    <table name="arriba_indexes" comment_char="#" allow_duplicate_entries="False"> <!-- the .loc file uses "#" for its comment lines -->
+        <columns>value, name, fasta, gtf, star_index</columns> <!-- field order of each .loc row: unique build id, display name, genome FASTA path, genome GTF path, STAR index path -->
+        <file path="tool-data/arriba_indexes.loc" /> <!-- the .loc file that supplies the rows of this table -->
+    </table>
+</tables>

diff -r 1a56888ddb7d -r 8c4c97fd0555 tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Wed Oct 13 18:45:16 2021 +0000

@@ -0,0 +1,6 @@
+<tables> <!-- Test-time counterpart of the sample data-table config; it points at the .loc file under test-data -->
+    <table name="arriba_indexes" comment_char="#" allow_duplicate_entries="False"> <!-- same table name and column layout as the sample config -->
+        <columns>value, name, fasta, gtf, star_index</columns> <!-- field order of each .loc row: unique build id, display name, genome FASTA path, genome GTF path, STAR index path -->
+        <file path="${__HERE__}/test-data/arriba_indexes.loc" /> <!-- NOTE(review): ${__HERE__} presumably expands to the directory holding this config file; confirm against the Galaxy docs -->
+    </table>
+</tables>