Mercurial > repos > iuc > deseq2
changeset 19:c56e0689e46e draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 5b6dc96c6e14582d5bb1dc213ac8d26dc7b2829e
author | iuc |
---|---|
date | Tue, 04 Dec 2018 08:19:06 -0500 |
parents | 3bf1b3ec1ddf |
children | 89d26b11d452 |
files | deseq2.R deseq2.xml get_deseq_dataset.R test-data/GRCh38_latest_genomic.gff test-data/tx2gene.tab |
diffstat | 5 files changed, 171 insertions(+), 52 deletions(-) [+] |
line wrap: on
line diff
--- a/deseq2.R Fri Nov 16 14:47:19 2018 -0500 +++ b/deseq2.R Tue Dec 04 08:19:06 2018 -0500 @@ -57,7 +57,7 @@ "plots" , "p", 1, "character", "tximport", "i", 0, "logical", "txtype", "y", 1, "character", - "tx2gene", "x", 1, "character", # a space-sep tx-to-gene map or GTF file (auto detect .gtf/.GTF) + "tx2gene", "x", 1, "character", # a space-sep tx-to-gene map or GTF/GFF3 file "esf", "e", 1, "character", "fit_type", "t", 1, "integer", "many_contrasts", "m", 0, "logical",
--- a/deseq2.xml Fri Nov 16 14:47:19 2018 -0500 +++ b/deseq2.xml Tue Dec 04 08:19:06 2018 -0500 @@ -1,11 +1,16 @@ -<tool id="deseq2" name="DESeq2" version="2.11.40.3"> +<tool id="deseq2" name="DESeq2" version="2.11.40.4"> <description>Determines differentially expressed features from count tables</description> <requirements> - <requirement type="package" version="1.18.1">bioconductor-deseq2</requirement> - <requirement type="package" version="1.6.0">bioconductor-tximport</requirement> - <requirement type="package" version="1.30.0">bioconductor-genomicfeatures</requirement> - <requirement type="package" version="0.6.5">r-ggrepel</requirement> - <requirement type="package" version="1.0.8">r-pheatmap</requirement> + <requirement type="package" version="1.20.0">bioconductor-deseq2</requirement> + <!-- Optional dependency of tximport, needed to import kallisto results https://github.com/galaxyproject/usegalaxy-playbook/issues/161 --> + <requirement type="package" version="2.24.0">bioconductor-rhdf5</requirement> + <requirement type="package" version="1.8.0">bioconductor-tximport</requirement> + <requirement type="package" version="1.32.3">bioconductor-genomicfeatures</requirement> + <requirement type="package" version="1.20.2">r-getopt</requirement> + <requirement type="package" version="0.8.0">r-ggrepel</requirement> + <requirement type="package" version="3.0.1">r-gplots</requirement> + <requirement type="package" version="1.0.10">r-pheatmap</requirement> + <requirement type="package" version="0.2.20">r-rjson</requirement> </requirements> <stdio> <regex match="Execution halted" @@ -27,7 +32,7 @@ <command><![CDATA[ #if $tximport.tximport_selector == 'tximport': #if $tximport.mapping_format.mapping_format_selector == 'gtf': - ln -s '$tximport.mapping_format.gtf_file' mapping.gtf && + ln -s '$tximport.mapping_format.gtf_file' mapping.gff && #else: ln -s '$tximport.mapping_format.tabular_file' mapping.txt && #end if @@ -92,7 +97,7 @@ -i -y $tximport.txtype #if $tximport.mapping_format.mapping_format_selector == 'gtf': - -x mapping.gtf + -x mapping.gff #else: -x mapping.txt #end if @@ -133,14 +138,14 @@ </param> <conditional name="mapping_format"> <param name="mapping_format_selector" type="select" label="Gene mapping format"> - <option value="gtf" selected="True">GTF</option> - <option value="tabular">Transcript-ID and Gene-ID mapping file</option> + <option value="gtf" selected="True">GTF/GFF3</option> + <option value="tabular">Transcript-ID to Gene-ID mapping file</option> </param> <when value="gtf"> - <param name="gtf_file" type="data" format="gtf,gff3" label="GTF/GFF3 file with Transcript - Gene mapping"/> + <param name="gtf_file" type="data" format="gtf,gff3" label="GTF/GFF3 annotation file"/> </when> <when value="tabular"> - <param name="tabular_file" type="data" format="tabular" label="Tabular file with Transcript - Gene mapping"/> + <param name="tabular_file" type="data" format="tabular" label="Tabular file with Transcript-ID to Gene-ID mapping"/> </when> </conditional> </when> @@ -190,7 +195,7 @@ help=" DESeq2 performs independent filtering by default using the mean of normalized counts as a filter statistic" /> </inputs> <outputs> - <data format="tabular" name="deseq_out" label="DESeq2 result file on ${on_string}"> + <data name="deseq_out" format="tabular" label="DESeq2 result file on ${on_string}"> <filter>many_contrasts is False</filter> <actions> <action name="column_names" type="metadata" default="GeneID,Base mean,log2(FC),StdErr,Wald-Stats,P-value,P-adj" /> @@ -200,16 +205,16 @@ <filter>many_contrasts is True</filter> <discover_datasets pattern="None.(?P<designation>.+_vs_.+)" format="tabular" directory="." visible="false"/> </collection> - <data format="pdf" name="plots" label="DESeq2 plots on ${on_string}"> + <data name="plots" format="pdf" label="DESeq2 plots on ${on_string}"> <filter>pdf == True</filter> </data> - <data format="tabular" name="counts_out" label="Normalized counts file on ${on_string}"> + <data name="counts_out" format="tabular" label="Normalized counts file on ${on_string}"> <filter>normCounts == True</filter> </data> - <data format="tabular" name="rlog_out" label="rLog-Normalized counts file on ${on_string}"> + <data name="rlog_out" format="tabular" label="rLog-Normalized counts file on ${on_string}"> <filter>normRLog == True</filter> </data> - <data format="tabular" name="vst_out" label="VST-Normalized counts file on ${on_string}"> + <data name="vst_out" format="tabular" label="VST-Normalized counts file on ${on_string}"> <filter>normVST == True</filter> </data> </outputs> @@ -251,7 +256,7 @@ </output> <output name="deseq_out" > <assert_contents> - <has_text_matching expression="FBgn0003360\t1933.9504.*\t-2.8399.*\t0.1309.*-21.6851.*2.831.*8.024" /> + <has_text_matching expression="FBgn0003360\t1933\.9504.*\t-2\.8399.*\t0\.1309.*\t-21\.68.*\t.*e-104\t.*e-101" /> </assert_contents> </output> </test> @@ -315,7 +320,7 @@ </output> <output name="deseq_out" > <assert_contents> - <has_text_matching expression="FBgn0003360\t1933.9504.*\t-2.8399.*\t0.1309.*-21.6851.*2.831.*8.024" /> + <has_text_matching expression="FBgn0003360\t1933\.9504.*\t-2\.8399.*\t0\.1309.*\t-21\.68.*\t.*e-104\t.*e-101" /> </assert_contents> </output> </test> @@ -339,7 +344,31 @@ <param name="tabular_file" value="tx2gene.tab"/> <output name="deseq_out" > <assert_contents> - <has_text_matching expression="MIR6859-2\t1.1858.*\t-1.5832.*\t1.2956.*\t-1.2219.*\t0.2217.*\t0.8868.*" /> + <has_text_matching expression="UGT3A2\t1.8841.*\t-0.1329.*\t0.6936.*\t-0.1917.*\t0.8479.*\t0.9999.*" /> + </assert_contents> + </output> + </test> + <!--Ensure Sailfish/Salmon input with GFF3 annotation works--> + <test expect_num_outputs="1"> + <repeat name="rep_factorName"> + <param name="factorName" value="Treatment"/> + <repeat name="rep_factorLevel"> + <param name="factorLevel" value="Treated"/> + <param name="countsFile" value="sailfish/sailfish_quant.sf1.tab,sailfish/sailfish_quant.sf2.tab,sailfish/sailfish_quant.sf3.tab"/> + </repeat> + <repeat name="rep_factorLevel"> + <param name="factorLevel" value="Untreated"/> + <param name="countsFile" value="sailfish/sailfish_quant.sf4.tab,sailfish/sailfish_quant.sf5.tab,sailfish/sailfish_quant.sf6.tab"/> + </repeat> + </repeat> + <param name="pdf" value="False"/> + <param name="tximport_selector" value="tximport"/> + <param name="txtype" value="sailfish"/> + <param name="mapping_format_selector" value="gtf"/> + <param name="gtf_file" value="GRCh38_latest_genomic.gff"/> + <output name="deseq_out" > + <assert_contents> + <has_text_matching expression="UGT3A2\t1.8841.*\t-0.1329.*\t0.6936.*\t-0.1917.*\t0.8479.*\t0.9999.*" /> </assert_contents> </output> </test>
--- a/get_deseq_dataset.R Fri Nov 16 14:47:19 2018 -0500 +++ b/get_deseq_dataset.R Tue Dec 04 08:19:06 2018 -0500 @@ -9,11 +9,11 @@ } if (!is.null(tximport)) { - if (is.null(tx2gene)) stop("A transcript-to-gene map or a GTF file is required for tximport") - if (tolower(file_ext(opt$tx2gene)) == "gtf") { - gtfFile <-tx2gene + if (is.null(tx2gene)) stop("A transcript-to-gene map or a GTF/GFF3 file is required for tximport") + if (tolower(file_ext(opt$tx2gene)) == "gff") { + gffFile <-tx2gene } else { - gtfFile <- NULL + gffFile <- NULL tx2gene <- read.table(tx2gene, header=FALSE) } useTXI <- TRUE @@ -45,22 +45,26 @@ } else { # construct the object using tximport - # first need to make the tx2gene table - # this takes ~2-3 minutes using Bioconductor functions - if (!is.null(gtfFile)) { - suppressPackageStartupMessages({ - library("GenomicFeatures") - }) - txdb <- makeTxDbFromGFF(gtfFile, format="gtf") - k <- keys(txdb, keytype = "GENEID") - df <- select(txdb, keys = k, keytype = "GENEID", columns = "TXNAME") - tx2gene <- df[, 2:1] # tx ID, then gene ID - } library("tximport") txiFiles <- as.character(sampleTable$filename) labs <- row.names(sampleTable) names(txiFiles) <- labs - txi <- tximport(txiFiles, type=txtype, tx2gene=tx2gene) + if (!is.null(gffFile)) { + # first need to make the tx2gene table + # this takes ~2-3 minutes using Bioconductor functions + suppressPackageStartupMessages({ + library("GenomicFeatures") + }) + txdb <- makeTxDbFromGFF(gffFile) + k <- keys(txdb, keytype = "TXNAME") + tx2gene <- select(txdb, k, "GENEID", "TXNAME") + } + try(txi <- tximport(txiFiles, type=txtype, tx2gene=tx2gene)) + if (!exists("txi")) { + # Remove version from transcript IDs + tx2gene$TXNAME <- sub('\\.[0-9]+', '', tx2gene$TXNAME) + txi <- tximport(txiFiles, type=txtype, tx2gene=tx2gene) + } dds <- DESeqDataSetFromTximport(txi, subset(sampleTable, select=-c(filename)), designFormula)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/GRCh38_latest_genomic.gff Tue Dec 04 08:19:06 2018 -0500 @@ -0,0 +1,86 @@ +##gff-version 3 +#!gff-spec-version 1.21 +#!processor NCBI annotwriter +#!genome-build GRCh38.p12 +#!genome-build-accession NCBI_Assembly:GCF_000001405.38 +#!annotation-source NCBI Homo sapiens Annotation Release 109 +# Trimmed version of ftp://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/annotation/GRCh38_latest/refseq_identifiers/GRCh38_latest_genomic.gff.gz +##sequence-region NC_000005.10 1 181538259 +##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9606 +NC_000005.10 RefSeq region 1 181538259 . + . ID=id565344;Dbxref=taxon:9606;Name=5;chromosome=5;gbkey=Src;genome=chromosome;mol_type=genomic DNA +NC_000005.10 BestRefSeq%2CGnomon gene 36035017 36071358 . - . ID=gene14857;Dbxref=GeneID:167127,HGNC:HGNC:27266,MIM:616384;Name=UGT3A2;description=UDP glycosyltransferase family 3 member A2;gbkey=Gene;gene=UGT3A2;gene_biotype=protein_coding +NC_000005.10 BestRefSeq mRNA 36035017 36066921 . - . ID=rna45581;Parent=gene14857;Dbxref=GeneID:167127,Genbank:NM_001168316.1,HGNC:HGNC:27266,MIM:616384;Name=NM_001168316.1;gbkey=mRNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 2;transcript_id=NM_001168316.1 +NC_000005.10 BestRefSeq exon 36066696 36066921 . - . ID=id576076;Parent=rna45581;Dbxref=GeneID:167127,Genbank:NM_001168316.1,HGNC:HGNC:27266,MIM:616384;gbkey=mRNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 2;transcript_id=NM_001168316.1 +NC_000005.10 BestRefSeq exon 36051870 36051984 . - . ID=id576077;Parent=rna45581;Dbxref=GeneID:167127,Genbank:NM_001168316.1,HGNC:HGNC:27266,MIM:616384;gbkey=mRNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 2;transcript_id=NM_001168316.1 +NC_000005.10 BestRefSeq exon 36048889 36049420 . - . ID=id576078;Parent=rna45581;Dbxref=GeneID:167127,Genbank:NM_001168316.1,HGNC:HGNC:27266,MIM:616384;gbkey=mRNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 2;transcript_id=NM_001168316.1 +NC_000005.10 BestRefSeq exon 36039477 36039708 . - . ID=id576079;Parent=rna45581;Dbxref=GeneID:167127,Genbank:NM_001168316.1,HGNC:HGNC:27266,MIM:616384;gbkey=mRNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 2;transcript_id=NM_001168316.1 +NC_000005.10 BestRefSeq exon 36037797 36038016 . - . ID=id576080;Parent=rna45581;Dbxref=GeneID:167127,Genbank:NM_001168316.1,HGNC:HGNC:27266,MIM:616384;gbkey=mRNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 2;transcript_id=NM_001168316.1 +NC_000005.10 BestRefSeq exon 36035017 36035974 . - . ID=id576081;Parent=rna45581;Dbxref=GeneID:167127,Genbank:NM_001168316.1,HGNC:HGNC:27266,MIM:616384;gbkey=mRNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 2;transcript_id=NM_001168316.1 +NC_000005.10 BestRefSeq transcript 36035017 36066921 . - . ID=rna45582;Parent=gene14857;Dbxref=GeneID:167127,Genbank:NR_031764.1,HGNC:HGNC:27266,MIM:616384;Name=NR_031764.1;gbkey=misc_RNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 3;transcript_id=NR_031764.1 +NC_000005.10 BestRefSeq exon 36066696 36066921 . - . ID=id576082;Parent=rna45582;Dbxref=GeneID:167127,Genbank:NR_031764.1,HGNC:HGNC:27266,MIM:616384;gbkey=misc_RNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 3;transcript_id=NR_031764.1 +NC_000005.10 BestRefSeq exon 36064249 36064350 . - . ID=id576083;Parent=rna45582;Dbxref=GeneID:167127,Genbank:NR_031764.1,HGNC:HGNC:27266,MIM:616384;gbkey=misc_RNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 3;transcript_id=NR_031764.1 +NC_000005.10 BestRefSeq exon 36051870 36051984 . - . ID=id576084;Parent=rna45582;Dbxref=GeneID:167127,Genbank:NR_031764.1,HGNC:HGNC:27266,MIM:616384;gbkey=misc_RNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 3;transcript_id=NR_031764.1 +NC_000005.10 BestRefSeq exon 36039477 36039708 . - . ID=id576085;Parent=rna45582;Dbxref=GeneID:167127,Genbank:NR_031764.1,HGNC:HGNC:27266,MIM:616384;gbkey=misc_RNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 3;transcript_id=NR_031764.1 +NC_000005.10 BestRefSeq exon 36037797 36038016 . - . ID=id576086;Parent=rna45582;Dbxref=GeneID:167127,Genbank:NR_031764.1,HGNC:HGNC:27266,MIM:616384;gbkey=misc_RNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 3;transcript_id=NR_031764.1 +NC_000005.10 BestRefSeq exon 36035017 36035974 . - . ID=id576087;Parent=rna45582;Dbxref=GeneID:167127,Genbank:NR_031764.1,HGNC:HGNC:27266,MIM:616384;gbkey=misc_RNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 3;transcript_id=NR_031764.1 +NC_000005.10 BestRefSeq mRNA 36035017 36066921 . - . ID=rna45583;Parent=gene14857;Dbxref=GeneID:167127,Genbank:NM_174914.3,HGNC:HGNC:27266,MIM:616384;Name=NM_174914.3;gbkey=mRNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 1;transcript_id=NM_174914.3 +NC_000005.10 BestRefSeq exon 36066696 36066921 . - . ID=id576088;Parent=rna45583;Dbxref=GeneID:167127,Genbank:NM_174914.3,HGNC:HGNC:27266,MIM:616384;gbkey=mRNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 1;transcript_id=NM_174914.3 +NC_000005.10 BestRefSeq exon 36064249 36064350 . - . ID=id576089;Parent=rna45583;Dbxref=GeneID:167127,Genbank:NM_174914.3,HGNC:HGNC:27266,MIM:616384;gbkey=mRNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 1;transcript_id=NM_174914.3 +NC_000005.10 BestRefSeq exon 36051870 36051984 . - . ID=id576090;Parent=rna45583;Dbxref=GeneID:167127,Genbank:NM_174914.3,HGNC:HGNC:27266,MIM:616384;gbkey=mRNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 1;transcript_id=NM_174914.3 +NC_000005.10 BestRefSeq exon 36048889 36049420 . - . ID=id576091;Parent=rna45583;Dbxref=GeneID:167127,Genbank:NM_174914.3,HGNC:HGNC:27266,MIM:616384;gbkey=mRNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 1;transcript_id=NM_174914.3 +NC_000005.10 BestRefSeq exon 36039477 36039708 . - . ID=id576092;Parent=rna45583;Dbxref=GeneID:167127,Genbank:NM_174914.3,HGNC:HGNC:27266,MIM:616384;gbkey=mRNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 1;transcript_id=NM_174914.3 +NC_000005.10 BestRefSeq exon 36037797 36038016 . - . ID=id576093;Parent=rna45583;Dbxref=GeneID:167127,Genbank:NM_174914.3,HGNC:HGNC:27266,MIM:616384;gbkey=mRNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 1;transcript_id=NM_174914.3 +NC_000005.10 BestRefSeq exon 36035017 36035974 . - . ID=id576094;Parent=rna45583;Dbxref=GeneID:167127,Genbank:NM_174914.3,HGNC:HGNC:27266,MIM:616384;gbkey=mRNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 1;transcript_id=NM_174914.3 +##sequence-region NC_000012.12 1 133275309 +##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9606 +NC_000012.12 RefSeq region 1 133275309 . + . ID=id1163836;Dbxref=taxon:9606;Name=12;chromosome=12;gbkey=Src;genome=chromosome;mol_type=genomic DNA +NC_000012.12 BestRefSeq gene 53938792 53946544 . + . ID=gene33473;Dbxref=GeneID:3229,HGNC:HGNC:5125,MIM:142976;Name=HOXC13;description=homeobox C13;gbkey=Gene;gene=HOXC13;gene_biotype=protein_coding;gene_synonym=ECTD9,HOX3,HOX3G +NC_000012.12 BestRefSeq mRNA 53938792 53946544 . + . ID=rna100330;Parent=gene33473;Dbxref=GeneID:3229,Genbank:NM_017410.2,HGNC:HGNC:5125,MIM:142976;Name=NM_017410.2;gbkey=mRNA;gene=HOXC13;product=homeobox C13;transcript_id=NM_017410.2 +NC_000012.12 BestRefSeq exon 53938792 53939642 . + . ID=id1209110;Parent=rna100330;Dbxref=GeneID:3229,Genbank:NM_017410.2,HGNC:HGNC:5125,MIM:142976;gbkey=mRNA;gene=HOXC13;product=homeobox C13;transcript_id=NM_017410.2 +NC_000012.12 BestRefSeq exon 53945000 53946544 . + . ID=id1209111;Parent=rna100330;Dbxref=GeneID:3229,Genbank:NM_017410.2,HGNC:HGNC:5125,MIM:142976;gbkey=mRNA;gene=HOXC13;product=homeobox C13;transcript_id=NM_017410.2 +NC_000012.12 BestRefSeq gene 53954868 53956606 . + . ID=gene33475;Dbxref=GeneID:3228,HGNC:HGNC:5124,MIM:142975;Name=HOXC12;description=homeobox C12;gbkey=Gene;gene=HOXC12;gene_biotype=protein_coding;gene_synonym=HOC3F,HOX3,HOX3F +NC_000012.12 BestRefSeq mRNA 53954868 53956606 . + . ID=rna100332;Parent=gene33475;Dbxref=GeneID:3228,Genbank:NM_173860.2,HGNC:HGNC:5124,MIM:142975;Name=NM_173860.2;gbkey=mRNA;gene=HOXC12;product=homeobox C12;transcript_id=NM_173860.2 +NC_000012.12 BestRefSeq exon 53954868 53955539 . + . ID=id1209115;Parent=rna100332;Dbxref=GeneID:3228,Genbank:NM_173860.2,HGNC:HGNC:5124,MIM:142975;gbkey=mRNA;gene=HOXC12;product=homeobox C12;transcript_id=NM_173860.2 +NC_000012.12 BestRefSeq exon 53956328 53956606 . + . ID=id1209116;Parent=rna100332;Dbxref=GeneID:3228,Genbank:NM_173860.2,HGNC:HGNC:5124,MIM:142975;gbkey=mRNA;gene=HOXC12;product=homeobox C12;transcript_id=NM_173860.2 +NC_000012.12 BestRefSeq gene 53973126 53976419 . + . ID=gene33477;Dbxref=GeneID:3227,HGNC:HGNC:5123,MIM:605559;Name=HOXC11;description=homeobox C11;gbkey=Gene;gene=HOXC11;gene_biotype=protein_coding;gene_synonym=HOX3H +NC_000012.12 BestRefSeq mRNA 53973126 53976419 . + . ID=rna100336;Parent=gene33477;Dbxref=GeneID:3227,Genbank:NM_014212.3,HGNC:HGNC:5123,MIM:605559;Name=NM_014212.3;Note=The RefSeq transcript has 1 non-frameshifting indel compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=mRNA;gene=HOXC11;inference=similar to RNA sequence%2C mRNA (same species):RefSeq:NM_014212.3;product=homeobox C11;transcript_id=NM_014212.3 +NC_000012.12 BestRefSeq exon 53973126 53973923 . + . ID=id1209133;Parent=rna100336;Dbxref=GeneID:3227,Genbank:NM_014212.3,HGNC:HGNC:5123,MIM:605559;Note=The RefSeq transcript has 1 non-frameshifting indel compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=mRNA;gene=HOXC11;inference=similar to RNA sequence%2C mRNA (same species):RefSeq:NM_014212.3;product=homeobox C11;transcript_id=NM_014212.3 +NC_000012.12 BestRefSeq exon 53975181 53976419 . + . ID=id1209134;Parent=rna100336;Dbxref=GeneID:3227,Genbank:NM_014212.3,HGNC:HGNC:5123,MIM:605559;Note=The RefSeq transcript has 1 non-frameshifting indel compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=mRNA;gene=HOXC11;inference=similar to RNA sequence%2C mRNA (same species):RefSeq:NM_014212.3;product=homeobox C11;transcript_id=NM_014212.3 +NC_000012.12 BestRefSeq gene 53985162 53990279 . + . ID=gene33479;Dbxref=GeneID:3226,HGNC:HGNC:5122,MIM:605560;Name=HOXC10;description=homeobox C10;gbkey=Gene;gene=HOXC10;gene_biotype=protein_coding;gene_synonym=HOX3I +NC_000012.12 BestRefSeq mRNA 53985162 53990279 . + . ID=rna100338;Parent=gene33479;Dbxref=GeneID:3226,Genbank:NM_017409.3,HGNC:HGNC:5122,MIM:605560;Name=NM_017409.3;gbkey=mRNA;gene=HOXC10;product=homeobox C10;transcript_id=NM_017409.3 +NC_000012.12 BestRefSeq exon 53985162 53986010 . + . ID=id1209144;Parent=rna100338;Dbxref=GeneID:3226,Genbank:NM_017409.3,HGNC:HGNC:5122,MIM:605560;gbkey=mRNA;gene=HOXC10;product=homeobox C10;transcript_id=NM_017409.3 +NC_000012.12 BestRefSeq exon 53989169 53990279 . + . ID=id1209145;Parent=rna100338;Dbxref=GeneID:3226,Genbank:NM_017409.3,HGNC:HGNC:5122,MIM:605560;gbkey=mRNA;gene=HOXC10;product=homeobox C10;transcript_id=NM_017409.3 +NC_000012.12 BestRefSeq gene 54000119 54003337 . + . ID=gene33483;Dbxref=GeneID:3225,HGNC:HGNC:5130,MIM:142971;Name=HOXC9;description=homeobox C9;gbkey=Gene;gene=HOXC9;gene_biotype=protein_coding;gene_synonym=HOX3,HOX3B +NC_000012.12 BestRefSeq mRNA 54000119 54003337 . + . ID=rna100344;Parent=gene33483;Dbxref=GeneID:3225,Genbank:NM_006897.2,HGNC:HGNC:5130,MIM:142971;Name=NM_006897.2;gbkey=mRNA;gene=HOXC9;product=homeobox C9;transcript_id=NM_006897.2 +NC_000012.12 BestRefSeq exon 54000119 54000726 . + . ID=id1209154;Parent=rna100344;Dbxref=GeneID:3225,Genbank:NM_006897.2,HGNC:HGNC:5130,MIM:142971;gbkey=mRNA;gene=HOXC9;product=homeobox C9;transcript_id=NM_006897.2 +NC_000012.12 BestRefSeq exon 54002430 54003337 . + . ID=id1209155;Parent=rna100344;Dbxref=GeneID:3225,Genbank:NM_006897.2,HGNC:HGNC:5130,MIM:142971;gbkey=mRNA;gene=HOXC9;product=homeobox C9;transcript_id=NM_006897.2 +NC_000012.12 BestRefSeq gene 54009106 54012763 . + . ID=gene33485;Dbxref=GeneID:3224,HGNC:HGNC:5129,MIM:142970;Name=HOXC8;description=homeobox C8;gbkey=Gene;gene=HOXC8;gene_biotype=protein_coding;gene_synonym=HOX3,HOX3A +NC_000012.12 BestRefSeq mRNA 54009106 54012763 . + . ID=rna100346;Parent=gene33485;Dbxref=GeneID:3224,Genbank:NM_022658.3,HGNC:HGNC:5129,MIM:142970;Name=NM_022658.3;gbkey=mRNA;gene=HOXC8;product=homeobox C8;transcript_id=NM_022658.3 +NC_000012.12 BestRefSeq exon 54009106 54009720 . + . ID=id1209158;Parent=rna100346;Dbxref=GeneID:3224,Genbank:NM_022658.3,HGNC:HGNC:5129,MIM:142970;gbkey=mRNA;gene=HOXC8;product=homeobox C8;transcript_id=NM_022658.3 +NC_000012.12 BestRefSeq exon 54011089 54012763 . + . ID=id1209159;Parent=rna100346;Dbxref=GeneID:3224,Genbank:NM_022658.3,HGNC:HGNC:5129,MIM:142970;gbkey=mRNA;gene=HOXC8;product=homeobox C8;transcript_id=NM_022658.3 +NC_000012.12 BestRefSeq gene 54016852 54056030 . + . ID=gene33486;Dbxref=GeneID:3221,HGNC:HGNC:5126,MIM:142974;Name=HOXC4;description=homeobox C4;gbkey=Gene;gene=HOXC4;gene_biotype=protein_coding;gene_synonym=cp19,HOX3,HOX3E +NC_000012.12 BestRefSeq mRNA 54016852 54056030 . + . ID=rna100347;Parent=gene33486;Dbxref=GeneID:3221,Genbank:NM_014620.5,HGNC:HGNC:5126,MIM:142974;Name=NM_014620.5;gbkey=mRNA;gene=HOXC4;product=homeobox C4%2C transcript variant 1;transcript_id=NM_014620.5 +NC_000012.12 BestRefSeq exon 54016852 54017414 . + . ID=id1209160;Parent=rna100347;Dbxref=GeneID:3221,Genbank:NM_014620.5,HGNC:HGNC:5126,MIM:142974;gbkey=mRNA;gene=HOXC4;product=homeobox C4%2C transcript variant 1;transcript_id=NM_014620.5 +NC_000012.12 BestRefSeq exon 54053160 54053276 . + . ID=id1209161;Parent=rna100347;Dbxref=GeneID:3221,Genbank:NM_014620.5,HGNC:HGNC:5126,MIM:142974;gbkey=mRNA;gene=HOXC4;product=homeobox C4%2C transcript variant 1;transcript_id=NM_014620.5 +NC_000012.12 BestRefSeq exon 54053917 54054361 . + . ID=id1209162;Parent=rna100347;Dbxref=GeneID:3221,Genbank:NM_014620.5,HGNC:HGNC:5126,MIM:142974;gbkey=mRNA;gene=HOXC4;product=homeobox C4%2C transcript variant 1;transcript_id=NM_014620.5 +NC_000012.12 BestRefSeq exon 54054850 54056030 . + . ID=id1209163;Parent=rna100347;Dbxref=GeneID:3221,Genbank:NM_014620.5,HGNC:HGNC:5126,MIM:142974;gbkey=mRNA;gene=HOXC4;product=homeobox C4%2C transcript variant 1;transcript_id=NM_014620.5 +NC_000012.12 BestRefSeq mRNA 54053877 54056030 . + . ID=rna100348;Parent=gene33486;Dbxref=GeneID:3221,Genbank:NM_153633.2,HGNC:HGNC:5126,MIM:142974;Name=NM_153633.2;gbkey=mRNA;gene=HOXC4;product=homeobox C4%2C transcript variant 2;transcript_id=NM_153633.2 +NC_000012.12 BestRefSeq exon 54053877 54054361 . + . ID=id1209164;Parent=rna100348;Dbxref=GeneID:3221,Genbank:NM_153633.2,HGNC:HGNC:5126,MIM:142974;gbkey=mRNA;gene=HOXC4;product=homeobox C4%2C transcript variant 2;transcript_id=NM_153633.2 +NC_000012.12 BestRefSeq exon 54054850 54056030 . + . ID=id1209165;Parent=rna100348;Dbxref=GeneID:3221,Genbank:NM_153633.2,HGNC:HGNC:5126,MIM:142974;gbkey=mRNA;gene=HOXC4;product=homeobox C4%2C transcript variant 2;transcript_id=NM_153633.2 +NC_000012.12 BestRefSeq gene 54016852 54035361 . + . ID=gene33487;Dbxref=GeneID:3222,HGNC:HGNC:5127,MIM:142973;Name=HOXC5;description=homeobox C5;gbkey=Gene;gene=HOXC5;gene_biotype=protein_coding;gene_synonym=CP11,HOX3,HOX3D +NC_000012.12 BestRefSeq transcript 54016852 54035361 . + . ID=rna100349;Parent=gene33487;Dbxref=GeneID:3222,Genbank:NR_003084.2,HGNC:HGNC:5127,MIM:142973;Name=NR_003084.2;gbkey=misc_RNA;gene=HOXC5;product=homeobox C5%2C transcript variant 2;transcript_id=NR_003084.2 +NC_000012.12 BestRefSeq exon 54016852 54017414 . + . ID=id1209166;Parent=rna100349;Dbxref=GeneID:3222,Genbank:NR_003084.2,HGNC:HGNC:5127,MIM:142973;gbkey=misc_RNA;gene=HOXC5;product=homeobox C5%2C transcript variant 2;transcript_id=NR_003084.2 +NC_000012.12 BestRefSeq exon 54034278 54035361 . + . ID=id1209167;Parent=rna100349;Dbxref=GeneID:3222,Genbank:NR_003084.2,HGNC:HGNC:5127,MIM:142973;gbkey=misc_RNA;gene=HOXC5;product=homeobox C5%2C transcript variant 2;transcript_id=NR_003084.2 +NC_000012.12 BestRefSeq mRNA 54033048 54035361 . + . ID=rna100350;Parent=gene33487;Dbxref=GeneID:3222,Genbank:NM_018953.3,HGNC:HGNC:5127,MIM:142973;Name=NM_018953.3;gbkey=mRNA;gene=HOXC5;product=homeobox C5%2C transcript variant 1;transcript_id=NM_018953.3 +NC_000012.12 BestRefSeq exon 54033048 54033576 . + . ID=id1209168;Parent=rna100350;Dbxref=GeneID:3222,Genbank:NM_018953.3,HGNC:HGNC:5127,MIM:142973;gbkey=mRNA;gene=HOXC5;product=homeobox C5%2C transcript variant 1;transcript_id=NM_018953.3 +NC_000012.12 BestRefSeq exon 54034278 54035361 . + . ID=id1209169;Parent=rna100350;Dbxref=GeneID:3222,Genbank:NM_018953.3,HGNC:HGNC:5127,MIM:142973;gbkey=mRNA;gene=HOXC5;product=homeobox C5%2C transcript variant 1;transcript_id=NM_018953.3 +NC_000012.12 BestRefSeq gene 54016852 54030823 . + . ID=gene33488;Dbxref=GeneID:3223,HGNC:HGNC:5128,MIM:142972;Name=HOXC6;description=homeobox C6;gbkey=Gene;gene=HOXC6;gene_biotype=protein_coding;gene_synonym=CP25,HHO.C8,HOX3,HOX3C +NC_000012.12 BestRefSeq mRNA 54016852 54030823 . + . ID=rna100351;Parent=gene33488;Dbxref=GeneID:3223,Genbank:NM_153693.4,HGNC:HGNC:5128,MIM:142972;Name=NM_153693.4;gbkey=mRNA;gene=HOXC6;product=homeobox C6%2C transcript variant 2;transcript_id=NM_153693.4 +NC_000012.12 BestRefSeq exon 54016852 54017414 . + . ID=id1209172;Parent=rna100351;Dbxref=GeneID:3223,Genbank:NM_153693.4,HGNC:HGNC:5128,MIM:142972;gbkey=mRNA;gene=HOXC6;product=homeobox C6%2C transcript variant 2;transcript_id=NM_153693.4 +NC_000012.12 BestRefSeq exon 54028576 54028921 . + . ID=id1209173;Parent=rna100351;Dbxref=GeneID:3223,Genbank:NM_153693.4,HGNC:HGNC:5128,MIM:142972;gbkey=mRNA;gene=HOXC6;product=homeobox C6%2C transcript variant 2;transcript_id=NM_153693.4 +NC_000012.12 BestRefSeq exon 54029655 54030823 . + . ID=id1209174;Parent=rna100351;Dbxref=GeneID:3223,Genbank:NM_153693.4,HGNC:HGNC:5128,MIM:142972;gbkey=mRNA;gene=HOXC6;product=homeobox C6%2C transcript variant 2;transcript_id=NM_153693.4 +NC_000012.12 BestRefSeq mRNA 54028410 54030823 . + . ID=rna100352;Parent=gene33488;Dbxref=GeneID:3223,Genbank:NM_004503.3,HGNC:HGNC:5128,MIM:142972;Name=NM_004503.3;gbkey=mRNA;gene=HOXC6;product=homeobox C6%2C transcript variant 1;transcript_id=NM_004503.3 +NC_000012.12 BestRefSeq exon 54028410 54028921 . + . ID=id1209175;Parent=rna100352;Dbxref=GeneID:3223,Genbank:NM_004503.3,HGNC:HGNC:5128,MIM:142972;gbkey=mRNA;gene=HOXC6;product=homeobox C6%2C transcript variant 1;transcript_id=NM_004503.3 +NC_000012.12 BestRefSeq exon 54029655 54030823 . + . ID=id1209176;Parent=rna100352;Dbxref=GeneID:3223,Genbank:NM_004503.3,HGNC:HGNC:5128,MIM:142972;gbkey=mRNA;gene=HOXC6;product=homeobox C6%2C transcript variant 1;transcript_id=NM_004503.3 +NC_000012.12 RefSeq cDNA_match 53973126 53973923 798 + . ID=46c4f9c5-f2cf-415e-a892-fbd053a8f7eb;Target=NM_014212.3 1 798 +;assembly_bases_aln=152;assembly_bases_seq=152;consensus_splices=2;exon_identity=0.991241;for_remapping=2;gap_count=1;identity=0.991241;idty=1;matches=2037;num_ident=2037;num_mismatch=0;pct_coverage=99.1241;pct_coverage_hiqual=99.1241;pct_identity_gap=99.1241;pct_identity_ungap=100;product_coverage=1;rank=1;splices=2;weighted_identity=0.991461 +NC_000012.12 RefSeq cDNA_match 53975181 53976419 1232.82 + . ID=46c4f9c5-f2cf-415e-a892-fbd053a8f7eb;Target=NM_014212.3 799 2055 +;assembly_bases_aln=152;assembly_bases_seq=152;consensus_splices=2;exon_identity=0.991241;for_remapping=2;gap_count=1;identity=0.991241;idty=0.98568;matches=2037;num_ident=2037;num_mismatch=0;pct_coverage=99.1241;pct_coverage_hiqual=99.1241;pct_identity_gap=99.1241;pct_identity_ungap=100;product_coverage=1;rank=1;splices=2;weighted_identity=0.991461;Gap=M705 I18 M534
--- a/test-data/tx2gene.tab Fri Nov 16 14:47:19 2018 -0500 +++ b/test-data/tx2gene.tab Tue Dec 04 08:19:06 2018 -0500 @@ -1,16 +1,16 @@ TXNAME GENEID -NM_001168316 DDX11L1 -NM_174914 DDX11L1 -NR_031764 DDX11L1 -NM_004503 WASH7P -NM_006897 WASH7P -NM_014212 WASH7P -NM_014620 WASH7P -NM_017409 WASH7P -NM_017410 WASH7P -NM_018953 MIR6859-2 -NM_022658 MIR6859-1 -NM_153633 WASH7P -NM_153693 WASH7P -NM_173860 WASH7P -NR_003084 WASH7P +NM_001168316 UGT3A2 +NM_174914 UGT3A2 +NR_031764 UGT3A2 +NM_004503 HOXC6 +NM_006897 HOXC9 +NM_014212 HOXC11 +NM_014620 HOXC4 +NM_017409 HOXC10 +NM_017410 HOXC13 +NM_018953 HOXC5 +NM_022658 HOXC8 +NM_153633 HOXC4 +NM_153693 HOXC6 +NM_173860 HOXC12 +NR_003084 HOXC5