# HG changeset patch # User iuc # Date 1543929546 18000 # Node ID c56e0689e46e64617ace4c0b53309b130f1c82ff # Parent 3bf1b3ec1ddf3962b56bd99f4d0c4d033418ce6b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 5b6dc96c6e14582d5bb1dc213ac8d26dc7b2829e diff -r 3bf1b3ec1ddf -r c56e0689e46e deseq2.R --- a/deseq2.R Fri Nov 16 14:47:19 2018 -0500 +++ b/deseq2.R Tue Dec 04 08:19:06 2018 -0500 @@ -57,7 +57,7 @@ "plots" , "p", 1, "character", "tximport", "i", 0, "logical", "txtype", "y", 1, "character", - "tx2gene", "x", 1, "character", # a space-sep tx-to-gene map or GTF file (auto detect .gtf/.GTF) + "tx2gene", "x", 1, "character", # a space-sep tx-to-gene map or GTF/GFF3 file "esf", "e", 1, "character", "fit_type", "t", 1, "integer", "many_contrasts", "m", 0, "logical", diff -r 3bf1b3ec1ddf -r c56e0689e46e deseq2.xml --- a/deseq2.xml Fri Nov 16 14:47:19 2018 -0500 +++ b/deseq2.xml Tue Dec 04 08:19:06 2018 -0500 @@ -1,11 +1,16 @@ - + Determines differentially expressed features from count tables - bioconductor-deseq2 - bioconductor-tximport - bioconductor-genomicfeatures - r-ggrepel - r-pheatmap + bioconductor-deseq2 + + bioconductor-rhdf5 + bioconductor-tximport + bioconductor-genomicfeatures + r-getopt + r-ggrepel + r-gplots + r-pheatmap + r-rjson - - + + - + - + @@ -190,7 +195,7 @@ help=" DESeq2 performs independent filtering by default using the mean of normalized counts as a filter statistic" /> - + many_contrasts is False @@ -200,16 +205,16 @@ many_contrasts is True - + pdf == True - + normCounts == True - + normRLog == True - + normVST == True @@ -251,7 +256,7 @@ - + @@ -315,7 +320,7 @@ - + @@ -339,7 +344,31 @@ - + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 3bf1b3ec1ddf -r c56e0689e46e get_deseq_dataset.R --- a/get_deseq_dataset.R Fri Nov 16 14:47:19 2018 -0500 +++ b/get_deseq_dataset.R Tue Dec 04 08:19:06 2018 -0500 @@ -9,11 +9,11 @@ } if (!is.null(tximport)) { - if (is.null(tx2gene)) stop("A transcript-to-gene map or a GTF file is required for tximport") - if (tolower(file_ext(opt$tx2gene)) == "gtf") { - gtfFile <-tx2gene + if (is.null(tx2gene)) stop("A transcript-to-gene map or a GTF/GFF3 file is required for tximport") + if (tolower(file_ext(opt$tx2gene)) == "gff") { + gffFile <-tx2gene } else { - gtfFile <- NULL + gffFile <- NULL tx2gene <- read.table(tx2gene, header=FALSE) } useTXI <- TRUE @@ -45,22 +45,26 @@ } else { # construct the object using tximport - # first need to make the tx2gene table - # this takes ~2-3 minutes using Bioconductor functions - if (!is.null(gtfFile)) { - suppressPackageStartupMessages({ - library("GenomicFeatures") - }) - txdb <- makeTxDbFromGFF(gtfFile, format="gtf") - k <- keys(txdb, keytype = "GENEID") - df <- select(txdb, keys = k, keytype = "GENEID", columns = "TXNAME") - tx2gene <- df[, 2:1] # tx ID, then gene ID - } library("tximport") txiFiles <- as.character(sampleTable$filename) labs <- row.names(sampleTable) names(txiFiles) <- labs - txi <- tximport(txiFiles, type=txtype, tx2gene=tx2gene) + if (!is.null(gffFile)) { + # first need to make the tx2gene table + # this takes ~2-3 minutes using Bioconductor functions + suppressPackageStartupMessages({ + library("GenomicFeatures") + }) + txdb <- makeTxDbFromGFF(gffFile) + k <- keys(txdb, keytype = "TXNAME") + tx2gene <- select(txdb, k, "GENEID", "TXNAME") + } + try(txi <- tximport(txiFiles, type=txtype, tx2gene=tx2gene)) + if (!exists("txi")) { + # Remove version from transcript IDs + tx2gene$TXNAME <- sub('\\.[0-9]+', '', tx2gene$TXNAME) + txi <- tximport(txiFiles, type=txtype, tx2gene=tx2gene) + } dds <- DESeqDataSetFromTximport(txi, subset(sampleTable, select=-c(filename)), designFormula) diff -r 3bf1b3ec1ddf -r c56e0689e46e test-data/GRCh38_latest_genomic.gff --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/GRCh38_latest_genomic.gff Tue Dec 04 08:19:06 2018 -0500 @@ -0,0 +1,86 @@ +##gff-version 3 +#!gff-spec-version 1.21 +#!processor NCBI annotwriter +#!genome-build GRCh38.p12 +#!genome-build-accession NCBI_Assembly:GCF_000001405.38 +#!annotation-source NCBI Homo sapiens Annotation Release 109 +# Trimmed version of ftp://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/annotation/GRCh38_latest/refseq_identifiers/GRCh38_latest_genomic.gff.gz +##sequence-region NC_000005.10 1 181538259 +##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9606 +NC_000005.10 RefSeq region 1 181538259 . + . ID=id565344;Dbxref=taxon:9606;Name=5;chromosome=5;gbkey=Src;genome=chromosome;mol_type=genomic DNA +NC_000005.10 BestRefSeq%2CGnomon gene 36035017 36071358 . - . ID=gene14857;Dbxref=GeneID:167127,HGNC:HGNC:27266,MIM:616384;Name=UGT3A2;description=UDP glycosyltransferase family 3 member A2;gbkey=Gene;gene=UGT3A2;gene_biotype=protein_coding +NC_000005.10 BestRefSeq mRNA 36035017 36066921 . - . ID=rna45581;Parent=gene14857;Dbxref=GeneID:167127,Genbank:NM_001168316.1,HGNC:HGNC:27266,MIM:616384;Name=NM_001168316.1;gbkey=mRNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 2;transcript_id=NM_001168316.1 +NC_000005.10 BestRefSeq exon 36066696 36066921 . - . ID=id576076;Parent=rna45581;Dbxref=GeneID:167127,Genbank:NM_001168316.1,HGNC:HGNC:27266,MIM:616384;gbkey=mRNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 2;transcript_id=NM_001168316.1 +NC_000005.10 BestRefSeq exon 36051870 36051984 . - . ID=id576077;Parent=rna45581;Dbxref=GeneID:167127,Genbank:NM_001168316.1,HGNC:HGNC:27266,MIM:616384;gbkey=mRNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 2;transcript_id=NM_001168316.1 +NC_000005.10 BestRefSeq exon 36048889 36049420 . - . ID=id576078;Parent=rna45581;Dbxref=GeneID:167127,Genbank:NM_001168316.1,HGNC:HGNC:27266,MIM:616384;gbkey=mRNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 2;transcript_id=NM_001168316.1 +NC_000005.10 BestRefSeq exon 36039477 36039708 . - . ID=id576079;Parent=rna45581;Dbxref=GeneID:167127,Genbank:NM_001168316.1,HGNC:HGNC:27266,MIM:616384;gbkey=mRNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 2;transcript_id=NM_001168316.1 +NC_000005.10 BestRefSeq exon 36037797 36038016 . - . ID=id576080;Parent=rna45581;Dbxref=GeneID:167127,Genbank:NM_001168316.1,HGNC:HGNC:27266,MIM:616384;gbkey=mRNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 2;transcript_id=NM_001168316.1 +NC_000005.10 BestRefSeq exon 36035017 36035974 . - . ID=id576081;Parent=rna45581;Dbxref=GeneID:167127,Genbank:NM_001168316.1,HGNC:HGNC:27266,MIM:616384;gbkey=mRNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 2;transcript_id=NM_001168316.1 +NC_000005.10 BestRefSeq transcript 36035017 36066921 . - . ID=rna45582;Parent=gene14857;Dbxref=GeneID:167127,Genbank:NR_031764.1,HGNC:HGNC:27266,MIM:616384;Name=NR_031764.1;gbkey=misc_RNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 3;transcript_id=NR_031764.1 +NC_000005.10 BestRefSeq exon 36066696 36066921 . - . ID=id576082;Parent=rna45582;Dbxref=GeneID:167127,Genbank:NR_031764.1,HGNC:HGNC:27266,MIM:616384;gbkey=misc_RNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 3;transcript_id=NR_031764.1 +NC_000005.10 BestRefSeq exon 36064249 36064350 . - . ID=id576083;Parent=rna45582;Dbxref=GeneID:167127,Genbank:NR_031764.1,HGNC:HGNC:27266,MIM:616384;gbkey=misc_RNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 3;transcript_id=NR_031764.1 +NC_000005.10 BestRefSeq exon 36051870 36051984 . - . ID=id576084;Parent=rna45582;Dbxref=GeneID:167127,Genbank:NR_031764.1,HGNC:HGNC:27266,MIM:616384;gbkey=misc_RNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 3;transcript_id=NR_031764.1 +NC_000005.10 BestRefSeq exon 36039477 36039708 . - . ID=id576085;Parent=rna45582;Dbxref=GeneID:167127,Genbank:NR_031764.1,HGNC:HGNC:27266,MIM:616384;gbkey=misc_RNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 3;transcript_id=NR_031764.1 +NC_000005.10 BestRefSeq exon 36037797 36038016 . - . ID=id576086;Parent=rna45582;Dbxref=GeneID:167127,Genbank:NR_031764.1,HGNC:HGNC:27266,MIM:616384;gbkey=misc_RNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 3;transcript_id=NR_031764.1 +NC_000005.10 BestRefSeq exon 36035017 36035974 . - . ID=id576087;Parent=rna45582;Dbxref=GeneID:167127,Genbank:NR_031764.1,HGNC:HGNC:27266,MIM:616384;gbkey=misc_RNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 3;transcript_id=NR_031764.1 +NC_000005.10 BestRefSeq mRNA 36035017 36066921 . - . ID=rna45583;Parent=gene14857;Dbxref=GeneID:167127,Genbank:NM_174914.3,HGNC:HGNC:27266,MIM:616384;Name=NM_174914.3;gbkey=mRNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 1;transcript_id=NM_174914.3 +NC_000005.10 BestRefSeq exon 36066696 36066921 . - . ID=id576088;Parent=rna45583;Dbxref=GeneID:167127,Genbank:NM_174914.3,HGNC:HGNC:27266,MIM:616384;gbkey=mRNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 1;transcript_id=NM_174914.3 +NC_000005.10 BestRefSeq exon 36064249 36064350 . - . ID=id576089;Parent=rna45583;Dbxref=GeneID:167127,Genbank:NM_174914.3,HGNC:HGNC:27266,MIM:616384;gbkey=mRNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 1;transcript_id=NM_174914.3 +NC_000005.10 BestRefSeq exon 36051870 36051984 . - . ID=id576090;Parent=rna45583;Dbxref=GeneID:167127,Genbank:NM_174914.3,HGNC:HGNC:27266,MIM:616384;gbkey=mRNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 1;transcript_id=NM_174914.3 +NC_000005.10 BestRefSeq exon 36048889 36049420 . - . ID=id576091;Parent=rna45583;Dbxref=GeneID:167127,Genbank:NM_174914.3,HGNC:HGNC:27266,MIM:616384;gbkey=mRNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 1;transcript_id=NM_174914.3 +NC_000005.10 BestRefSeq exon 36039477 36039708 . - . ID=id576092;Parent=rna45583;Dbxref=GeneID:167127,Genbank:NM_174914.3,HGNC:HGNC:27266,MIM:616384;gbkey=mRNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 1;transcript_id=NM_174914.3 +NC_000005.10 BestRefSeq exon 36037797 36038016 . - . ID=id576093;Parent=rna45583;Dbxref=GeneID:167127,Genbank:NM_174914.3,HGNC:HGNC:27266,MIM:616384;gbkey=mRNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 1;transcript_id=NM_174914.3 +NC_000005.10 BestRefSeq exon 36035017 36035974 . - . ID=id576094;Parent=rna45583;Dbxref=GeneID:167127,Genbank:NM_174914.3,HGNC:HGNC:27266,MIM:616384;gbkey=mRNA;gene=UGT3A2;product=UDP glycosyltransferase family 3 member A2%2C transcript variant 1;transcript_id=NM_174914.3 +##sequence-region NC_000012.12 1 133275309 +##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9606 +NC_000012.12 RefSeq region 1 133275309 . + . ID=id1163836;Dbxref=taxon:9606;Name=12;chromosome=12;gbkey=Src;genome=chromosome;mol_type=genomic DNA +NC_000012.12 BestRefSeq gene 53938792 53946544 . + . ID=gene33473;Dbxref=GeneID:3229,HGNC:HGNC:5125,MIM:142976;Name=HOXC13;description=homeobox C13;gbkey=Gene;gene=HOXC13;gene_biotype=protein_coding;gene_synonym=ECTD9,HOX3,HOX3G +NC_000012.12 BestRefSeq mRNA 53938792 53946544 . + . ID=rna100330;Parent=gene33473;Dbxref=GeneID:3229,Genbank:NM_017410.2,HGNC:HGNC:5125,MIM:142976;Name=NM_017410.2;gbkey=mRNA;gene=HOXC13;product=homeobox C13;transcript_id=NM_017410.2 +NC_000012.12 BestRefSeq exon 53938792 53939642 . + . ID=id1209110;Parent=rna100330;Dbxref=GeneID:3229,Genbank:NM_017410.2,HGNC:HGNC:5125,MIM:142976;gbkey=mRNA;gene=HOXC13;product=homeobox C13;transcript_id=NM_017410.2 +NC_000012.12 BestRefSeq exon 53945000 53946544 . + . ID=id1209111;Parent=rna100330;Dbxref=GeneID:3229,Genbank:NM_017410.2,HGNC:HGNC:5125,MIM:142976;gbkey=mRNA;gene=HOXC13;product=homeobox C13;transcript_id=NM_017410.2 +NC_000012.12 BestRefSeq gene 53954868 53956606 . + . ID=gene33475;Dbxref=GeneID:3228,HGNC:HGNC:5124,MIM:142975;Name=HOXC12;description=homeobox C12;gbkey=Gene;gene=HOXC12;gene_biotype=protein_coding;gene_synonym=HOC3F,HOX3,HOX3F +NC_000012.12 BestRefSeq mRNA 53954868 53956606 . + . ID=rna100332;Parent=gene33475;Dbxref=GeneID:3228,Genbank:NM_173860.2,HGNC:HGNC:5124,MIM:142975;Name=NM_173860.2;gbkey=mRNA;gene=HOXC12;product=homeobox C12;transcript_id=NM_173860.2 +NC_000012.12 BestRefSeq exon 53954868 53955539 . + . ID=id1209115;Parent=rna100332;Dbxref=GeneID:3228,Genbank:NM_173860.2,HGNC:HGNC:5124,MIM:142975;gbkey=mRNA;gene=HOXC12;product=homeobox C12;transcript_id=NM_173860.2 +NC_000012.12 BestRefSeq exon 53956328 53956606 . + . ID=id1209116;Parent=rna100332;Dbxref=GeneID:3228,Genbank:NM_173860.2,HGNC:HGNC:5124,MIM:142975;gbkey=mRNA;gene=HOXC12;product=homeobox C12;transcript_id=NM_173860.2 +NC_000012.12 BestRefSeq gene 53973126 53976419 . + . ID=gene33477;Dbxref=GeneID:3227,HGNC:HGNC:5123,MIM:605559;Name=HOXC11;description=homeobox C11;gbkey=Gene;gene=HOXC11;gene_biotype=protein_coding;gene_synonym=HOX3H +NC_000012.12 BestRefSeq mRNA 53973126 53976419 . + . ID=rna100336;Parent=gene33477;Dbxref=GeneID:3227,Genbank:NM_014212.3,HGNC:HGNC:5123,MIM:605559;Name=NM_014212.3;Note=The RefSeq transcript has 1 non-frameshifting indel compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=mRNA;gene=HOXC11;inference=similar to RNA sequence%2C mRNA (same species):RefSeq:NM_014212.3;product=homeobox C11;transcript_id=NM_014212.3 +NC_000012.12 BestRefSeq exon 53973126 53973923 . + . ID=id1209133;Parent=rna100336;Dbxref=GeneID:3227,Genbank:NM_014212.3,HGNC:HGNC:5123,MIM:605559;Note=The RefSeq transcript has 1 non-frameshifting indel compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=mRNA;gene=HOXC11;inference=similar to RNA sequence%2C mRNA (same species):RefSeq:NM_014212.3;product=homeobox C11;transcript_id=NM_014212.3 +NC_000012.12 BestRefSeq exon 53975181 53976419 . + . ID=id1209134;Parent=rna100336;Dbxref=GeneID:3227,Genbank:NM_014212.3,HGNC:HGNC:5123,MIM:605559;Note=The RefSeq transcript has 1 non-frameshifting indel compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=mRNA;gene=HOXC11;inference=similar to RNA sequence%2C mRNA (same species):RefSeq:NM_014212.3;product=homeobox C11;transcript_id=NM_014212.3 +NC_000012.12 BestRefSeq gene 53985162 53990279 . + . ID=gene33479;Dbxref=GeneID:3226,HGNC:HGNC:5122,MIM:605560;Name=HOXC10;description=homeobox C10;gbkey=Gene;gene=HOXC10;gene_biotype=protein_coding;gene_synonym=HOX3I +NC_000012.12 BestRefSeq mRNA 53985162 53990279 . + . ID=rna100338;Parent=gene33479;Dbxref=GeneID:3226,Genbank:NM_017409.3,HGNC:HGNC:5122,MIM:605560;Name=NM_017409.3;gbkey=mRNA;gene=HOXC10;product=homeobox C10;transcript_id=NM_017409.3 +NC_000012.12 BestRefSeq exon 53985162 53986010 . + . ID=id1209144;Parent=rna100338;Dbxref=GeneID:3226,Genbank:NM_017409.3,HGNC:HGNC:5122,MIM:605560;gbkey=mRNA;gene=HOXC10;product=homeobox C10;transcript_id=NM_017409.3 +NC_000012.12 BestRefSeq exon 53989169 53990279 . + . ID=id1209145;Parent=rna100338;Dbxref=GeneID:3226,Genbank:NM_017409.3,HGNC:HGNC:5122,MIM:605560;gbkey=mRNA;gene=HOXC10;product=homeobox C10;transcript_id=NM_017409.3 +NC_000012.12 BestRefSeq gene 54000119 54003337 . + . ID=gene33483;Dbxref=GeneID:3225,HGNC:HGNC:5130,MIM:142971;Name=HOXC9;description=homeobox C9;gbkey=Gene;gene=HOXC9;gene_biotype=protein_coding;gene_synonym=HOX3,HOX3B +NC_000012.12 BestRefSeq mRNA 54000119 54003337 . + . ID=rna100344;Parent=gene33483;Dbxref=GeneID:3225,Genbank:NM_006897.2,HGNC:HGNC:5130,MIM:142971;Name=NM_006897.2;gbkey=mRNA;gene=HOXC9;product=homeobox C9;transcript_id=NM_006897.2 +NC_000012.12 BestRefSeq exon 54000119 54000726 . + . ID=id1209154;Parent=rna100344;Dbxref=GeneID:3225,Genbank:NM_006897.2,HGNC:HGNC:5130,MIM:142971;gbkey=mRNA;gene=HOXC9;product=homeobox C9;transcript_id=NM_006897.2 +NC_000012.12 BestRefSeq exon 54002430 54003337 . + . ID=id1209155;Parent=rna100344;Dbxref=GeneID:3225,Genbank:NM_006897.2,HGNC:HGNC:5130,MIM:142971;gbkey=mRNA;gene=HOXC9;product=homeobox C9;transcript_id=NM_006897.2 +NC_000012.12 BestRefSeq gene 54009106 54012763 . + . ID=gene33485;Dbxref=GeneID:3224,HGNC:HGNC:5129,MIM:142970;Name=HOXC8;description=homeobox C8;gbkey=Gene;gene=HOXC8;gene_biotype=protein_coding;gene_synonym=HOX3,HOX3A +NC_000012.12 BestRefSeq mRNA 54009106 54012763 . + . ID=rna100346;Parent=gene33485;Dbxref=GeneID:3224,Genbank:NM_022658.3,HGNC:HGNC:5129,MIM:142970;Name=NM_022658.3;gbkey=mRNA;gene=HOXC8;product=homeobox C8;transcript_id=NM_022658.3 +NC_000012.12 BestRefSeq exon 54009106 54009720 . + . ID=id1209158;Parent=rna100346;Dbxref=GeneID:3224,Genbank:NM_022658.3,HGNC:HGNC:5129,MIM:142970;gbkey=mRNA;gene=HOXC8;product=homeobox C8;transcript_id=NM_022658.3 +NC_000012.12 BestRefSeq exon 54011089 54012763 . + . ID=id1209159;Parent=rna100346;Dbxref=GeneID:3224,Genbank:NM_022658.3,HGNC:HGNC:5129,MIM:142970;gbkey=mRNA;gene=HOXC8;product=homeobox C8;transcript_id=NM_022658.3 +NC_000012.12 BestRefSeq gene 54016852 54056030 . + . ID=gene33486;Dbxref=GeneID:3221,HGNC:HGNC:5126,MIM:142974;Name=HOXC4;description=homeobox C4;gbkey=Gene;gene=HOXC4;gene_biotype=protein_coding;gene_synonym=cp19,HOX3,HOX3E +NC_000012.12 BestRefSeq mRNA 54016852 54056030 . + . ID=rna100347;Parent=gene33486;Dbxref=GeneID:3221,Genbank:NM_014620.5,HGNC:HGNC:5126,MIM:142974;Name=NM_014620.5;gbkey=mRNA;gene=HOXC4;product=homeobox C4%2C transcript variant 1;transcript_id=NM_014620.5 +NC_000012.12 BestRefSeq exon 54016852 54017414 . + . ID=id1209160;Parent=rna100347;Dbxref=GeneID:3221,Genbank:NM_014620.5,HGNC:HGNC:5126,MIM:142974;gbkey=mRNA;gene=HOXC4;product=homeobox C4%2C transcript variant 1;transcript_id=NM_014620.5 +NC_000012.12 BestRefSeq exon 54053160 54053276 . + . ID=id1209161;Parent=rna100347;Dbxref=GeneID:3221,Genbank:NM_014620.5,HGNC:HGNC:5126,MIM:142974;gbkey=mRNA;gene=HOXC4;product=homeobox C4%2C transcript variant 1;transcript_id=NM_014620.5 +NC_000012.12 BestRefSeq exon 54053917 54054361 . + . ID=id1209162;Parent=rna100347;Dbxref=GeneID:3221,Genbank:NM_014620.5,HGNC:HGNC:5126,MIM:142974;gbkey=mRNA;gene=HOXC4;product=homeobox C4%2C transcript variant 1;transcript_id=NM_014620.5 +NC_000012.12 BestRefSeq exon 54054850 54056030 . + . ID=id1209163;Parent=rna100347;Dbxref=GeneID:3221,Genbank:NM_014620.5,HGNC:HGNC:5126,MIM:142974;gbkey=mRNA;gene=HOXC4;product=homeobox C4%2C transcript variant 1;transcript_id=NM_014620.5 +NC_000012.12 BestRefSeq mRNA 54053877 54056030 . + . ID=rna100348;Parent=gene33486;Dbxref=GeneID:3221,Genbank:NM_153633.2,HGNC:HGNC:5126,MIM:142974;Name=NM_153633.2;gbkey=mRNA;gene=HOXC4;product=homeobox C4%2C transcript variant 2;transcript_id=NM_153633.2 +NC_000012.12 BestRefSeq exon 54053877 54054361 . + . ID=id1209164;Parent=rna100348;Dbxref=GeneID:3221,Genbank:NM_153633.2,HGNC:HGNC:5126,MIM:142974;gbkey=mRNA;gene=HOXC4;product=homeobox C4%2C transcript variant 2;transcript_id=NM_153633.2 +NC_000012.12 BestRefSeq exon 54054850 54056030 . + . ID=id1209165;Parent=rna100348;Dbxref=GeneID:3221,Genbank:NM_153633.2,HGNC:HGNC:5126,MIM:142974;gbkey=mRNA;gene=HOXC4;product=homeobox C4%2C transcript variant 2;transcript_id=NM_153633.2 +NC_000012.12 BestRefSeq gene 54016852 54035361 . + . ID=gene33487;Dbxref=GeneID:3222,HGNC:HGNC:5127,MIM:142973;Name=HOXC5;description=homeobox C5;gbkey=Gene;gene=HOXC5;gene_biotype=protein_coding;gene_synonym=CP11,HOX3,HOX3D +NC_000012.12 BestRefSeq transcript 54016852 54035361 . + . ID=rna100349;Parent=gene33487;Dbxref=GeneID:3222,Genbank:NR_003084.2,HGNC:HGNC:5127,MIM:142973;Name=NR_003084.2;gbkey=misc_RNA;gene=HOXC5;product=homeobox C5%2C transcript variant 2;transcript_id=NR_003084.2 +NC_000012.12 BestRefSeq exon 54016852 54017414 . + . ID=id1209166;Parent=rna100349;Dbxref=GeneID:3222,Genbank:NR_003084.2,HGNC:HGNC:5127,MIM:142973;gbkey=misc_RNA;gene=HOXC5;product=homeobox C5%2C transcript variant 2;transcript_id=NR_003084.2 +NC_000012.12 BestRefSeq exon 54034278 54035361 . + . ID=id1209167;Parent=rna100349;Dbxref=GeneID:3222,Genbank:NR_003084.2,HGNC:HGNC:5127,MIM:142973;gbkey=misc_RNA;gene=HOXC5;product=homeobox C5%2C transcript variant 2;transcript_id=NR_003084.2 +NC_000012.12 BestRefSeq mRNA 54033048 54035361 . + . ID=rna100350;Parent=gene33487;Dbxref=GeneID:3222,Genbank:NM_018953.3,HGNC:HGNC:5127,MIM:142973;Name=NM_018953.3;gbkey=mRNA;gene=HOXC5;product=homeobox C5%2C transcript variant 1;transcript_id=NM_018953.3 +NC_000012.12 BestRefSeq exon 54033048 54033576 . + . ID=id1209168;Parent=rna100350;Dbxref=GeneID:3222,Genbank:NM_018953.3,HGNC:HGNC:5127,MIM:142973;gbkey=mRNA;gene=HOXC5;product=homeobox C5%2C transcript variant 1;transcript_id=NM_018953.3 +NC_000012.12 BestRefSeq exon 54034278 54035361 . + . ID=id1209169;Parent=rna100350;Dbxref=GeneID:3222,Genbank:NM_018953.3,HGNC:HGNC:5127,MIM:142973;gbkey=mRNA;gene=HOXC5;product=homeobox C5%2C transcript variant 1;transcript_id=NM_018953.3 +NC_000012.12 BestRefSeq gene 54016852 54030823 . + . ID=gene33488;Dbxref=GeneID:3223,HGNC:HGNC:5128,MIM:142972;Name=HOXC6;description=homeobox C6;gbkey=Gene;gene=HOXC6;gene_biotype=protein_coding;gene_synonym=CP25,HHO.C8,HOX3,HOX3C +NC_000012.12 BestRefSeq mRNA 54016852 54030823 . + . ID=rna100351;Parent=gene33488;Dbxref=GeneID:3223,Genbank:NM_153693.4,HGNC:HGNC:5128,MIM:142972;Name=NM_153693.4;gbkey=mRNA;gene=HOXC6;product=homeobox C6%2C transcript variant 2;transcript_id=NM_153693.4 +NC_000012.12 BestRefSeq exon 54016852 54017414 . + . ID=id1209172;Parent=rna100351;Dbxref=GeneID:3223,Genbank:NM_153693.4,HGNC:HGNC:5128,MIM:142972;gbkey=mRNA;gene=HOXC6;product=homeobox C6%2C transcript variant 2;transcript_id=NM_153693.4 +NC_000012.12 BestRefSeq exon 54028576 54028921 . + . ID=id1209173;Parent=rna100351;Dbxref=GeneID:3223,Genbank:NM_153693.4,HGNC:HGNC:5128,MIM:142972;gbkey=mRNA;gene=HOXC6;product=homeobox C6%2C transcript variant 2;transcript_id=NM_153693.4 +NC_000012.12 BestRefSeq exon 54029655 54030823 . + . ID=id1209174;Parent=rna100351;Dbxref=GeneID:3223,Genbank:NM_153693.4,HGNC:HGNC:5128,MIM:142972;gbkey=mRNA;gene=HOXC6;product=homeobox C6%2C transcript variant 2;transcript_id=NM_153693.4 +NC_000012.12 BestRefSeq mRNA 54028410 54030823 . + . ID=rna100352;Parent=gene33488;Dbxref=GeneID:3223,Genbank:NM_004503.3,HGNC:HGNC:5128,MIM:142972;Name=NM_004503.3;gbkey=mRNA;gene=HOXC6;product=homeobox C6%2C transcript variant 1;transcript_id=NM_004503.3 +NC_000012.12 BestRefSeq exon 54028410 54028921 . + . ID=id1209175;Parent=rna100352;Dbxref=GeneID:3223,Genbank:NM_004503.3,HGNC:HGNC:5128,MIM:142972;gbkey=mRNA;gene=HOXC6;product=homeobox C6%2C transcript variant 1;transcript_id=NM_004503.3 +NC_000012.12 BestRefSeq exon 54029655 54030823 . + . ID=id1209176;Parent=rna100352;Dbxref=GeneID:3223,Genbank:NM_004503.3,HGNC:HGNC:5128,MIM:142972;gbkey=mRNA;gene=HOXC6;product=homeobox C6%2C transcript variant 1;transcript_id=NM_004503.3 +NC_000012.12 RefSeq cDNA_match 53973126 53973923 798 + . ID=46c4f9c5-f2cf-415e-a892-fbd053a8f7eb;Target=NM_014212.3 1 798 +;assembly_bases_aln=152;assembly_bases_seq=152;consensus_splices=2;exon_identity=0.991241;for_remapping=2;gap_count=1;identity=0.991241;idty=1;matches=2037;num_ident=2037;num_mismatch=0;pct_coverage=99.1241;pct_coverage_hiqual=99.1241;pct_identity_gap=99.1241;pct_identity_ungap=100;product_coverage=1;rank=1;splices=2;weighted_identity=0.991461 +NC_000012.12 RefSeq cDNA_match 53975181 53976419 1232.82 + . ID=46c4f9c5-f2cf-415e-a892-fbd053a8f7eb;Target=NM_014212.3 799 2055 +;assembly_bases_aln=152;assembly_bases_seq=152;consensus_splices=2;exon_identity=0.991241;for_remapping=2;gap_count=1;identity=0.991241;idty=0.98568;matches=2037;num_ident=2037;num_mismatch=0;pct_coverage=99.1241;pct_coverage_hiqual=99.1241;pct_identity_gap=99.1241;pct_identity_ungap=100;product_coverage=1;rank=1;splices=2;weighted_identity=0.991461;Gap=M705 I18 M534 diff -r 3bf1b3ec1ddf -r c56e0689e46e test-data/tx2gene.tab --- a/test-data/tx2gene.tab Fri Nov 16 14:47:19 2018 -0500 +++ b/test-data/tx2gene.tab Tue Dec 04 08:19:06 2018 -0500 @@ -1,16 +1,16 @@ TXNAME GENEID -NM_001168316 DDX11L1 -NM_174914 DDX11L1 -NR_031764 DDX11L1 -NM_004503 WASH7P -NM_006897 WASH7P -NM_014212 WASH7P -NM_014620 WASH7P -NM_017409 WASH7P -NM_017410 WASH7P -NM_018953 MIR6859-2 -NM_022658 MIR6859-1 -NM_153633 WASH7P -NM_153693 WASH7P -NM_173860 WASH7P -NR_003084 WASH7P +NM_001168316 UGT3A2 +NM_174914 UGT3A2 +NR_031764 UGT3A2 +NM_004503 HOXC6 +NM_006897 HOXC9 +NM_014212 HOXC11 +NM_014620 HOXC4 +NM_017409 HOXC10 +NM_017410 HOXC13 +NM_018953 HOXC5 +NM_022658 HOXC8 +NM_153633 HOXC4 +NM_153693 HOXC6 +NM_173860 HOXC12 +NR_003084 HOXC5