changeset 0:2ca1baabdae0 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/length_and_gc_content commit b7dcd020c6a15fa55f392cc09cbc37580d6e75c4
author iuc
date Thu, 17 Nov 2016 16:41:06 -0500
parents
children f088370d2a3c
files all_fasta.loc.sample get_length_and_gc_content.r get_length_and_gc_content.xml test-data/gc.tab test-data/gene_length.tab test-data/in.fasta test-data/in.gtf test-data/length.tab tool_data_table_conf.xml.sample
diffstat 9 files changed, 1131 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/all_fasta.loc.sample	Thu Nov 17 16:41:06 2016 -0500
@@ -0,0 +1,18 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id>	<dbkey>	<display_name>	<file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel3	apiMel3	Honeybee (Apis mellifera): apiMel3	/path/to/genome/apiMel3/apiMel3.fa
+#hg19canon	hg19	Human (Homo sapiens): hg19 Canonical	/path/to/genome/hg19/hg19canon.fa
+#hg19full	hg19	Human (Homo sapiens): hg19 Full	/path/to/genome/hg19/hg19full.fa
+#
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file. So there will be multiple fasta files for each build,
+#such as with hg19 above.
+#
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/get_length_and_gc_content.r	Thu Nov 17 16:41:06 2016 -0500
@@ -0,0 +1,59 @@
+# originally by Devon Ryan, https://www.biostars.org/p/84467/
+
+# On any error: print the message to stderr and quit with exit status 1 without saving the workspace.
+options(show.error.messages = FALSE, error = function() { cat(geterrmessage(), file = stderr()); q("no", 1, FALSE) })
+# Force English messages; with German LC settings Galaxy crashed with a UTF-8 error.
+loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
+
+suppressPackageStartupMessages({
+    library("GenomicRanges")
+    library("rtracklayer")
+    library("Rsamtools")
+    library("optparse")
+    library("data.table")
+})
+
+option_list <- list(  # command-line options; an optparse short flag is "-" followed by exactly one letter
+    make_option(c("-g", "--gtf"), type = "character", help = "Input GTF file with gene / exon information."),
+    make_option(c("-f", "--fasta"), type = "character", default = FALSE, help = "Fasta file that corresponds to the supplied GTF."),
+    make_option(c("-l", "--length"), type = "character", default = FALSE, help = "Output file with gene name and length."),
+    make_option(c("-c", "--gc_content"), type = "character", default = FALSE, help = "Output file with gene name and GC content.")  # short flag was the invalid two-letter "-gc"
+  )
+
+# Parse the command line, then pick out the input and output paths used by the rest of the script
+parser <- OptionParser(option_list = option_list, usage = "%prog [options] file")
+args <- parse_args(parser)
+GTFfile <- args[["gtf"]]
+FASTAfile <- args[["fasta"]]
+length <- args[["length"]]  # value of --length: path of the gene-length output file
+gc_content <- args[["gc_content"]]
+
+#Import only the exon records, then merge overlapping exons within each gene_id
+exons <- import.gff(GTFfile, format = "gtf", genome = NA, feature.type = "exon")
+exons_by_gene <- reduce(split(exons, elementMetadata(exons)$gene_id))
+reducedGTF <- unlist(exons_by_gene, use.names = TRUE)
+elementMetadata(reducedGTF)$gene_id <- rep(names(exons_by_gene), elementNROWS(exons_by_gene))  # re-attach the gene id to every merged range
+
+#Open the fasta file
+FASTA <- FaFile(FASTAfile)
+open(FASTA)
+#Count G/C bases in each merged exon range, then release the file handle (no later reads)
+elementMetadata(reducedGTF)$nGCs <- letterFrequency(getSeq(FASTA, reducedGTF), "GC")[, 1]
+close(FASTA)
+elementMetadata(reducedGTF)$widths <- width(reducedGTF)
+
+#Per gene: c(summed width of its merged exons, summed G/C count divided by that width)
+calc_GC_length <- function(x) {
+    total_width <- sum(elementMetadata(x)$widths)
+    total_gc <- sum(elementMetadata(x)$nGCs)
+    c(total_width, total_gc / total_width)
+}
+output <- t(vapply(split(reducedGTF, elementMetadata(reducedGTF)$gene_id), calc_GC_length, numeric(2)))  # vapply fixes the result shape: one row per gene, 2 columns
+output <- data.frame(setDT(data.frame(output), keep.rownames = TRUE)[])  # move the gene ids from row names into column 1
+
+
+#Write columns 1+2 (gene id, length) and 1+3 (gene id, GC fraction) as headerless, unquoted, tab-separated files
+write.table(output[, c(1, 2)], file = length, sep = "\t", quote = FALSE, row.names = FALSE, col.names = FALSE)
+write.table(output[, c(1, 3)], file = gc_content, sep = "\t", quote = FALSE, row.names = FALSE, col.names = FALSE)
+#Print the R session details
+sessionInfo()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/get_length_and_gc_content.xml	Thu Nov 17 16:41:06 2016 -0500
@@ -0,0 +1,89 @@
+<tool id="length_and_gc_content" name="Gene length and gc content" version="0.1.0">
+    <description>from GTF file</description>
+    <requirements>
+        <requirement type="package" version="1.3.2">r-optparse</requirement>
+        <requirement type="package" version="1.4.1">r-reshape2</requirement> <!-- NOTE(review): get_length_and_gc_content.r never loads reshape2; confirm it is needed -->
+        <requirement type="package" version="1.9.6">r-data.table</requirement>
+        <requirement type="package" version="1.34.1">bioconductor-rtracklayer</requirement> <!-- NOTE(review): the script also loads GenomicRanges and Rsamtools; presumably installed as rtracklayer dependencies, confirm -->
+    </requirements>
+    <stdio> <!-- each pattern below is matched against both stdout and stderr and reported at level "fatal" -->
+        <regex match="Execution halted"
+               source="both"
+               level="fatal"
+               description="Execution halted." />
+        <regex match="Error in"
+               source="both"
+               level="fatal"
+               description="An undefined error occurred, please check your input carefully and contact your administrator." />
+        <regex match="Fatal error"
+               source="both"
+               level="fatal"
+               description="An undefined error occurred, please check your input carefully and contact your administrator." />
+    </stdio>
+    <command><![CDATA[
+        Rscript '$__tool_directory__'/get_length_and_gc_content.r --gtf '$gtf'
+        #if $fastaSource.genomeSource == 'indexed':
+            --fasta '$fastaSource.fasta_pre_installed.fields.path'
+        #else:
+            --fasta '$fastaSource.fasta_history'
+        #end if
+        --length '$length'
+        --gc_content '$gc_content'
+    ]]></command>
+    <inputs>
+        <param name="gtf" type="data" format="gtf" label="GTF file for length and GC calculation" help="The GTF must match the FASTA file"/>
+        <conditional name="fastaSource"> <!-- reference FASTA: built-in (all_fasta data table) or a history dataset -->
+            <param name="genomeSource" type="select" label="Select a reference fasta from your history or use a built-in fasta?" help="choose history if you don't see the correct genome fasta">
+                <option value="indexed">Use a built-in fasta</option>
+                <option value="history">Use fasta from history</option>
+            </param>
+            <when value="indexed">
+                <param name="fasta_pre_installed" type="select" label="Select a fasta sequence" help="Select the fasta file from a list of pre-installed genomes">
+                    <options from_data_table="all_fasta">
+                        <filter type="data_meta" key="dbkey" ref="gtf" column="0"/> <!-- NOTE(review): all_fasta columns are value, dbkey, name, path, so column 0 is "value" and dbkey is column 1; confirm -->
+                    </options>
+                    <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
+                </param>
+            </when>
+            <when value="history">
+                <param name="fasta_history" type="data" format="fasta" label="Select a fasta file that matches the supplied GTF file">
+                    <options>
+                        <filter type="data_meta" key="dbkey" ref="gtf"/>
+                    </options>
+                    <validator type="no_options" message="The current history does not include a fasta dataset with the build associated with the selected input file"/>
+                </param>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="length" format="tabular" label="gene length">
+            <actions>
+                <action name="column_names" type="metadata" default="gene,length" />
+            </actions>
+        </data>
+        <data name="gc_content" format="tabular" label="gene gc content">
+             <actions>
+                <action name="column_names" type="metadata" default="gene,gc_content" />
+            </actions>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="gtf" value="in.gtf" ftype="gtf"></param>
+            <param name="fastaSource|genomeSource" value="history"></param>
+            <param name="fastaSource|fasta_history" value="in.fasta" ftype="fasta"></param>
+            <output name="length" file="length.tab"></output>
+            <output name="gc_content" file="gc.tab"></output>
+        </test>
+    </tests>
+    <help>
+
+        **What it does**
+
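+        Returns two tabular files based on a supplied GTF and a matching FASTA file: one with gene id and gene length (the summed length of the gene's merged, non-overlapping exons) and one with gene id and GC content (the fraction of G and C bases within those exons).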
+
+
+        </help>
+    <citations>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gc.tab	Thu Nov 17 16:41:06 2016 -0500
@@ -0,0 +1,1 @@
+ENSG00000162526	0.388349514563107
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gene_length.tab	Thu Nov 17 16:41:06 2016 -0500
@@ -0,0 +1,948 @@
+ENSG00000003096	6983
+ENSG00000004534	7302
+ENSG00000006327	1848
+ENSG00000006831	5878
+ENSG00000006837	3057
+ENSG00000007392	6176
+ENSG00000008735	5901
+ENSG00000009844	7262
+ENSG00000010322	9161
+ENSG00000010932	3602
+ENSG00000011638	2558
+ENSG00000012983	7620
+ENSG00000013275	2387
+ENSG00000014216	7553
+ENSG00000018408	8413
+ENSG00000018607	1774
+ENSG00000018699	3185
+ENSG00000022556	6313
+ENSG00000023041	3748
+ENSG00000023330	2536
+ENSG00000023697	2844
+ENSG00000023892	2545
+ENSG00000027697	2571
+ENSG00000029363	9297
+ENSG00000032389	5775
+ENSG00000033050	4448
+ENSG00000035403	9992
+ENSG00000042445	4005
+ENSG00000049541	2211
+ENSG00000057608	4033
+ENSG00000057935	10580
+ENSG00000059122	8419
+ENSG00000059588	7743
+ENSG00000063015	5115
+ENSG00000063322	3835
+ENSG00000064545	3281
+ENSG00000065000	8601
+ENSG00000065060	9908
+ENSG00000066739	14778
+ENSG00000066923	8583
+ENSG00000068028	3683
+ENSG00000068650	13733
+ENSG00000069712	4556
+ENSG00000070495	5969
+ENSG00000070610	4803
+ENSG00000070961	9493
+ENSG00000071889	5098
+ENSG00000072071	8487
+ENSG00000072121	15706
+ENSG00000072134	8517
+ENSG00000072864	5525
+ENSG00000072958	14371
+ENSG00000073614	12106
+ENSG00000074054	11052
+ENSG00000074071	1097
+ENSG00000074211	9198
+ENSG00000074319	3839
+ENSG00000074621	9084
+ENSG00000075399	3428
+ENSG00000076356	13781
+ENSG00000079215	6265
+ENSG00000079246	5463
+ENSG00000079785	3833
+ENSG00000079974	5870
+ENSG00000080603	13674
+ENSG00000080815	9947
+ENSG00000081087	5325
+ENSG00000082068	7039
+ENSG00000083535	4253
+ENSG00000083544	6814
+ENSG00000083720	4194
+ENSG00000084073	3572
+ENSG00000085365	5106
+ENSG00000085377	3762
+ENSG00000085982	9352
+ENSG00000085999	3212
+ENSG00000086205	3824
+ENSG00000086289	2864
+ENSG00000087586	2928
+ENSG00000088340	9067
+ENSG00000088448	4563
+ENSG00000089009	4447
+ENSG00000090020	5974
+ENSG00000090273	2856
+ENSG00000090402	6138
+ENSG00000091140	5299
+ENSG00000092068	5991
+ENSG00000092098	4982
+ENSG00000092208	2532
+ENSG00000092445	10945
+ENSG00000099139	12705
+ENSG00000099910	4646
+ENSG00000100014	7324
+ENSG00000100027	4713
+ENSG00000100038	6641
+ENSG00000100106	12238
+ENSG00000100191	2030
+ENSG00000100292	2405
+ENSG00000100336	4377
+ENSG00000100354	19998
+ENSG00000100441	8015
+ENSG00000100478	7223
+ENSG00000100526	1836
+ENSG00000100577	8477
+ENSG00000100852	10888
+ENSG00000101247	7217
+ENSG00000101294	10000
+ENSG00000101473	4213
+ENSG00000102030	4742
+ENSG00000102349	9022
+ENSG00000102606	11768
+ENSG00000102804	8586
+ENSG00000102901	5539
+ENSG00000103035	2740
+ENSG00000103121	12781
+ENSG00000103932	7046
+ENSG00000104325	3507
+ENSG00000104331	7594
+ENSG00000104368	6618
+ENSG00000104450	5055
+ENSG00000105173	2550
+ENSG00000105220	9112
+ENSG00000105223	5184
+ENSG00000105325	5802
+ENSG00000105355	2813
+ENSG00000105438	2251
+ENSG00000105519	4428
+ENSG00000105568	7437
+ENSG00000105879	5481
+ENSG00000106012	9240
+ENSG00000106305	1838
+ENSG00000106683	6902
+ENSG00000106771	9484
+ENSG00000106789	5717
+ENSG00000106803	1190
+ENSG00000106868	4183
+ENSG00000106948	10601
+ENSG00000107295	2682
+ENSG00000107833	928
+ENSG00000108055	4275
+ENSG00000108091	7345
+ENSG00000108306	11038
+ENSG00000108591	7411
+ENSG00000108666	5740
+ENSG00000108848	8151
+ENSG00000108947	3222
+ENSG00000108953	3847
+ENSG00000108960	3177
+ENSG00000109079	3889
+ENSG00000109171	6524
+ENSG00000109610	2128
+ENSG00000109680	3434
+ENSG00000109771	7360
+ENSG00000109787	6297
+ENSG00000109920	7596
+ENSG00000109929	5566
+ENSG00000110002	6403
+ENSG00000110092	4830
+ENSG00000110906	9865
+ENSG00000111247	2558
+ENSG00000111249	7648
+ENSG00000111331	8251
+ENSG00000111652	3113
+ENSG00000111707	5731
+ENSG00000111860	9462
+ENSG00000111877	12496
+ENSG00000112062	6860
+ENSG00000112306	767
+ENSG00000112312	2476
+ENSG00000112365	5519
+ENSG00000112406	5614
+ENSG00000112531	17368
+ENSG00000112874	4304
+ENSG00000113048	6511
+ENSG00000113328	3096
+ENSG00000113621	5265
+ENSG00000113649	8714
+ENSG00000113812	4066
+ENSG00000113916	5938
+ENSG00000114026	8733
+ENSG00000114120	9277
+ENSG00000114315	2062
+ENSG00000114416	12054
+ENSG00000114735	19425
+ENSG00000114999	15732
+ENSG00000115107	4562
+ENSG00000115221	6357
+ENSG00000115464	18125
+ENSG00000115841	5999
+ENSG00000116127	14054
+ENSG00000116171	8305
+ENSG00000116209	9149
+ENSG00000116455	2805
+ENSG00000116580	11967
+ENSG00000116604	6400
+ENSG00000116685	3281
+ENSG00000116761	2414
+ENSG00000116977	9710
+ENSG00000117262	5557
+ENSG00000117410	3535
+ENSG00000117505	10949
+ENSG00000117569	12937
+ENSG00000117758	3846
+ENSG00000117868	7098
+ENSG00000117984	2999
+ENSG00000118096	3814
+ENSG00000118420	6394
+ENSG00000118503	5051
+ENSG00000118507	4394
+ENSG00000118564	4573
+ENSG00000118620	6845
+ENSG00000118655	3940
+ENSG00000118680	1761
+ENSG00000118707	3784
+ENSG00000118873	10344
+ENSG00000119041	7105
+ENSG00000119326	3131
+ENSG00000119421	844
+ENSG00000119669	4157
+ENSG00000119686	4913
+ENSG00000119723	6686
+ENSG00000120158	4215
+ENSG00000120217	4108
+ENSG00000120907	5848
+ENSG00000121390	3223
+ENSG00000121753	7756
+ENSG00000121851	1799
+ENSG00000121988	9115
+ENSG00000122194	6380
+ENSG00000122390	5639
+ENSG00000122644	3561
+ENSG00000122691	2033
+ENSG00000122966	13230
+ENSG00000123384	20839
+ENSG00000123472	5677
+ENSG00000123562	2648
+ENSG00000124103	1137
+ENSG00000124120	7044
+ENSG00000124181	9648
+ENSG00000124275	6498
+ENSG00000124496	7732
+ENSG00000124549	4177
+ENSG00000124574	8083
+ENSG00000124615	5174
+ENSG00000124783	11349
+ENSG00000125637	13442
+ENSG00000125691	5979
+ENSG00000125733	3982
+ENSG00000125734	3213
+ENSG00000125818	5296
+ENSG00000125901	1127
+ENSG00000125944	8676
+ENSG00000126001	16295
+ENSG00000126003	5705
+ENSG00000126226	6243
+ENSG00000126562	5089
+ENSG00000126858	6372
+ENSG00000127314	16942
+ENSG00000127399	2735
+ENSG00000127989	4890
+ENSG00000128016	2010
+ENSG00000128590	2837
+ENSG00000128604	4375
+ENSG00000128654	1717
+ENSG00000128731	20027
+ENSG00000128739	2919
+ENSG00000128965	2058
+ENSG00000128973	4225
+ENSG00000129354	2214
+ENSG00000129518	2030
+ENSG00000129559	3879
+ENSG00000129625	4230
+ENSG00000129932	2387
+ENSG00000130222	1322
+ENSG00000130227	6909
+ENSG00000130305	3414
+ENSG00000130349	2036
+ENSG00000130592	5399
+ENSG00000130695	5422
+ENSG00000130717	2688
+ENSG00000130770	2225
+ENSG00000130787	7102
+ENSG00000130827	11710
+ENSG00000131051	9307
+ENSG00000131100	3212
+ENSG00000131467	5152
+ENSG00000131759	4731
+ENSG00000132155	7847
+ENSG00000132274	6020
+ENSG00000132294	6106
+ENSG00000132604	5621
+ENSG00000132823	1712
+ENSG00000132879	3527
+ENSG00000133114	10842
+ENSG00000133138	9503
+ENSG00000133226	8168
+ENSG00000133302	7185
+ENSG00000133678	2996
+ENSG00000133816	15809
+ENSG00000133872	2990
+ENSG00000134109	6758
+ENSG00000134153	1538
+ENSG00000134291	3475
+ENSG00000134324	11861
+ENSG00000134326	4302
+ENSG00000134461	3059
+ENSG00000134463	2185
+ENSG00000134690	2469
+ENSG00000134765	4271
+ENSG00000134852	11762
+ENSG00000134905	10460
+ENSG00000134986	9337
+ENSG00000134987	7656
+ENSG00000135045	2327
+ENSG00000135048	9625
+ENSG00000135124	8159
+ENSG00000135250	6341
+ENSG00000135535	6944
+ENSG00000135537	8450
+ENSG00000135605	4030
+ENSG00000135679	13273
+ENSG00000135698	2190
+ENSG00000135930	7177
+ENSG00000136048	4180
+ENSG00000136238	3114
+ENSG00000136709	14580
+ENSG00000136720	4683
+ENSG00000136819	5099
+ENSG00000137210	4815
+ENSG00000137285	2194
+ENSG00000137642	19449
+ENSG00000137776	8157
+ENSG00000137806	1790
+ENSG00000137812	10250
+ENSG00000137824	5114
+ENSG00000137965	2038
+ENSG00000138028	4039
+ENSG00000138459	4853
+ENSG00000138587	2504
+ENSG00000138594	10632
+ENSG00000138685	6775
+ENSG00000138750	4062
+ENSG00000138798	6127
+ENSG00000138835	13793
+ENSG00000139192	3188
+ENSG00000139200	3266
+ENSG00000139233	7993
+ENSG00000139496	10422
+ENSG00000139718	8314
+ENSG00000139726	3576
+ENSG00000139910	10046
+ENSG00000139971	7565
+ENSG00000140265	6634
+ENSG00000140391	8506
+ENSG00000140396	10026
+ENSG00000140416	12492
+ENSG00000140459	3491
+ENSG00000141012	8402
+ENSG00000141179	5880
+ENSG00000141367	11836
+ENSG00000141404	8445
+ENSG00000141425	8442
+ENSG00000141448	3945
+ENSG00000141510	3936
+ENSG00000141699	5145
+ENSG00000141736	10321
+ENSG00000141756	4273
+ENSG00000142082	5202
+ENSG00000142208	11162
+ENSG00000142599	11503
+ENSG00000142676	1987
+ENSG00000142784	6024
+ENSG00000142871	2455
+ENSG00000142961	5916
+ENSG00000143198	8401
+ENSG00000143256	677
+ENSG00000143337	9615
+ENSG00000143401	3605
+ENSG00000143418	4644
+ENSG00000143458	9189
+ENSG00000143641	7743
+ENSG00000143727	4160
+ENSG00000143816	3971
+ENSG00000143919	3991
+ENSG00000143970	13023
+ENSG00000144524	6066
+ENSG00000145354	2607
+ENSG00000145494	1451
+ENSG00000145555	13963
+ENSG00000145604	3837
+ENSG00000145736	8676
+ENSG00000145990	10686
+ENSG00000146066	668
+ENSG00000146350	6912
+ENSG00000146425	2949
+ENSG00000146574	7316
+ENSG00000146733	2762
+ENSG00000146802	7995
+ENSG00000147050	7139
+ENSG00000147251	6721
+ENSG00000147364	11438
+ENSG00000147394	4812
+ENSG00000147439	5072
+ENSG00000147669	1711
+ENSG00000147684	2629
+ENSG00000148158	11578
+ENSG00000148334	3039
+ENSG00000148408	9897
+ENSG00000148484	4191
+ENSG00000148700	6541
+ENSG00000148834	1691
+ENSG00000149292	5302
+ENSG00000149308	7534
+ENSG00000149483	6348
+ENSG00000149679	3785
+ENSG00000150779	1166
+ENSG00000150991	3898
+ENSG00000151623	6607
+ENSG00000151657	6844
+ENSG00000151690	6031
+ENSG00000151718	9966
+ENSG00000151743	3195
+ENSG00000151748	5630
+ENSG00000151876	3014
+ENSG00000152270	11707
+ENSG00000152377	6174
+ENSG00000152457	6326
+ENSG00000152527	9835
+ENSG00000152642	4802
+ENSG00000152782	7638
+ENSG00000153130	6294
+ENSG00000153147	8385
+ENSG00000154065	6391
+ENSG00000154146	1309
+ENSG00000154269	3885
+ENSG00000154608	4737
+ENSG00000154743	5667
+ENSG00000155100	3977
+ENSG00000155313	7823
+ENSG00000155621	4973
+ENSG00000155636	4784
+ENSG00000155729	4915
+ENSG00000156050	5056
+ENSG00000156831	2991
+ENSG00000156973	2850
+ENSG00000157107	6127
+ENSG00000157343	1418
+ENSG00000157423	23408
+ENSG00000157625	12711
+ENSG00000157637	7716
+ENSG00000157703	6302
+ENSG00000157823	9237
+ENSG00000157833	5939
+ENSG00000158092	3727
+ENSG00000158158	5105
+ENSG00000158296	6055
+ENSG00000158406	397
+ENSG00000158467	6858
+ENSG00000158470	4722
+ENSG00000158669	7939
+ENSG00000159173	7524
+ENSG00000159259	5118
+ENSG00000159346	3193
+ENSG00000159921	5477
+ENSG00000160051	2478
+ENSG00000160172	3402
+ENSG00000160199	7621
+ENSG00000160213	2490
+ENSG00000161040	4009
+ENSG00000161277	1825
+ENSG00000161328	2769
+ENSG00000161692	7146
+ENSG00000161888	2839
+ENSG00000162063	6309
+ENSG00000162227	3149
+ENSG00000162402	11708
+ENSG00000162444	861
+ENSG00000162526	2191
+ENSG00000162669	5827
+ENSG00000162819	4980
+ENSG00000162851	1848
+ENSG00000162980	6716
+ENSG00000163083	3204
+ENSG00000163156	3265
+ENSG00000163191	680
+ENSG00000163320	6490
+ENSG00000163322	7777
+ENSG00000163348	3630
+ENSG00000163376	4736
+ENSG00000163521	3411
+ENSG00000163577	5593
+ENSG00000163743	6565
+ENSG00000163832	3313
+ENSG00000163848	10574
+ENSG00000163900	8668
+ENSG00000163960	11291
+ENSG00000164073	5023
+ENSG00000164104	2775
+ENSG00000164144	4290
+ENSG00000164209	8283
+ENSG00000164241	7190
+ENSG00000164252	5051
+ENSG00000164308	7422
+ENSG00000164332	2676
+ENSG00000164418	7375
+ENSG00000164535	5756
+ENSG00000164597	8467
+ENSG00000164778	3395
+ENSG00000164902	4319
+ENSG00000164930	4065
+ENSG00000164934	9199
+ENSG00000164967	1092
+ENSG00000164985	6768
+ENSG00000164989	15144
+ENSG00000165030	2085
+ENSG00000165055	6749
+ENSG00000165275	2866
+ENSG00000165283	1806
+ENSG00000165410	7179
+ENSG00000165521	11000
+ENSG00000165629	2326
+ENSG00000165671	15242
+ENSG00000165816	3301
+ENSG00000165905	3115
+ENSG00000165959	15701
+ENSG00000165983	4003
+ENSG00000166068	8260
+ENSG00000166340	4901
+ENSG00000166343	2423
+ENSG00000166398	7459
+ENSG00000166788	3070
+ENSG00000166974	6180
+ENSG00000167195	2082
+ENSG00000167258	12773
+ENSG00000167614	7068
+ENSG00000167642	5199
+ENSG00000167645	3193
+ENSG00000167685	7239
+ENSG00000167785	8184
+ENSG00000167815	2093
+ENSG00000167842	3157
+ENSG00000167930	4699
+ENSG00000167977	3420
+ENSG00000168005	2807
+ENSG00000168078	2165
+ENSG00000168096	7868
+ENSG00000168159	3285
+ENSG00000168306	3859
+ENSG00000168495	5611
+ENSG00000168612	2769
+ENSG00000168661	2745
+ENSG00000168758	6157
+ENSG00000168826	2889
+ENSG00000168916	6922
+ENSG00000169116	5034
+ENSG00000169188	3245
+ENSG00000169302	7776
+ENSG00000169372	4276
+ENSG00000169519	5499
+ENSG00000169627	1365
+ENSG00000169660	4153
+ENSG00000169762	7828
+ENSG00000169946	7775
+ENSG00000169957	2395
+ENSG00000170011	5872
+ENSG00000170113	8700
+ENSG00000170142	3074
+ENSG00000170296	2266
+ENSG00000170340	2861
+ENSG00000170412	8005
+ENSG00000170619	2917
+ENSG00000170638	3153
+ENSG00000170889	4371
+ENSG00000170949	8511
+ENSG00000171055	8938
+ENSG00000171067	3406
+ENSG00000171163	3929
+ENSG00000171606	8333
+ENSG00000172009	7464
+ENSG00000172058	3285
+ENSG00000172086	1753
+ENSG00000172186	594
+ENSG00000172239	3391
+ENSG00000172315	3520
+ENSG00000172458	3327
+ENSG00000172746	497
+ENSG00000173153	3030
+ENSG00000173163	2392
+ENSG00000173366	3804
+ENSG00000173480	4204
+ENSG00000173674	4427
+ENSG00000173681	8630
+ENSG00000173715	3765
+ENSG00000173848	4175
+ENSG00000174442	4849
+ENSG00000174469	10864
+ENSG00000174483	6145
+ENSG00000174705	7977
+ENSG00000174799	8640
+ENSG00000174996	5479
+ENSG00000175220	6138
+ENSG00000175322	15787
+ENSG00000175390	9423
+ENSG00000176396	1981
+ENSG00000176407	7968
+ENSG00000176444	3670
+ENSG00000176593	8787
+ENSG00000176871	4616
+ENSG00000177239	8331
+ENSG00000177352	1781
+ENSG00000177788	1155
+ENSG00000177932	5411
+ENSG00000178184	4224
+ENSG00000178229	3659
+ENSG00000178233	5459
+ENSG00000178719	2527
+ENSG00000178935	2989
+ENSG00000178950	15155
+ENSG00000178996	5559
+ENSG00000179119	6005
+ENSG00000179163	2047
+ENSG00000179455	5731
+ENSG00000179918	2551
+ENSG00000179988	6118
+ENSG00000180178	8075
+ENSG00000180182	9656
+ENSG00000180818	2331
+ENSG00000180881	6689
+ENSG00000180884	4234
+ENSG00000180979	8114
+ENSG00000181220	8260
+ENSG00000181359	853
+ENSG00000181444	2734
+ENSG00000181472	3090
+ENSG00000181690	7833
+ENSG00000181915	3627
+ENSG00000182021	5046
+ENSG00000182134	5092
+ENSG00000182141	4956
+ENSG00000182324	4340
+ENSG00000182484	4618
+ENSG00000182810	2169
+ENSG00000182827	3578
+ENSG00000182841	5092
+ENSG00000182923	10116
+ENSG00000183022	643
+ENSG00000183171	2143
+ENSG00000183172	1731
+ENSG00000183309	7146
+ENSG00000183474	6989
+ENSG00000183506	7474
+ENSG00000183530	11809
+ENSG00000183569	2821
+ENSG00000184216	4001
+ENSG00000184319	4250
+ENSG00000184481	3447
+ENSG00000184731	3968
+ENSG00000184831	2353
+ENSG00000185189	4924
+ENSG00000185238	5467
+ENSG00000185482	2096
+ENSG00000185504	5354
+ENSG00000185670	3348
+ENSG00000185798	1797
+ENSG00000185885	1080
+ENSG00000186056	4024
+ENSG00000186106	5820
+ENSG00000186130	4106
+ENSG00000186532	6777
+ENSG00000186583	2007
+ENSG00000186743	740
+ENSG00000187144	4506
+ENSG00000187172	4080
+ENSG00000187186	1833
+ENSG00000187187	9007
+ENSG00000187210	6463
+ENSG00000187504	746
+ENSG00000187605	12271
+ENSG00000187994	4201
+ENSG00000188021	4230
+ENSG00000188039	8495
+ENSG00000188206	5007
+ENSG00000188295	1980
+ENSG00000188529	10277
+ENSG00000188549	7510
+ENSG00000188738	23761
+ENSG00000188786	8024
+ENSG00000188997	4164
+ENSG00000189143	4702
+ENSG00000189221	5528
+ENSG00000189343	883
+ENSG00000196074	6466
+ENSG00000196119	1146
+ENSG00000196123	4780
+ENSG00000196263	6783
+ENSG00000196305	7401
+ENSG00000196357	3953
+ENSG00000196419	3288
+ENSG00000196507	2305
+ENSG00000196549	9620
+ENSG00000196632	11472
+ENSG00000196696	6114
+ENSG00000196739	13988
+ENSG00000196741	1253
+ENSG00000196747	503
+ENSG00000196812	1328
+ENSG00000197045	7966
+ENSG00000197062	2933
+ENSG00000197121	12761
+ENSG00000197128	6096
+ENSG00000197535	12988
+ENSG00000197744	327
+ENSG00000197888	2077
+ENSG00000197961	7251
+ENSG00000198046	8810
+ENSG00000198155	2804
+ENSG00000198198	17331
+ENSG00000198431	6708
+ENSG00000198464	4836
+ENSG00000198482	5432
+ENSG00000198515	3581
+ENSG00000198520	3166
+ENSG00000198648	4247
+ENSG00000198746	2225
+ENSG00000198752	7958
+ENSG00000198865	3431
+ENSG00000198929	7771
+ENSG00000198961	5217
+ENSG00000203441	872
+ENSG00000203616	333
+ENSG00000203668	7078
+ENSG00000203837	2354
+ENSG00000203865	2458
+ENSG00000203950	1265
+ENSG00000203995	4694
+ENSG00000204253	881
+ENSG00000204334	601
+ENSG00000204348	2483
+ENSG00000204385	3369
+ENSG00000204392	1077
+ENSG00000204394	6081
+ENSG00000204396	3638
+ENSG00000204628	5990
+ENSG00000204805	342
+ENSG00000204859	4503
+ENSG00000204956	6252
+ENSG00000205084	6015
+ENSG00000205208	3730
+ENSG00000205268	7425
+ENSG00000205485	3110
+ENSG00000205560	4102
+ENSG00000205628	3484
+ENSG00000206192	2978
+ENSG00000206560	11279
+ENSG00000210100	69
+ENSG00000210174	65
+ENSG00000211584	4921
+ENSG00000213033	1212
+ENSG00000213062	911
+ENSG00000213066	15159
+ENSG00000213077	2281
+ENSG00000213148	464
+ENSG00000213174	414
+ENSG00000213197	694
+ENSG00000213318	783
+ENSG00000213339	3430
+ENSG00000213493	1451
+ENSG00000213588	3014
+ENSG00000213711	814
+ENSG00000213742	5308
+ENSG00000213760	2147
+ENSG00000213793	551
+ENSG00000213864	676
+ENSG00000213880	797
+ENSG00000213904	4208
+ENSG00000213906	3233
+ENSG00000213917	815
+ENSG00000213971	5091
+ENSG00000214029	15455
+ENSG00000214174	3858
+ENSG00000214389	784
+ENSG00000214617	4479
+ENSG00000214694	5490
+ENSG00000214810	311
+ENSG00000214961	1372
+ENSG00000214975	499
+ENSG00000215286	754
+ENSG00000215333	1283
+ENSG00000216854	553
+ENSG00000216915	1495
+ENSG00000217716	494
+ENSG00000217801	2171
+ENSG00000218965	609
+ENSG00000219553	723
+ENSG00000220131	354
+ENSG00000220157	961
+ENSG00000220483	871
+ENSG00000221843	6199
+ENSG00000221909	2717
+ENSG00000222046	1869
+ENSG00000223382	1326
+ENSG00000223620	1102
+ENSG00000223877	622
+ENSG00000224016	291
+ENSG00000224520	1447
+ENSG00000224578	1377
+ENSG00000224628	1519
+ENSG00000224664	316
+ENSG00000224892	997
+ENSG00000225405	390
+ENSG00000225544	392
+ENSG00000225787	306
+ENSG00000225806	1521
+ENSG00000226067	2075
+ENSG00000226086	822
+ENSG00000226114	361
+ENSG00000226144	454
+ENSG00000226232	1728
+ENSG00000226268	959
+ENSG00000226478	1126
+ENSG00000226703	812
+ENSG00000226752	7181
+ENSG00000226790	1139
+ENSG00000226833	1438
+ENSG00000227006	861
+ENSG00000227057	3115
+ENSG00000227343	600
+ENSG00000227376	552
+ENSG00000227401	284
+ENSG00000227543	3835
+ENSG00000227666	316
+ENSG00000227742	946
+ENSG00000227968	999
+ENSG00000228118	459
+ENSG00000228195	881
+ENSG00000228236	315
+ENSG00000228599	742
+ENSG00000228612	2737
+ENSG00000228981	843
+ENSG00000229044	439
+ENSG00000229344	682
+ENSG00000229503	477
+ENSG00000229956	6794
+ENSG00000230006	8042
+ENSG00000230022	634
+ENSG00000230074	665
+ENSG00000230118	258
+ENSG00000230146	1176
+ENSG00000230243	319
+ENSG00000230295	351
+ENSG00000230406	421
+ENSG00000230531	1798
+ENSG00000230551	8636
+ENSG00000230650	3130
+ENSG00000230667	909
+ENSG00000230863	742
+ENSG00000230869	2418
+ENSG00000230913	744
+ENSG00000231096	390
+ENSG00000231181	559
+ENSG00000231245	402
+ENSG00000231434	2167
+ENSG00000231615	1337
+ENSG00000231711	4947
+ENSG00000231955	1411
+ENSG00000232186	1228
+ENSG00000232581	357
+ENSG00000232676	1124
+ENSG00000232699	736
+ENSG00000232905	946
+ENSG00000232943	400
+ENSG00000233122	2436
+ENSG00000233454	275
+ENSG00000233503	1501
+ENSG00000233602	619
+ENSG00000233836	3242
+ENSG00000233846	487
+ENSG00000234231	2095
+ENSG00000234639	1239
+ENSG00000234722	3487
+ENSG00000234742	555
+ENSG00000234981	792
+ENSG00000235065	475
+ENSG00000235363	225
+ENSG00000235424	288
+ENSG00000235444	618
+ENSG00000235512	292
+ENSG00000235623	574
+ENSG00000235655	411
+ENSG00000235698	1200
+ENSG00000235750	4783
+ENSG00000235847	965
+ENSG00000235859	1234
+ENSG00000235892	1677
+ENSG00000236086	262
+ENSG00000236285	837
+ENSG00000236290	703
+ENSG00000236330	886
+ENSG00000236468	1335
+ENSG00000236570	1227
+ENSG00000236680	1238
+ENSG00000236681	523
+ENSG00000236735	375
+ENSG00000236739	535
+ENSG00000236753	2715
+ENSG00000236801	474
+ENSG00000236824	13458
+ENSG00000236946	1087
+ENSG00000237017	4158
+ENSG00000237033	609
+ENSG00000237054	3194
+ENSG00000237101	1323
+ENSG00000237357	2579
+ENSG00000237517	7448
+ENSG00000237939	652
+ENSG00000237977	563
+ENSG00000238221	500
+ENSG00000238251	514
+ENSG00000239377	420
+ENSG00000239524	400
+ENSG00000239569	736
+ENSG00000239791	1918
+ENSG00000239887	4495
+ENSG00000239926	747
+ENSG00000240005	589
+ENSG00000240392	575
+ENSG00000240418	893
+ENSG00000240540	1183
+ENSG00000240821	579
+ENSG00000241258	3540
+ENSG00000241370	1606
+ENSG00000241494	438
+ENSG00000241680	375
+ENSG00000241697	2611
+ENSG00000241772	1051
+ENSG00000241923	622
+ENSG00000242061	438
+ENSG00000242140	231
+ENSG00000242349	1427
+ENSG00000242600	2616
+ENSG00000242612	4046
+ENSG00000242858	602
+ENSG00000243122	413
+ENSG00000243396	402
+ENSG00000243701	4206
+ENSG00000243779	321
+ENSG00000244171	1291
+ENSG00000244270	403
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/in.fasta	Thu Nov 17 16:41:06 2016 -0500
@@ -0,0 +1,2 @@
+>1
+AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGGAAAAAAAAAAAAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGGAAAAAAAAAATTTTT
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/in.gtf	Thu Nov 17 16:41:06 2016 -0500
@@ -0,0 +1,6 @@
+1	ensembl_havana	gene	1	103	.	+	.	gene_id "ENSG00000162526"; gene_version "4"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1";
+1	ensembl_havana	transcript	1	103	.	+	.	gene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; tag "basic"; transcript_support_level "NA";
+1	ensembl_havana	exon	1	103	.	+	.	gene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; exon_number "1"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; exon_id "ENSE00002319515"; exon_version "1"; tag "basic"; transcript_support_level "NA";
+1	ensembl_havana	CDS	1	100	.	+	0	gene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; exon_number "1"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; protein_id "ENSP00000334393"; protein_version "3"; tag "basic"; transcript_support_level "NA";
+1	ensembl_havana	start_codon	1	3	.	+	0	gene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; exon_number "1"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; tag "basic"; transcript_support_level "NA";
+1	ensembl_havana	stop_codon	101	103	.	+	0	gene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; exon_number "1"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; tag "basic"; transcript_support_level "NA";
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/length.tab	Thu Nov 17 16:41:06 2016 -0500
@@ -0,0 +1,1 @@
+ENSG00000162526	103
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Thu Nov 17 16:41:06 2016 -0500
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Locations of all fasta files under genome directory -->
+    <table name="all_fasta" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/all_fasta.loc" />
+    </table>
+</tables>