Mercurial > repos > iuc > length_and_gc_content
changeset 0:2ca1baabdae0 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/length_and_gc_content commit b7dcd020c6a15fa55f392cc09cbc37580d6e75c4
author | iuc |
---|---|
date | Thu, 17 Nov 2016 16:41:06 -0500 |
parents | |
children | f088370d2a3c |
files | all_fasta.loc.sample get_length_and_gc_content.r get_length_and_gc_content.xml test-data/gc.tab test-data/gene_length.tab test-data/in.fasta test-data/in.gtf test-data/length.tab tool_data_table_conf.xml.sample |
diffstat | 9 files changed, 1131 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/all_fasta.loc.sample Thu Nov 17 16:41:06 2016 -0500 @@ -0,0 +1,18 @@ +#This file lists the locations and dbkeys of all the fasta files +#under the "genome" directory (a directory that contains a directory +#for each build). The script extract_fasta.py will generate the file +#all_fasta.loc. This file has the format (white space characters are +#TAB characters): +# +#<unique_build_id> <dbkey> <display_name> <file_path> +# +#So, all_fasta.loc could look something like this: +# +#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa +# +#Your all_fasta.loc file should contain an entry for each individual +#fasta file. So there will be multiple fasta files for each build, +#such as with hg19 above. +#
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/get_length_and_gc_content.r Thu Nov 17 16:41:06 2016 -0500 @@ -0,0 +1,59 @@
# Compute, for every gene in a GTF annotation, the total length of its merged
# (non-overlapping) exons and the GC fraction of those exons, using the
# sequence from a matching FASTA file. Two tab-separated files without header
# are written: gene id + length, and gene id + GC content.
#
# originally by Devon Ryan, https://www.biostars.org/p/84467/

# Print the error message on stderr and exit with status 1 instead of R's
# default non-interactive error handling.
options(
  show.error.messages = FALSE,
  error = function() {
    cat(geterrmessage(), file = stderr())
    q("no", 1, FALSE)
  }
)

# we need that to not crash galaxy with an UTF8 error on German LC settings.
# invisible() keeps the returned locale string from being echoed to stdout.
invisible(Sys.setlocale("LC_MESSAGES", "en_US.UTF-8"))

suppressPackageStartupMessages({
  library("GenomicRanges")
  library("rtracklayer")
  library("Rsamtools")
  library("optparse")
})

# Parse the command line and make sure every file path was supplied.
#
# Returns a named list with the elements `gtf`, `fasta`, `length` and
# `gc_content`, each holding the path given on the command line.
# Stops with a readable message when one of them is missing.
parse_cli <- function() {
  option_list <- list(
    make_option(
      c("-g", "--gtf"), type = "character", default = NULL,
      help = "Input GTF file with gene / exon information."
    ),
    make_option(
      c("-f", "--fasta"), type = "character", default = NULL,
      help = "Fasta file that corresponds to the supplied GTF."
    ),
    make_option(
      c("-l", "--length"), type = "character", default = NULL,
      help = "Output file with gene name and length."
    ),
    # The short flag used to be "-gc"; optparse short flags are a dash plus
    # ONE letter, so it is now "-c". The long flag is unchanged.
    make_option(
      c("-c", "--gc_content"), type = "character", default = NULL,
      help = "Output file with gene name and GC content."
    )
  )

  parser <- OptionParser(
    usage = "%prog [options] file",
    option_list = option_list
  )
  args <- parse_args(parser)

  required <- c("gtf", "fasta", "length", "gc_content")
  is_missing <- vapply(
    required,
    function(name) is.null(args[[name]]),
    logical(1)
  )
  if (any(is_missing)) {
    # Deliberately NOT call. = FALSE: the message then starts with
    # "Error in ...", which is the pattern the Galaxy wrapper scans for.
    stop(
      "Missing required option(s): ",
      paste0("--", required[is_missing], collapse = ", ")
    )
  }
  args[required]
}

# Compute per-gene exonic length and GC fraction.
#
# gtf_file:   path to the GTF annotation; only "exon" features are used.
# fasta_file: path to the FASTA file the GTF coordinates refer to.
#
# Returns a data.frame with one row per gene and the columns `gene`
# (character), `length` (integer, summed width of the merged exons) and
# `gc_content` (double, G+C count divided by `length`).
compute_gene_stats <- function(gtf_file, fasta_file) {
  # Load the annotation
  exons <- import.gff(
    gtf_file, format = "gtf", genome = NA, feature.type = "exon"
  )
  gene_of_exon <- elementMetadata(exons)$gene_id
  if (length(exons) == 0 || is.null(gene_of_exon)) {
    stop("No exon features with a gene_id attribute found in the GTF file.")
  }

  # Reduce it: merge overlapping exons of the same gene so that no base is
  # counted more than once.
  exons_by_gene <- reduce(split(exons, gene_of_exon))
  merged_exons <- unlist(exons_by_gene, use.names = FALSE)
  gene_ids <- factor(
    rep(names(exons_by_gene), elementNROWS(exons_by_gene)),
    levels = names(exons_by_gene) # keep the gene order of the split above
  )

  # Open the fasta file and make sure the handle is released again, also
  # when sequence extraction fails.
  fasta <- FaFile(fasta_file)
  open(fasta)
  on.exit(close(fasta), add = TRUE)

  # Count G and C bases and the width of every merged exon
  gc_per_exon <- letterFrequency(getSeq(fasta, merged_exons), "GC")[, 1]
  width_per_exon <- width(merged_exons)

  # Sum per gene. vapply() (unlike sapply()) guarantees the result type, and
  # keeping the lengths as integers stops write.table() from printing round
  # values such as 100000 in scientific notation ("1e+05").
  total_width <- vapply(split(width_per_exon, gene_ids), sum, integer(1))
  total_gc <- vapply(split(gc_per_exon, gene_ids), sum, numeric(1))

  data.frame(
    gene = names(total_width),
    length = total_width,
    gc_content = total_gc / total_width,
    stringsAsFactors = FALSE
  )
}

# Write a data.frame as a tab-separated file without header, row names or
# quoting.
write_columns <- function(df, path) {
  write.table(
    df, file = path,
    col.names = FALSE, row.names = FALSE, quote = FALSE, sep = "\t"
  )
}

# Entry point: parse options, compute the statistics, write both outputs.
main <- function() {
  opts <- parse_cli()
  stats <- compute_gene_stats(opts[["gtf"]], opts[["fasta"]])
  write_columns(stats[, c("gene", "length")], opts[["length"]])
  write_columns(stats[, c("gene", "gc_content")], opts[["gc_content"]])
  invisible(NULL)
}

main()

sessionInfo()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/get_length_and_gc_content.xml Thu Nov 17 16:41:06 2016 -0500 @@ -0,0 +1,89 @@
<tool id="length_and_gc_content" name="Gene length and gc content" version="0.1.0">
    <!-- Galaxy wrapper around get_length_and_gc_content.r: takes a GTF and a
         matching FASTA and produces two tabular datasets (gene length and
         gene GC content). -->
    <description>from GTF file</description>
    <requirements>
        <!-- NOTE(review): the script also loads GenomicRanges and Rsamtools;
             presumably they are installed as dependencies of rtracklayer -
             confirm. r-reshape2 is not loaded by the script. -->
        <requirement type="package" version="1.3.2">r-optparse</requirement>
        <requirement type="package" version="1.4.1">r-reshape2</requirement>
        <requirement type="package" version="1.9.6">r-data.table</requirement>
        <requirement type="package" version="1.34.1">bioconductor-rtracklayer</requirement>
    </requirements>
    <stdio>
        <!-- The script exits with status 1 on any R error; catch that even
             when the message matches none of the patterns below. -->
        <exit_code range="1:"
               level="fatal"
               description="The R script exited with a non-zero exit code." />
        <regex match="Execution halted"
               source="both"
               level="fatal"
               description="Execution halted." />
        <regex match="Error in"
               source="both"
               level="fatal"
               description="An undefined error occurred, please check your input carefully and contact your administrator." />
        <regex match="Fatal error"
               source="both"
               level="fatal"
               description="An undefined error occurred, please check your input carefully and contact your administrator." />
    </stdio>
    <command><![CDATA[
        Rscript '$__tool_directory__/get_length_and_gc_content.r' --gtf '$gtf'
        #if $fastaSource.genomeSource == 'indexed':
            --fasta '$fastaSource.fasta_pre_installed.fields.path'
        #else:
            --fasta '$fastaSource.fasta_history'
        #end if
        --length '$length'
        --gc_content '$gc_content'
    ]]></command>
    <inputs>
        <param name="gtf" type="data" format="gtf" help="The GTF must match the FASTA file" label="GTF file for length and GC calculation"/>
        <conditional name="fastaSource">
            <param help="choose history if you don't see the correct genome fasta" label="Select a reference fasta from your history or use a built-in fasta?" name="genomeSource" type="select">
                <option value="indexed">Use a built-in fasta</option>
                <option value="history">Use fasta from history</option>
            </param>
            <when value="indexed">
                <param name="fasta_pre_installed" type="select" help="Select the fasta file from a list of pre-installed genomes" label="Select a fasta sequence">
                    <options from_data_table="all_fasta">
                        <!-- The all_fasta table declares its columns as
                             "value, dbkey, name, path", so the dbkey of the
                             GTF has to be compared with column 1. Column 0
                             is the unique build id (e.g. hg19canon), which
                             differs from the dbkey (hg19). -->
                        <filter type="data_meta" key="dbkey" ref="gtf" column="1"/>
                    </options>
                    <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
                </param>
            </when>
            <when value="history">
                <param name="fasta_history" type="data" format="fasta" label="Select a fasta file that matches the supplied GTF file">
                    <options>
                        <filter type="data_meta" key="dbkey" ref="gtf"/>
                    </options>
                    <validator type="no_options" message="The current history does not include a fasta dataset with the build associated with the selected input file"/>
                </param>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <data name="length" format="tabular" label="gene length">
            <actions>
                <action name="column_names" type="metadata" default="gene,length" />
            </actions>
        </data>
        <data name="gc_content" format="tabular" label="gene gc content">
            <actions>
                <action name="column_names" type="metadata" default="gene,gc_content" />
            </actions>
        </data>
    </outputs>
    <tests>
        <!-- Single-exon gene on a 105 bp toy chromosome, FASTA taken from
             the history. -->
        <test>
            <param name="gtf" value="in.gtf" ftype="gtf"></param>
            <param name="fastaSource|genomeSource" value="history"></param>
            <param name="fastaSource|fasta_history" value="in.fasta" ftype="fasta"></param>
            <output name="length" file="length.tab"></output>
            <output name="gc_content" file="gc.tab"></output>
        </test>
    </tests>
    <help>

**What it does**

Returns a tabular file with gene id and length and a tabular file with gene id and GC content, based on a supplied GTF and a FASTA file.


    </help>
    <citations>
    </citations>
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gc.tab Thu Nov 17 16:41:06 2016 -0500 @@ -0,0 +1,1 @@ +ENSG00000162526 0.388349514563107
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gene_length.tab Thu Nov 17 16:41:06 2016 -0500 @@ -0,0 +1,948 @@ +ENSG00000003096 6983 +ENSG00000004534 7302 +ENSG00000006327 1848 +ENSG00000006831 5878 +ENSG00000006837 3057 +ENSG00000007392 6176 +ENSG00000008735 5901 +ENSG00000009844 7262 +ENSG00000010322 9161 +ENSG00000010932 3602 +ENSG00000011638 2558 +ENSG00000012983 7620 +ENSG00000013275 2387 +ENSG00000014216 7553 +ENSG00000018408 8413 +ENSG00000018607 1774 +ENSG00000018699 3185 +ENSG00000022556 6313 +ENSG00000023041 3748 +ENSG00000023330 2536 +ENSG00000023697 2844 +ENSG00000023892 2545 +ENSG00000027697 2571 +ENSG00000029363 9297 +ENSG00000032389 5775 +ENSG00000033050 4448 +ENSG00000035403 9992 +ENSG00000042445 4005 +ENSG00000049541 2211 +ENSG00000057608 4033 +ENSG00000057935 10580 +ENSG00000059122 8419 +ENSG00000059588 7743 +ENSG00000063015 5115 +ENSG00000063322 3835 +ENSG00000064545 3281 +ENSG00000065000 8601 +ENSG00000065060 9908 +ENSG00000066739 14778 +ENSG00000066923 8583 +ENSG00000068028 3683 +ENSG00000068650 13733 +ENSG00000069712 4556 +ENSG00000070495 5969 +ENSG00000070610 4803 +ENSG00000070961 9493 +ENSG00000071889 5098 +ENSG00000072071 8487 +ENSG00000072121 15706 +ENSG00000072134 8517 +ENSG00000072864 5525 +ENSG00000072958 14371 +ENSG00000073614 12106 +ENSG00000074054 11052 +ENSG00000074071 1097 +ENSG00000074211 9198 +ENSG00000074319 3839 +ENSG00000074621 9084 +ENSG00000075399 3428 +ENSG00000076356 13781 +ENSG00000079215 6265 +ENSG00000079246 5463 +ENSG00000079785 3833 +ENSG00000079974 5870 +ENSG00000080603 13674 +ENSG00000080815 9947 +ENSG00000081087 5325 +ENSG00000082068 7039 +ENSG00000083535 4253 +ENSG00000083544 6814 +ENSG00000083720 4194 +ENSG00000084073 3572 +ENSG00000085365 5106 +ENSG00000085377 3762 +ENSG00000085982 9352 +ENSG00000085999 3212 +ENSG00000086205 3824 +ENSG00000086289 2864 +ENSG00000087586 2928 +ENSG00000088340 9067 +ENSG00000088448 4563 +ENSG00000089009 4447 +ENSG00000090020 5974 +ENSG00000090273 2856 +ENSG00000090402 
6138 +ENSG00000091140 5299 +ENSG00000092068 5991 +ENSG00000092098 4982 +ENSG00000092208 2532 +ENSG00000092445 10945 +ENSG00000099139 12705 +ENSG00000099910 4646 +ENSG00000100014 7324 +ENSG00000100027 4713 +ENSG00000100038 6641 +ENSG00000100106 12238 +ENSG00000100191 2030 +ENSG00000100292 2405 +ENSG00000100336 4377 +ENSG00000100354 19998 +ENSG00000100441 8015 +ENSG00000100478 7223 +ENSG00000100526 1836 +ENSG00000100577 8477 +ENSG00000100852 10888 +ENSG00000101247 7217 +ENSG00000101294 10000 +ENSG00000101473 4213 +ENSG00000102030 4742 +ENSG00000102349 9022 +ENSG00000102606 11768 +ENSG00000102804 8586 +ENSG00000102901 5539 +ENSG00000103035 2740 +ENSG00000103121 12781 +ENSG00000103932 7046 +ENSG00000104325 3507 +ENSG00000104331 7594 +ENSG00000104368 6618 +ENSG00000104450 5055 +ENSG00000105173 2550 +ENSG00000105220 9112 +ENSG00000105223 5184 +ENSG00000105325 5802 +ENSG00000105355 2813 +ENSG00000105438 2251 +ENSG00000105519 4428 +ENSG00000105568 7437 +ENSG00000105879 5481 +ENSG00000106012 9240 +ENSG00000106305 1838 +ENSG00000106683 6902 +ENSG00000106771 9484 +ENSG00000106789 5717 +ENSG00000106803 1190 +ENSG00000106868 4183 +ENSG00000106948 10601 +ENSG00000107295 2682 +ENSG00000107833 928 +ENSG00000108055 4275 +ENSG00000108091 7345 +ENSG00000108306 11038 +ENSG00000108591 7411 +ENSG00000108666 5740 +ENSG00000108848 8151 +ENSG00000108947 3222 +ENSG00000108953 3847 +ENSG00000108960 3177 +ENSG00000109079 3889 +ENSG00000109171 6524 +ENSG00000109610 2128 +ENSG00000109680 3434 +ENSG00000109771 7360 +ENSG00000109787 6297 +ENSG00000109920 7596 +ENSG00000109929 5566 +ENSG00000110002 6403 +ENSG00000110092 4830 +ENSG00000110906 9865 +ENSG00000111247 2558 +ENSG00000111249 7648 +ENSG00000111331 8251 +ENSG00000111652 3113 +ENSG00000111707 5731 +ENSG00000111860 9462 +ENSG00000111877 12496 +ENSG00000112062 6860 +ENSG00000112306 767 +ENSG00000112312 2476 +ENSG00000112365 5519 +ENSG00000112406 5614 +ENSG00000112531 17368 +ENSG00000112874 4304 +ENSG00000113048 6511 +ENSG00000113328 3096 
+ENSG00000113621 5265 +ENSG00000113649 8714 +ENSG00000113812 4066 +ENSG00000113916 5938 +ENSG00000114026 8733 +ENSG00000114120 9277 +ENSG00000114315 2062 +ENSG00000114416 12054 +ENSG00000114735 19425 +ENSG00000114999 15732 +ENSG00000115107 4562 +ENSG00000115221 6357 +ENSG00000115464 18125 +ENSG00000115841 5999 +ENSG00000116127 14054 +ENSG00000116171 8305 +ENSG00000116209 9149 +ENSG00000116455 2805 +ENSG00000116580 11967 +ENSG00000116604 6400 +ENSG00000116685 3281 +ENSG00000116761 2414 +ENSG00000116977 9710 +ENSG00000117262 5557 +ENSG00000117410 3535 +ENSG00000117505 10949 +ENSG00000117569 12937 +ENSG00000117758 3846 +ENSG00000117868 7098 +ENSG00000117984 2999 +ENSG00000118096 3814 +ENSG00000118420 6394 +ENSG00000118503 5051 +ENSG00000118507 4394 +ENSG00000118564 4573 +ENSG00000118620 6845 +ENSG00000118655 3940 +ENSG00000118680 1761 +ENSG00000118707 3784 +ENSG00000118873 10344 +ENSG00000119041 7105 +ENSG00000119326 3131 +ENSG00000119421 844 +ENSG00000119669 4157 +ENSG00000119686 4913 +ENSG00000119723 6686 +ENSG00000120158 4215 +ENSG00000120217 4108 +ENSG00000120907 5848 +ENSG00000121390 3223 +ENSG00000121753 7756 +ENSG00000121851 1799 +ENSG00000121988 9115 +ENSG00000122194 6380 +ENSG00000122390 5639 +ENSG00000122644 3561 +ENSG00000122691 2033 +ENSG00000122966 13230 +ENSG00000123384 20839 +ENSG00000123472 5677 +ENSG00000123562 2648 +ENSG00000124103 1137 +ENSG00000124120 7044 +ENSG00000124181 9648 +ENSG00000124275 6498 +ENSG00000124496 7732 +ENSG00000124549 4177 +ENSG00000124574 8083 +ENSG00000124615 5174 +ENSG00000124783 11349 +ENSG00000125637 13442 +ENSG00000125691 5979 +ENSG00000125733 3982 +ENSG00000125734 3213 +ENSG00000125818 5296 +ENSG00000125901 1127 +ENSG00000125944 8676 +ENSG00000126001 16295 +ENSG00000126003 5705 +ENSG00000126226 6243 +ENSG00000126562 5089 +ENSG00000126858 6372 +ENSG00000127314 16942 +ENSG00000127399 2735 +ENSG00000127989 4890 +ENSG00000128016 2010 +ENSG00000128590 2837 +ENSG00000128604 4375 +ENSG00000128654 1717 +ENSG00000128731 20027 
+ENSG00000128739 2919 +ENSG00000128965 2058 +ENSG00000128973 4225 +ENSG00000129354 2214 +ENSG00000129518 2030 +ENSG00000129559 3879 +ENSG00000129625 4230 +ENSG00000129932 2387 +ENSG00000130222 1322 +ENSG00000130227 6909 +ENSG00000130305 3414 +ENSG00000130349 2036 +ENSG00000130592 5399 +ENSG00000130695 5422 +ENSG00000130717 2688 +ENSG00000130770 2225 +ENSG00000130787 7102 +ENSG00000130827 11710 +ENSG00000131051 9307 +ENSG00000131100 3212 +ENSG00000131467 5152 +ENSG00000131759 4731 +ENSG00000132155 7847 +ENSG00000132274 6020 +ENSG00000132294 6106 +ENSG00000132604 5621 +ENSG00000132823 1712 +ENSG00000132879 3527 +ENSG00000133114 10842 +ENSG00000133138 9503 +ENSG00000133226 8168 +ENSG00000133302 7185 +ENSG00000133678 2996 +ENSG00000133816 15809 +ENSG00000133872 2990 +ENSG00000134109 6758 +ENSG00000134153 1538 +ENSG00000134291 3475 +ENSG00000134324 11861 +ENSG00000134326 4302 +ENSG00000134461 3059 +ENSG00000134463 2185 +ENSG00000134690 2469 +ENSG00000134765 4271 +ENSG00000134852 11762 +ENSG00000134905 10460 +ENSG00000134986 9337 +ENSG00000134987 7656 +ENSG00000135045 2327 +ENSG00000135048 9625 +ENSG00000135124 8159 +ENSG00000135250 6341 +ENSG00000135535 6944 +ENSG00000135537 8450 +ENSG00000135605 4030 +ENSG00000135679 13273 +ENSG00000135698 2190 +ENSG00000135930 7177 +ENSG00000136048 4180 +ENSG00000136238 3114 +ENSG00000136709 14580 +ENSG00000136720 4683 +ENSG00000136819 5099 +ENSG00000137210 4815 +ENSG00000137285 2194 +ENSG00000137642 19449 +ENSG00000137776 8157 +ENSG00000137806 1790 +ENSG00000137812 10250 +ENSG00000137824 5114 +ENSG00000137965 2038 +ENSG00000138028 4039 +ENSG00000138459 4853 +ENSG00000138587 2504 +ENSG00000138594 10632 +ENSG00000138685 6775 +ENSG00000138750 4062 +ENSG00000138798 6127 +ENSG00000138835 13793 +ENSG00000139192 3188 +ENSG00000139200 3266 +ENSG00000139233 7993 +ENSG00000139496 10422 +ENSG00000139718 8314 +ENSG00000139726 3576 +ENSG00000139910 10046 +ENSG00000139971 7565 +ENSG00000140265 6634 +ENSG00000140391 8506 +ENSG00000140396 10026 
+ENSG00000140416 12492 +ENSG00000140459 3491 +ENSG00000141012 8402 +ENSG00000141179 5880 +ENSG00000141367 11836 +ENSG00000141404 8445 +ENSG00000141425 8442 +ENSG00000141448 3945 +ENSG00000141510 3936 +ENSG00000141699 5145 +ENSG00000141736 10321 +ENSG00000141756 4273 +ENSG00000142082 5202 +ENSG00000142208 11162 +ENSG00000142599 11503 +ENSG00000142676 1987 +ENSG00000142784 6024 +ENSG00000142871 2455 +ENSG00000142961 5916 +ENSG00000143198 8401 +ENSG00000143256 677 +ENSG00000143337 9615 +ENSG00000143401 3605 +ENSG00000143418 4644 +ENSG00000143458 9189 +ENSG00000143641 7743 +ENSG00000143727 4160 +ENSG00000143816 3971 +ENSG00000143919 3991 +ENSG00000143970 13023 +ENSG00000144524 6066 +ENSG00000145354 2607 +ENSG00000145494 1451 +ENSG00000145555 13963 +ENSG00000145604 3837 +ENSG00000145736 8676 +ENSG00000145990 10686 +ENSG00000146066 668 +ENSG00000146350 6912 +ENSG00000146425 2949 +ENSG00000146574 7316 +ENSG00000146733 2762 +ENSG00000146802 7995 +ENSG00000147050 7139 +ENSG00000147251 6721 +ENSG00000147364 11438 +ENSG00000147394 4812 +ENSG00000147439 5072 +ENSG00000147669 1711 +ENSG00000147684 2629 +ENSG00000148158 11578 +ENSG00000148334 3039 +ENSG00000148408 9897 +ENSG00000148484 4191 +ENSG00000148700 6541 +ENSG00000148834 1691 +ENSG00000149292 5302 +ENSG00000149308 7534 +ENSG00000149483 6348 +ENSG00000149679 3785 +ENSG00000150779 1166 +ENSG00000150991 3898 +ENSG00000151623 6607 +ENSG00000151657 6844 +ENSG00000151690 6031 +ENSG00000151718 9966 +ENSG00000151743 3195 +ENSG00000151748 5630 +ENSG00000151876 3014 +ENSG00000152270 11707 +ENSG00000152377 6174 +ENSG00000152457 6326 +ENSG00000152527 9835 +ENSG00000152642 4802 +ENSG00000152782 7638 +ENSG00000153130 6294 +ENSG00000153147 8385 +ENSG00000154065 6391 +ENSG00000154146 1309 +ENSG00000154269 3885 +ENSG00000154608 4737 +ENSG00000154743 5667 +ENSG00000155100 3977 +ENSG00000155313 7823 +ENSG00000155621 4973 +ENSG00000155636 4784 +ENSG00000155729 4915 +ENSG00000156050 5056 +ENSG00000156831 2991 +ENSG00000156973 2850 
+ENSG00000157107 6127 +ENSG00000157343 1418 +ENSG00000157423 23408 +ENSG00000157625 12711 +ENSG00000157637 7716 +ENSG00000157703 6302 +ENSG00000157823 9237 +ENSG00000157833 5939 +ENSG00000158092 3727 +ENSG00000158158 5105 +ENSG00000158296 6055 +ENSG00000158406 397 +ENSG00000158467 6858 +ENSG00000158470 4722 +ENSG00000158669 7939 +ENSG00000159173 7524 +ENSG00000159259 5118 +ENSG00000159346 3193 +ENSG00000159921 5477 +ENSG00000160051 2478 +ENSG00000160172 3402 +ENSG00000160199 7621 +ENSG00000160213 2490 +ENSG00000161040 4009 +ENSG00000161277 1825 +ENSG00000161328 2769 +ENSG00000161692 7146 +ENSG00000161888 2839 +ENSG00000162063 6309 +ENSG00000162227 3149 +ENSG00000162402 11708 +ENSG00000162444 861 +ENSG00000162526 2191 +ENSG00000162669 5827 +ENSG00000162819 4980 +ENSG00000162851 1848 +ENSG00000162980 6716 +ENSG00000163083 3204 +ENSG00000163156 3265 +ENSG00000163191 680 +ENSG00000163320 6490 +ENSG00000163322 7777 +ENSG00000163348 3630 +ENSG00000163376 4736 +ENSG00000163521 3411 +ENSG00000163577 5593 +ENSG00000163743 6565 +ENSG00000163832 3313 +ENSG00000163848 10574 +ENSG00000163900 8668 +ENSG00000163960 11291 +ENSG00000164073 5023 +ENSG00000164104 2775 +ENSG00000164144 4290 +ENSG00000164209 8283 +ENSG00000164241 7190 +ENSG00000164252 5051 +ENSG00000164308 7422 +ENSG00000164332 2676 +ENSG00000164418 7375 +ENSG00000164535 5756 +ENSG00000164597 8467 +ENSG00000164778 3395 +ENSG00000164902 4319 +ENSG00000164930 4065 +ENSG00000164934 9199 +ENSG00000164967 1092 +ENSG00000164985 6768 +ENSG00000164989 15144 +ENSG00000165030 2085 +ENSG00000165055 6749 +ENSG00000165275 2866 +ENSG00000165283 1806 +ENSG00000165410 7179 +ENSG00000165521 11000 +ENSG00000165629 2326 +ENSG00000165671 15242 +ENSG00000165816 3301 +ENSG00000165905 3115 +ENSG00000165959 15701 +ENSG00000165983 4003 +ENSG00000166068 8260 +ENSG00000166340 4901 +ENSG00000166343 2423 +ENSG00000166398 7459 +ENSG00000166788 3070 +ENSG00000166974 6180 +ENSG00000167195 2082 +ENSG00000167258 12773 +ENSG00000167614 7068 
+ENSG00000167642 5199 +ENSG00000167645 3193 +ENSG00000167685 7239 +ENSG00000167785 8184 +ENSG00000167815 2093 +ENSG00000167842 3157 +ENSG00000167930 4699 +ENSG00000167977 3420 +ENSG00000168005 2807 +ENSG00000168078 2165 +ENSG00000168096 7868 +ENSG00000168159 3285 +ENSG00000168306 3859 +ENSG00000168495 5611 +ENSG00000168612 2769 +ENSG00000168661 2745 +ENSG00000168758 6157 +ENSG00000168826 2889 +ENSG00000168916 6922 +ENSG00000169116 5034 +ENSG00000169188 3245 +ENSG00000169302 7776 +ENSG00000169372 4276 +ENSG00000169519 5499 +ENSG00000169627 1365 +ENSG00000169660 4153 +ENSG00000169762 7828 +ENSG00000169946 7775 +ENSG00000169957 2395 +ENSG00000170011 5872 +ENSG00000170113 8700 +ENSG00000170142 3074 +ENSG00000170296 2266 +ENSG00000170340 2861 +ENSG00000170412 8005 +ENSG00000170619 2917 +ENSG00000170638 3153 +ENSG00000170889 4371 +ENSG00000170949 8511 +ENSG00000171055 8938 +ENSG00000171067 3406 +ENSG00000171163 3929 +ENSG00000171606 8333 +ENSG00000172009 7464 +ENSG00000172058 3285 +ENSG00000172086 1753 +ENSG00000172186 594 +ENSG00000172239 3391 +ENSG00000172315 3520 +ENSG00000172458 3327 +ENSG00000172746 497 +ENSG00000173153 3030 +ENSG00000173163 2392 +ENSG00000173366 3804 +ENSG00000173480 4204 +ENSG00000173674 4427 +ENSG00000173681 8630 +ENSG00000173715 3765 +ENSG00000173848 4175 +ENSG00000174442 4849 +ENSG00000174469 10864 +ENSG00000174483 6145 +ENSG00000174705 7977 +ENSG00000174799 8640 +ENSG00000174996 5479 +ENSG00000175220 6138 +ENSG00000175322 15787 +ENSG00000175390 9423 +ENSG00000176396 1981 +ENSG00000176407 7968 +ENSG00000176444 3670 +ENSG00000176593 8787 +ENSG00000176871 4616 +ENSG00000177239 8331 +ENSG00000177352 1781 +ENSG00000177788 1155 +ENSG00000177932 5411 +ENSG00000178184 4224 +ENSG00000178229 3659 +ENSG00000178233 5459 +ENSG00000178719 2527 +ENSG00000178935 2989 +ENSG00000178950 15155 +ENSG00000178996 5559 +ENSG00000179119 6005 +ENSG00000179163 2047 +ENSG00000179455 5731 +ENSG00000179918 2551 +ENSG00000179988 6118 +ENSG00000180178 8075 +ENSG00000180182 
9656 +ENSG00000180818 2331 +ENSG00000180881 6689 +ENSG00000180884 4234 +ENSG00000180979 8114 +ENSG00000181220 8260 +ENSG00000181359 853 +ENSG00000181444 2734 +ENSG00000181472 3090 +ENSG00000181690 7833 +ENSG00000181915 3627 +ENSG00000182021 5046 +ENSG00000182134 5092 +ENSG00000182141 4956 +ENSG00000182324 4340 +ENSG00000182484 4618 +ENSG00000182810 2169 +ENSG00000182827 3578 +ENSG00000182841 5092 +ENSG00000182923 10116 +ENSG00000183022 643 +ENSG00000183171 2143 +ENSG00000183172 1731 +ENSG00000183309 7146 +ENSG00000183474 6989 +ENSG00000183506 7474 +ENSG00000183530 11809 +ENSG00000183569 2821 +ENSG00000184216 4001 +ENSG00000184319 4250 +ENSG00000184481 3447 +ENSG00000184731 3968 +ENSG00000184831 2353 +ENSG00000185189 4924 +ENSG00000185238 5467 +ENSG00000185482 2096 +ENSG00000185504 5354 +ENSG00000185670 3348 +ENSG00000185798 1797 +ENSG00000185885 1080 +ENSG00000186056 4024 +ENSG00000186106 5820 +ENSG00000186130 4106 +ENSG00000186532 6777 +ENSG00000186583 2007 +ENSG00000186743 740 +ENSG00000187144 4506 +ENSG00000187172 4080 +ENSG00000187186 1833 +ENSG00000187187 9007 +ENSG00000187210 6463 +ENSG00000187504 746 +ENSG00000187605 12271 +ENSG00000187994 4201 +ENSG00000188021 4230 +ENSG00000188039 8495 +ENSG00000188206 5007 +ENSG00000188295 1980 +ENSG00000188529 10277 +ENSG00000188549 7510 +ENSG00000188738 23761 +ENSG00000188786 8024 +ENSG00000188997 4164 +ENSG00000189143 4702 +ENSG00000189221 5528 +ENSG00000189343 883 +ENSG00000196074 6466 +ENSG00000196119 1146 +ENSG00000196123 4780 +ENSG00000196263 6783 +ENSG00000196305 7401 +ENSG00000196357 3953 +ENSG00000196419 3288 +ENSG00000196507 2305 +ENSG00000196549 9620 +ENSG00000196632 11472 +ENSG00000196696 6114 +ENSG00000196739 13988 +ENSG00000196741 1253 +ENSG00000196747 503 +ENSG00000196812 1328 +ENSG00000197045 7966 +ENSG00000197062 2933 +ENSG00000197121 12761 +ENSG00000197128 6096 +ENSG00000197535 12988 +ENSG00000197744 327 +ENSG00000197888 2077 +ENSG00000197961 7251 +ENSG00000198046 8810 +ENSG00000198155 2804 
+ENSG00000198198 17331 +ENSG00000198431 6708 +ENSG00000198464 4836 +ENSG00000198482 5432 +ENSG00000198515 3581 +ENSG00000198520 3166 +ENSG00000198648 4247 +ENSG00000198746 2225 +ENSG00000198752 7958 +ENSG00000198865 3431 +ENSG00000198929 7771 +ENSG00000198961 5217 +ENSG00000203441 872 +ENSG00000203616 333 +ENSG00000203668 7078 +ENSG00000203837 2354 +ENSG00000203865 2458 +ENSG00000203950 1265 +ENSG00000203995 4694 +ENSG00000204253 881 +ENSG00000204334 601 +ENSG00000204348 2483 +ENSG00000204385 3369 +ENSG00000204392 1077 +ENSG00000204394 6081 +ENSG00000204396 3638 +ENSG00000204628 5990 +ENSG00000204805 342 +ENSG00000204859 4503 +ENSG00000204956 6252 +ENSG00000205084 6015 +ENSG00000205208 3730 +ENSG00000205268 7425 +ENSG00000205485 3110 +ENSG00000205560 4102 +ENSG00000205628 3484 +ENSG00000206192 2978 +ENSG00000206560 11279 +ENSG00000210100 69 +ENSG00000210174 65 +ENSG00000211584 4921 +ENSG00000213033 1212 +ENSG00000213062 911 +ENSG00000213066 15159 +ENSG00000213077 2281 +ENSG00000213148 464 +ENSG00000213174 414 +ENSG00000213197 694 +ENSG00000213318 783 +ENSG00000213339 3430 +ENSG00000213493 1451 +ENSG00000213588 3014 +ENSG00000213711 814 +ENSG00000213742 5308 +ENSG00000213760 2147 +ENSG00000213793 551 +ENSG00000213864 676 +ENSG00000213880 797 +ENSG00000213904 4208 +ENSG00000213906 3233 +ENSG00000213917 815 +ENSG00000213971 5091 +ENSG00000214029 15455 +ENSG00000214174 3858 +ENSG00000214389 784 +ENSG00000214617 4479 +ENSG00000214694 5490 +ENSG00000214810 311 +ENSG00000214961 1372 +ENSG00000214975 499 +ENSG00000215286 754 +ENSG00000215333 1283 +ENSG00000216854 553 +ENSG00000216915 1495 +ENSG00000217716 494 +ENSG00000217801 2171 +ENSG00000218965 609 +ENSG00000219553 723 +ENSG00000220131 354 +ENSG00000220157 961 +ENSG00000220483 871 +ENSG00000221843 6199 +ENSG00000221909 2717 +ENSG00000222046 1869 +ENSG00000223382 1326 +ENSG00000223620 1102 +ENSG00000223877 622 +ENSG00000224016 291 +ENSG00000224520 1447 +ENSG00000224578 1377 +ENSG00000224628 1519 +ENSG00000224664 316 
+ENSG00000224892 997 +ENSG00000225405 390 +ENSG00000225544 392 +ENSG00000225787 306 +ENSG00000225806 1521 +ENSG00000226067 2075 +ENSG00000226086 822 +ENSG00000226114 361 +ENSG00000226144 454 +ENSG00000226232 1728 +ENSG00000226268 959 +ENSG00000226478 1126 +ENSG00000226703 812 +ENSG00000226752 7181 +ENSG00000226790 1139 +ENSG00000226833 1438 +ENSG00000227006 861 +ENSG00000227057 3115 +ENSG00000227343 600 +ENSG00000227376 552 +ENSG00000227401 284 +ENSG00000227543 3835 +ENSG00000227666 316 +ENSG00000227742 946 +ENSG00000227968 999 +ENSG00000228118 459 +ENSG00000228195 881 +ENSG00000228236 315 +ENSG00000228599 742 +ENSG00000228612 2737 +ENSG00000228981 843 +ENSG00000229044 439 +ENSG00000229344 682 +ENSG00000229503 477 +ENSG00000229956 6794 +ENSG00000230006 8042 +ENSG00000230022 634 +ENSG00000230074 665 +ENSG00000230118 258 +ENSG00000230146 1176 +ENSG00000230243 319 +ENSG00000230295 351 +ENSG00000230406 421 +ENSG00000230531 1798 +ENSG00000230551 8636 +ENSG00000230650 3130 +ENSG00000230667 909 +ENSG00000230863 742 +ENSG00000230869 2418 +ENSG00000230913 744 +ENSG00000231096 390 +ENSG00000231181 559 +ENSG00000231245 402 +ENSG00000231434 2167 +ENSG00000231615 1337 +ENSG00000231711 4947 +ENSG00000231955 1411 +ENSG00000232186 1228 +ENSG00000232581 357 +ENSG00000232676 1124 +ENSG00000232699 736 +ENSG00000232905 946 +ENSG00000232943 400 +ENSG00000233122 2436 +ENSG00000233454 275 +ENSG00000233503 1501 +ENSG00000233602 619 +ENSG00000233836 3242 +ENSG00000233846 487 +ENSG00000234231 2095 +ENSG00000234639 1239 +ENSG00000234722 3487 +ENSG00000234742 555 +ENSG00000234981 792 +ENSG00000235065 475 +ENSG00000235363 225 +ENSG00000235424 288 +ENSG00000235444 618 +ENSG00000235512 292 +ENSG00000235623 574 +ENSG00000235655 411 +ENSG00000235698 1200 +ENSG00000235750 4783 +ENSG00000235847 965 +ENSG00000235859 1234 +ENSG00000235892 1677 +ENSG00000236086 262 +ENSG00000236285 837 +ENSG00000236290 703 +ENSG00000236330 886 +ENSG00000236468 1335 +ENSG00000236570 1227 +ENSG00000236680 1238 
+ENSG00000236681 523 +ENSG00000236735 375 +ENSG00000236739 535 +ENSG00000236753 2715 +ENSG00000236801 474 +ENSG00000236824 13458 +ENSG00000236946 1087 +ENSG00000237017 4158 +ENSG00000237033 609 +ENSG00000237054 3194 +ENSG00000237101 1323 +ENSG00000237357 2579 +ENSG00000237517 7448 +ENSG00000237939 652 +ENSG00000237977 563 +ENSG00000238221 500 +ENSG00000238251 514 +ENSG00000239377 420 +ENSG00000239524 400 +ENSG00000239569 736 +ENSG00000239791 1918 +ENSG00000239887 4495 +ENSG00000239926 747 +ENSG00000240005 589 +ENSG00000240392 575 +ENSG00000240418 893 +ENSG00000240540 1183 +ENSG00000240821 579 +ENSG00000241258 3540 +ENSG00000241370 1606 +ENSG00000241494 438 +ENSG00000241680 375 +ENSG00000241697 2611 +ENSG00000241772 1051 +ENSG00000241923 622 +ENSG00000242061 438 +ENSG00000242140 231 +ENSG00000242349 1427 +ENSG00000242600 2616 +ENSG00000242612 4046 +ENSG00000242858 602 +ENSG00000243122 413 +ENSG00000243396 402 +ENSG00000243701 4206 +ENSG00000243779 321 +ENSG00000244171 1291 +ENSG00000244270 403
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/in.fasta Thu Nov 17 16:41:06 2016 -0500 @@ -0,0 +1,2 @@ +>1 +AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGGAAAAAAAAAAAAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGGAAAAAAAAAATTTTT \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/in.gtf Thu Nov 17 16:41:06 2016 -0500 @@ -0,0 +1,6 @@ +1 ensembl_havana gene 1 103 . + . gene_id "ENSG00000162526"; gene_version "4"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; +1 ensembl_havana transcript 1 103 . + . gene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; tag "basic"; transcript_support_level "NA"; +1 ensembl_havana exon 1 103 . + . gene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; exon_number "1"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; exon_id "ENSE00002319515"; exon_version "1"; tag "basic"; transcript_support_level "NA"; +1 ensembl_havana CDS 1 100 . 
+ 0 gene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; exon_number "1"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; protein_id "ENSP00000334393"; protein_version "3"; tag "basic"; transcript_support_level "NA"; +1 ensembl_havana start_codon 1 3 . + 0 gene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; exon_number "1"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; tag "basic"; transcript_support_level "NA"; +1 ensembl_havana stop_codon 101 103 . + 0 gene_id "ENSG00000162526"; gene_version "4"; transcript_id "ENST00000335137"; transcript_version "3"; exon_number "1"; gene_name "OR4F5"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; havana_gene "OTTHUMG00000001094"; havana_gene_version "1"; transcript_name "OR4F5-001"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS30547"; havana_transcript "OTTHUMT00000003223"; havana_transcript_version "1"; tag "basic"; transcript_support_level "NA";
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/length.tab Thu Nov 17 16:41:06 2016 -0500 @@ -0,0 +1,1 @@ +ENSG00000162526 103
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Thu Nov 17 16:41:06 2016 -0500 @@ -0,0 +1,7 @@ +<tables> + <!-- Locations of all fasta files under genome directory; each non-comment row of all_fasta.loc supplies the value, dbkey, name and path columns declared below --> + <table name="all_fasta" comment_char="#"> + <columns>value, dbkey, name, path</columns> + <file path="tool-data/all_fasta.loc" /> + </table> +</tables>