# HG changeset patch
# User iuc
# Date 1639408413 0
# Node ID db456c398880a54563078bf72ecba0131846b51a
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/homer commit 186b72f369eb2a11d92f4d63cac2e8ebe386b9bd"
diff -r 000000000000 -r db456c398880 homer_scanMotifGenomeWide.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/homer_scanMotifGenomeWide.xml Mon Dec 13 15:13:33 2021 +0000
@@ -0,0 +1,94 @@
+
+
+
+ macros.xml
+
+
+
+
+#if $bed:
+ '${output_bed}'
+#else
+ '${output_text}'
+#end if
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ bed is False
+
+
+ bed is True
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r db456c398880 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Mon Dec 13 15:13:33 2021 +0000
@@ -0,0 +1,49 @@
+
+ 4.11
+
+ 4.11
+ 2
+
+
+ homer
+
+
+
+
+ homer
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 10.1016/j.molcel.2010.05.004
+
+
+
diff -r 000000000000 -r db456c398880 test-data/CTCF_peaks.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/CTCF_peaks.bed Mon Dec 13 15:13:33 2021 +0000
@@ -0,0 +1,66 @@
+chr10 95489848 95489849 chr10:95489648-95490124 9.37574 + 95489848 95489848 236,28,36
+chr10 95550657 95550658 chr10:95550458-95550913 22.4312 + 95550657 95550657 236,28,36
+chr10 95551883 95551884 chr10:95551647-95552156 9.8358 + 95551883 95551883 236,28,36
+chr10 95557476 95557477 chr10:95557210-95557760 15.8474 + 95557476 95557476 236,28,36
+chr10 95858373 95858374 chr10:95858061-95858620 8.91987 - 95858373 95858373 46,49,145
+chr10 95894941 95894942 chr10:95894657-95895156 14.5144 - 95894941 95894941 46,49,145
+chr10 95898656 95898657 chr10:95898384-95898961 12.5406 + 95898656 95898656 236,28,36
+chr10 96033867 96033868 chr10:96033562-96034101 20.0255 - 96033867 96033867 46,49,145
+chr10 96080634 96080635 chr10:96080279-96080903 6.11901 - 96080634 96080634 46,49,145
+chr10 96105359 96105360 chr10:96104999-96105605 12.8558 - 96105359 96105359 46,49,145
+chr10 96132305 96132306 chr10:96131980-96132540 14.3011 + 96132305 96132305 236,28,36
+chr10 96182451 96182452 chr10:96182221-96182632 9.96549 + 96182451 96182451 236,28,36
+chr10 96214052 96214053 chr10:96213797-96214300 26.3993 + 96214052 96214052 236,28,36
+chr10 96616854 96616855 chr10:96616623-96617093 4.72295 - 96616854 96616854 46,49,145
+chr10 96618518 96618519 chr10:96618332-96618740 7.7811 + 96618518 96618518 236,28,36
+chr10 96934833 96934834 chr10:96934538-96935160 10.7794 - 96934833 96934833 46,49,145
+chr10 97160696 97160697 chr10:97160138-97160734 9.26797 - 97160696 97160696 46,49,145
+chr10 97313143 97313144 chr10:97312750-97313424 16.1617 - 97313143 97313143 46,49,145
+chr10 97340069 97340070 chr10:97339748-97340355 8.23779 - 97340069 97340069 46,49,145
+chr10 97345534 97345535 chr10:97345244-97345845 21.9543 + 97345534 97345534 236,28,36
+chr10 97375992 97375993 chr10:97375737-97376277 17.1427 + 97375992 97375992 236,28,36
+chr10 97670411 97670412 chr10:97670243-97670641 15.0213 - 97670411 97670411 46,49,145
+chr10 97689620 97689621 chr10:97689400-97689842 6.20282 - 97689620 97689620 46,49,145
+chr10 97704766 97704767 chr10:97704497-97705056 12.2858 - 97704766 97704766 46,49,145
+chr10 97712886 97712887 chr10:97712528-97713193 16.5834 + 97712886 97712886 236,28,36
+chr10 97768453 97768454 chr10:97768293-97768873 -1.29559 . 97768453 97768453 0,0,0
+chr2 73745858 73745859 chr2:73745562-73746116 15.583 - 73745858 73745858 46,49,145
+chr2 73792430 73792431 chr2:73792224-73792669 11.3969 + 73792430 73792430 236,28,36
+chr2 73868302 73868303 chr2:73868052-73868532 16.5297 - 73868302 73868302 46,49,145
+chr2 73925250 73925251 chr2:73924997-73925519 6.64944 + 73925250 73925250 236,28,36
+chr2 73947567 73947568 chr2:73947347-73947809 11.7 - 73947567 73947567 46,49,145
+chr2 73965796 73965797 chr2:73965501-73966069 18.8918 + 73965796 73965796 236,28,36
+chr2 73987358 73987359 chr2:73987054-73987519 5.75056 - 73987358 73987358 46,49,145
+chr2 73998748 73998749 chr2:73998449-73999023 4.45417 - 73998748 73998748 46,49,145
+chr2 74027927 74027928 chr2:74027638-74028176 17.1136 + 74027927 74027927 236,28,36
+chr2 74066830 74066831 chr2:74066544-74067106 4.26306 + 74066830 74066830 236,28,36
+chr2 74247701 74247702 chr2:74247519-74247926 19.6437 - 74247701 74247701 46,49,145
+chr2 74300454 74300455 chr2:74300183-74300724 21.3163 + 74300454 74300454 236,28,36
+chr2 74305972 74305973 chr2:74305723-74306142 15.4325 - 74305972 74305972 46,49,145
+chr2 74655531 74655532 chr2:74655214-74655823 11.0865 - 74655531 74655531 46,49,145
+chr2 74661783 74661784 chr2:74661581-74662231 6.32635 - 74661783 74661783 46,49,145
+chr2 74663751 74663752 chr2:74663455-74664030 5.5749 - 74663751 74663751 46,49,145
+chr2 74673182 74673183 chr2:74672881-74673432 11.6759 - 74673182 74673182 46,49,145
+chr2 74686486 74686487 chr2:74686220-74686785 8.18583 + 74686486 74686486 236,28,36
+chr2 74691769 74691770 chr2:74691505-74692018 10.8344 + 74691769 74691769 236,28,36
+chr2 74694233 74694234 chr2:74693923-74694529 6.49186 - 74694233 74694233 46,49,145
+chr2 74702401 74702402 chr2:74702109-74702707 16.3674 + 74702401 74702401 236,28,36
+chr2 74715927 74715928 chr2:74715602-74716472 18.4944 + 74715927 74715927 236,28,36
+chr2 74964543 74964544 chr2:74964211-74964829 19.6671 - 74964543 74964543 46,49,145
+chr2 74977548 74977549 chr2:74977360-74977813 6.64332 + 74977548 74977548 236,28,36
+chr2 75055369 75055370 chr2:75055077-75055687 25.0987 - 75055369 75055369 46,49,145
+chr2 75138095 75138096 chr2:75137844-75138351 14.9999 - 75138095 75138095 46,49,145
+chr2 75151177 75151178 chr2:75150910-75151415 21.6232 - 75151177 75151177 46,49,145
+chr2 75152696 75152697 chr2:75152384-75152964 19.3701 - 75152696 75152696 46,49,145
+chr2 75208425 75208426 chr2:75208163-75208613 20.9151 - 75208425 75208425 46,49,145
+chr2 75312439 75312440 chr2:75312148-75312690 12.976 - 75312439 75312439 46,49,145
+chr2 75318398 75318399 chr2:75318086-75318603 15.8984 - 75318398 75318398 46,49,145
+chr2 75515510 75515511 chr2:75515254-75515778 6.45021 - 75515510 75515510 46,49,145
+chr2 75563785 75563786 chr2:75563556-75564002 6.81984 - 75563785 75563785 46,49,145
+chr2 75588782 75588783 chr2:75588511-75588998 13.9441 - 75588782 75588782 46,49,145
+chr2 75590417 75590418 chr2:75590116-75590691 15.844 - 75590417 75590417 46,49,145
+chr2 75603329 75603330 chr2:75603091-75603554 11.5679 - 75603329 75603329 46,49,145
+chr2 75605891 75605892 chr2:75605600-75606199 11.5138 + 75605891 75605891 236,28,36
+chr2 75658764 75658765 chr2:75658517-75658909 0.26272 + 75658764 75658764 236,28,36
+chr2 75781339 75781340 chr2:75781071-75781581 10.7504 - 75781339 75781339 46,49,145
+chr2 75786285 75786286 chr2:75786025-75786608 17.3776 + 75786285 75786285 236,28,36
diff -r 000000000000 -r db456c398880 test-data/CTCF_peaks_first.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/CTCF_peaks_first.txt Mon Dec 13 15:13:33 2021 +0000
@@ -0,0 +1,67 @@
+PeakID (cmd=annotatePeaks.pl test-data/CTCF_peaks.bed none -gtf test-data/small_simplified.gtf -ann test-data/annotations_default_simplified.txt) Chr Start End Strand Peak Score Focus Ratio/Region Size Annotation Detailed Annotation Distance to TSS Nearest PromoterID Entrez ID Nearest Unigene Nearest Refseq Nearest Ensembl Gene Name Gene Alias Gene Description Gene Type
+chr10:96213797-96214300 chr10 96214053 96214053 + 26.3993 NA NA NA NA NA
+chr2:75055077-75055687 chr2 75055370 75055370 - 25.0987 NA Intergenic NA -339374 ENSMUST00000145799 ENSMUSG00000052371 ENSMUST00000145799 Hoxd3os1 antisense
+chr10:95550458-95550913 chr10 95550658 95550658 + 22.4312 NA NA NA NA NA
+chr10:97345244-97345845 chr10 97345535 97345535 + 21.9543 NA NA NA NA NA
+chr2:75150910-75151415 chr2 75151178 75151178 - 21.6232 NA Intergenic NA -435182 ENSMUST00000145799 ENSMUSG00000052371 ENSMUST00000145799 Hoxd3os1 antisense
+chr2:74300183-74300724 chr2 74300455 74300455 + 21.3163 NA Intergenic NA -367855 ENSMUST00000001872 ENSMUSG00000001819 ENSMUST00000001872 Hoxd13 protein_coding
+chr2:75208163-75208613 chr2 75208426 75208426 - 20.9151 NA Intergenic NA -492430 ENSMUST00000145799 ENSMUSG00000052371 ENSMUST00000145799 Hoxd3os1 antisense
+chr10:96033562-96034101 chr10 96033868 96033868 - 20.0255 NA NA NA NA NA
+chr2:74964211-74964829 chr2 74964544 74964544 - 19.6671 NA Intergenic NA -248548 ENSMUST00000145799 ENSMUSG00000052371 ENSMUST00000145799 Hoxd3os1 antisense
+chr2:74247519-74247926 chr2 74247702 74247702 - 19.6437 NA Intergenic NA -420608 ENSMUST00000001872 ENSMUSG00000001819 ENSMUST00000001872 Hoxd13 protein_coding
+chr2:75152384-75152964 chr2 75152697 75152697 - 19.3701 NA Intergenic NA -436701 ENSMUST00000145799 ENSMUSG00000052371 ENSMUST00000145799 Hoxd3os1 antisense
+chr2:73965501-73966069 chr2 73965797 73965797 + 18.8918 NA Intergenic NA -702513 ENSMUST00000001872 ENSMUSG00000001819 ENSMUST00000001872 Hoxd13 protein_coding
+chr2:74715602-74716472 chr2 74715928 74715928 + 18.4944 NA promoter-TSS (ENSMUST00000145799) NA 68 ENSMUST00000145799 ENSMUSG00000052371 ENSMUST00000145799 Hoxd3os1 antisense
+chr2:75786025-75786608 chr2 75786286 75786286 + 17.3776 NA Intergenic NA -1070290 ENSMUST00000145799 ENSMUSG00000052371 ENSMUST00000145799 Hoxd3os1 antisense
+chr10:97375737-97376277 chr10 97375993 97375993 + 17.1427 NA NA NA NA NA
+chr2:74027638-74028176 chr2 74027928 74027928 + 17.1136 NA Intergenic NA -640382 ENSMUST00000001872 ENSMUSG00000001819 ENSMUST00000001872 Hoxd13 protein_coding
+chr10:97712528-97713193 chr10 97712887 97712887 + 16.5834 NA NA NA NA NA
+chr2:73868052-73868532 chr2 73868303 73868303 - 16.5297 NA Intergenic NA -800007 ENSMUST00000001872 ENSMUSG00000001819 ENSMUST00000001872 Hoxd13 protein_coding
+chr2:74702109-74702707 chr2 74702402 74702402 + 16.3674 NA Intergenic NA -1260 ENSMUST00000198895 ENSMUSG00000104570 ENSMUST00000198895 Gm44463 miRNA
+chr10:97312750-97313424 chr10 97313144 97313144 - 16.1617 NA NA NA NA NA
+chr2:75318086-75318603 chr2 75318399 75318399 - 15.8984 NA Intergenic NA -602403 ENSMUST00000145799 ENSMUSG00000052371 ENSMUST00000145799 Hoxd3os1 antisense
+chr10:95557210-95557760 chr10 95557477 95557477 + 15.8474 NA NA NA NA NA
+chr2:75590116-75590691 chr2 75590418 75590418 - 15.844 NA Intergenic NA -874422 ENSMUST00000145799 ENSMUSG00000052371 ENSMUST00000145799 Hoxd3os1 antisense
+chr2:73745562-73746116 chr2 73745859 73745859 - 15.583 NA Intergenic NA -922451 ENSMUST00000001872 ENSMUSG00000001819 ENSMUST00000001872 Hoxd13 protein_coding
+chr2:74305723-74306142 chr2 74305973 74305973 - 15.4325 NA Intergenic NA -362337 ENSMUST00000001872 ENSMUSG00000001819 ENSMUST00000001872 Hoxd13 protein_coding
+chr10:97670243-97670641 chr10 97670412 97670412 - 15.0213 NA NA NA NA NA
+chr2:75137844-75138351 chr2 75138096 75138096 - 14.9999 NA Intergenic NA -422100 ENSMUST00000145799 ENSMUSG00000052371 ENSMUST00000145799 Hoxd3os1 antisense
+chr10:95894657-95895156 chr10 95894942 95894942 - 14.5144 NA NA NA NA NA
+chr10:96131980-96132540 chr10 96132306 96132306 + 14.3011 NA NA NA NA NA
+chr2:75588511-75588998 chr2 75588783 75588783 - 13.9441 NA Intergenic NA -872787 ENSMUST00000145799 ENSMUSG00000052371 ENSMUST00000145799 Hoxd3os1 antisense
+chr2:75312148-75312690 chr2 75312440 75312440 - 12.976 NA Intergenic NA -596444 ENSMUST00000145799 ENSMUSG00000052371 ENSMUST00000145799 Hoxd3os1 antisense
+chr10:96104999-96105605 chr10 96105360 96105360 - 12.8558 NA NA NA NA NA
+chr10:95898384-95898961 chr10 95898657 95898657 + 12.5406 NA NA NA NA NA
+chr10:97704497-97705056 chr10 97704767 97704767 - 12.2858 NA NA NA NA NA
+chr2:73947347-73947809 chr2 73947568 73947568 - 11.7 NA Intergenic NA -720742 ENSMUST00000001872 ENSMUSG00000001819 ENSMUST00000001872 Hoxd13 protein_coding
+chr2:74672881-74673432 chr2 74673183 74673183 - 11.6759 NA Intergenic NA -1830 ENSMUST00000001878 ENSMUSG00000001823 ENSMUST00000001878 Hoxd12 protein_coding
+chr2:75603091-75603554 chr2 75603330 75603330 - 11.5679 NA Intergenic NA -887334 ENSMUST00000145799 ENSMUSG00000052371 ENSMUST00000145799 Hoxd3os1 antisense
+chr2:75605600-75606199 chr2 75605892 75605892 + 11.5138 NA Intergenic NA -889896 ENSMUST00000145799 ENSMUSG00000052371 ENSMUST00000145799 Hoxd3os1 antisense
+chr2:73792224-73792669 chr2 73792431 73792431 + 11.3969 NA Intergenic NA -875879 ENSMUST00000001872 ENSMUSG00000001819 ENSMUST00000001872 Hoxd13 protein_coding
+chr2:74655214-74655823 chr2 74655532 74655532 - 11.0865 NA Intergenic NA -12778 ENSMUST00000001872 ENSMUSG00000001819 ENSMUST00000001872 Hoxd13 protein_coding
+chr2:74691505-74692018 chr2 74691770 74691770 + 10.8344 NA promoter-TSS (ENSMUST00000061745) NA -154 ENSMUST00000061745 ENSMUSG00000050368 ENSMUST00000061745 Hoxd10 protein_coding
+chr10:96934538-96935160 chr10 96934834 96934834 - 10.7794 NA NA NA NA NA
+chr2:75781071-75781581 chr2 75781340 75781340 - 10.7504 NA Intergenic NA -1065344 ENSMUST00000145799 ENSMUSG00000052371 ENSMUST00000145799 Hoxd3os1 antisense
+chr10:96182221-96182632 chr10 96182452 96182452 + 9.96549 NA NA NA NA NA
+chr10:95551647-95552156 chr10 95551884 95551884 + 9.8358 NA NA NA NA NA
+chr10:95489648-95490124 chr10 95489849 95489849 + 9.37574 NA NA NA NA NA
+chr10:97160138-97160734 chr10 97160697 97160697 - 9.26797 NA NA NA NA NA
+chr10:95858061-95858620 chr10 95858374 95858374 - 8.91987 NA NA NA NA NA
+chr10:97339748-97340355 chr10 97340070 97340070 - 8.23779 NA NA NA NA NA
+chr2:74686220-74686785 chr2 74686487 74686487 + 8.18583 NA exon (ENSMUST00000048086, exon 2 of 2) NA 3041 ENSMUST00000136302 ENSMUSG00000099521 ENSMUST00000136302 Gm28309 processed_transcript
+chr10:96618332-96618740 chr10 96618519 96618519 + 7.7811 NA NA NA NA NA
+chr2:75563556-75564002 chr2 75563786 75563786 - 6.81984 NA Intergenic NA -847790 ENSMUST00000145799 ENSMUSG00000052371 ENSMUST00000145799 Hoxd3os1 antisense
+chr2:73924997-73925519 chr2 73925251 73925251 + 6.64944 NA Intergenic NA -743059 ENSMUST00000001872 ENSMUSG00000001819 ENSMUST00000001872 Hoxd13 protein_coding
+chr2:74977360-74977813 chr2 74977549 74977549 + 6.64332 NA Intergenic NA -261553 ENSMUST00000145799 ENSMUSG00000052371 ENSMUST00000145799 Hoxd3os1 antisense
+chr2:74693923-74694529 chr2 74694234 74694234 - 6.49186 NA TTS (ENSMUST00000136302) NA -1286 ENSMUST00000126966 ENSMUSG00000086077 ENSMUST00000126966 Gm14396 lincRNA
+chr2:75515254-75515778 chr2 75515511 75515511 - 6.45021 NA Intergenic NA -799515 ENSMUST00000145799 ENSMUSG00000052371 ENSMUST00000145799 Hoxd3os1 antisense
+chr2:74661581-74662231 chr2 74661784 74661784 - 6.32635 NA Intergenic NA -6526 ENSMUST00000001872 ENSMUSG00000001819 ENSMUST00000001872 Hoxd13 protein_coding
+chr10:97689400-97689842 chr10 97689621 97689621 - 6.20282 NA NA NA NA NA
+chr10:96080279-96080903 chr10 96080635 96080635 - 6.11901 NA NA NA NA NA
+chr2:73987054-73987519 chr2 73987359 73987359 - 5.75056 NA Intergenic NA -680951 ENSMUST00000001872 ENSMUSG00000001819 ENSMUST00000001872 Hoxd13 protein_coding
+chr2:74663455-74664030 chr2 74663752 74663752 - 5.5749 NA Intergenic NA -4558 ENSMUST00000001872 ENSMUSG00000001819 ENSMUST00000001872 Hoxd13 protein_coding
+chr10:96616623-96617093 chr10 96616855 96616855 - 4.72295 NA NA NA NA NA
+chr2:73998449-73999023 chr2 73998749 73998749 - 4.45417 NA Intergenic NA -669561 ENSMUST00000001872 ENSMUSG00000001819 ENSMUST00000001872 Hoxd13 protein_coding
+chr2:74066544-74067106 chr2 74066831 74066831 + 4.26306 NA Intergenic NA -601479 ENSMUST00000001872 ENSMUSG00000001819 ENSMUST00000001872 Hoxd13 protein_coding
+chr2:75658517-75658909 chr2 75658765 75658765 + 0.26272 NA Intergenic NA -942769 ENSMUST00000145799 ENSMUSG00000052371 ENSMUST00000145799 Hoxd3os1 antisense
+chr10:97768293-97768873 chr10 97768454 97768454 + -1.29559 NA NA NA NA NA
diff -r 000000000000 -r db456c398880 test-data/CTCF_peaks_second.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/CTCF_peaks_second.txt Mon Dec 13 15:13:33 2021 +0000
@@ -0,0 +1,67 @@
+PeakID (cmd=annotatePeaks.pl test-data/CTCF_peaks.bed none -ann test-data/annotations_default.txt) Chr Start End Strand Peak Score Focus Ratio/Region Size Annotation Detailed Annotation Distance to TSS Nearest PromoterID Entrez ID Nearest Unigene Nearest Refseq Nearest Ensembl Gene Name Gene Alias Gene Description Gene Type
+chr10:96213797-96214300 chr10 96214053 96214053 + 26.3993 NA NA NA NA NA
+chr2:75055077-75055687 chr2 75055370 75055370 - 25.0987 NA Intergenic NA NA NA
+chr10:95550458-95550913 chr10 95550658 95550658 + 22.4312 NA NA NA NA NA
+chr10:97345244-97345845 chr10 97345535 97345535 + 21.9543 NA NA NA NA NA
+chr2:75150910-75151415 chr2 75151178 75151178 - 21.6232 NA Intergenic NA NA NA
+chr2:74300183-74300724 chr2 74300455 74300455 + 21.3163 NA Intergenic NA NA NA
+chr2:75208163-75208613 chr2 75208426 75208426 - 20.9151 NA Intergenic NA NA NA
+chr10:96033562-96034101 chr10 96033868 96033868 - 20.0255 NA NA NA NA NA
+chr2:74964211-74964829 chr2 74964544 74964544 - 19.6671 NA Intergenic NA NA NA
+chr2:74247519-74247926 chr2 74247702 74247702 - 19.6437 NA Intergenic NA NA NA
+chr2:75152384-75152964 chr2 75152697 75152697 - 19.3701 NA Intergenic NA NA NA
+chr2:73965501-73966069 chr2 73965797 73965797 + 18.8918 NA Intergenic NA NA NA
+chr2:74715602-74716472 chr2 74715928 74715928 + 18.4944 NA promoter-TSS (ENSMUST00000145799) NA NA NA
+chr2:75786025-75786608 chr2 75786286 75786286 + 17.3776 NA Intergenic NA NA NA
+chr10:97375737-97376277 chr10 97375993 97375993 + 17.1427 NA NA NA NA NA
+chr2:74027638-74028176 chr2 74027928 74027928 + 17.1136 NA Intergenic NA NA NA
+chr10:97712528-97713193 chr10 97712887 97712887 + 16.5834 NA NA NA NA NA
+chr2:73868052-73868532 chr2 73868303 73868303 - 16.5297 NA Intergenic NA NA NA
+chr2:74702109-74702707 chr2 74702402 74702402 + 16.3674 NA TTS (ENSMUST00000132326) NA NA NA
+chr10:97312750-97313424 chr10 97313144 97313144 - 16.1617 NA NA NA NA NA
+chr2:75318086-75318603 chr2 75318399 75318399 - 15.8984 NA Intergenic NA NA NA
+chr10:95557210-95557760 chr10 95557477 95557477 + 15.8474 NA NA NA NA NA
+chr2:75590116-75590691 chr2 75590418 75590418 - 15.844 NA Intergenic NA NA NA
+chr2:73745562-73746116 chr2 73745859 73745859 - 15.583 NA Intergenic NA NA NA
+chr2:74305723-74306142 chr2 74305973 74305973 - 15.4325 NA Intergenic NA NA NA
+chr10:97670243-97670641 chr10 97670412 97670412 - 15.0213 NA NA NA NA NA
+chr2:75137844-75138351 chr2 75138096 75138096 - 14.9999 NA Intergenic NA NA NA
+chr10:95894657-95895156 chr10 95894942 95894942 - 14.5144 NA NA NA NA NA
+chr10:96131980-96132540 chr10 96132306 96132306 + 14.3011 NA NA NA NA NA
+chr2:75588511-75588998 chr2 75588783 75588783 - 13.9441 NA Intergenic NA NA NA
+chr2:75312148-75312690 chr2 75312440 75312440 - 12.976 NA Intergenic NA NA NA
+chr10:96104999-96105605 chr10 96105360 96105360 - 12.8558 NA NA NA NA NA
+chr10:95898384-95898961 chr10 95898657 95898657 + 12.5406 NA NA NA NA NA
+chr10:97704497-97705056 chr10 97704767 97704767 - 12.2858 NA NA NA NA NA
+chr2:73947347-73947809 chr2 73947568 73947568 - 11.7 NA Intergenic NA NA NA
+chr2:74672881-74673432 chr2 74673183 74673183 - 11.6759 NA Intergenic NA NA NA
+chr2:75603091-75603554 chr2 75603330 75603330 - 11.5679 NA Intergenic NA NA NA
+chr2:75605600-75606199 chr2 75605892 75605892 + 11.5138 NA Intergenic NA NA NA
+chr2:73792224-73792669 chr2 73792431 73792431 + 11.3969 NA Intergenic NA NA NA
+chr2:74655214-74655823 chr2 74655532 74655532 - 11.0865 NA Intergenic NA NA NA
+chr2:74691505-74692018 chr2 74691770 74691770 + 10.8344 NA promoter-TSS (ENSMUST00000061745) NA NA NA
+chr10:96934538-96935160 chr10 96934834 96934834 - 10.7794 NA NA NA NA NA
+chr2:75781071-75781581 chr2 75781340 75781340 - 10.7504 NA Intergenic NA NA NA
+chr10:96182221-96182632 chr10 96182452 96182452 + 9.96549 NA NA NA NA NA
+chr10:95551647-95552156 chr10 95551884 95551884 + 9.8358 NA NA NA NA NA
+chr10:95489648-95490124 chr10 95489849 95489849 + 9.37574 NA NA NA NA NA
+chr10:97160138-97160734 chr10 97160697 97160697 - 9.26797 NA NA NA NA NA
+chr10:95858061-95858620 chr10 95858374 95858374 - 8.91987 NA NA NA NA NA
+chr10:97339748-97340355 chr10 97340070 97340070 - 8.23779 NA NA NA NA NA
+chr2:74686220-74686785 chr2 74686487 74686487 + 8.18583 NA exon (ENSMUST00000048086, exon 2 of 2) NA NA NA
+chr10:96618332-96618740 chr10 96618519 96618519 + 7.7811 NA NA NA NA NA
+chr2:75563556-75564002 chr2 75563786 75563786 - 6.81984 NA Intergenic NA NA NA
+chr2:73924997-73925519 chr2 73925251 73925251 + 6.64944 NA Intergenic NA NA NA
+chr2:74977360-74977813 chr2 74977549 74977549 + 6.64332 NA Intergenic NA NA NA
+chr2:74693923-74694529 chr2 74694234 74694234 - 6.49186 NA TTS (ENSMUST00000136302) NA NA NA
+chr2:75515254-75515778 chr2 75515511 75515511 - 6.45021 NA Intergenic NA NA NA
+chr2:74661581-74662231 chr2 74661784 74661784 - 6.32635 NA Intergenic NA NA NA
+chr10:97689400-97689842 chr10 97689621 97689621 - 6.20282 NA NA NA NA NA
+chr10:96080279-96080903 chr10 96080635 96080635 - 6.11901 NA NA NA NA NA
+chr2:73987054-73987519 chr2 73987359 73987359 - 5.75056 NA Intergenic NA NA NA
+chr2:74663455-74664030 chr2 74663752 74663752 - 5.5749 NA Intergenic NA NA NA
+chr10:96616623-96617093 chr10 96616855 96616855 - 4.72295 NA NA NA NA NA
+chr2:73998449-73999023 chr2 73998749 73998749 - 4.45417 NA Intergenic NA NA NA
+chr2:74066544-74067106 chr2 74066831 74066831 + 4.26306 NA Intergenic NA NA NA
+chr2:75658517-75658909 chr2 75658765 75658765 + 0.26272 NA Intergenic NA NA NA
+chr10:97768293-97768873 chr10 97768454 97768454 + -1.29559 NA NA NA NA NA
diff -r 000000000000 -r db456c398880 test-data/CTCF_peaks_shifted.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/CTCF_peaks_shifted.bed Mon Dec 13 15:13:33 2021 +0000
@@ -0,0 +1,24 @@
+mm10_dna 5858 5859 chr2:73745562-73746116 15.583 - 73745858 73745858 46,49,145
+mm10_dna 52430 52431 chr2:73792224-73792669 11.3969 + 73792430 73792430 236,28,36
+mm10_dna 128302 128303 chr2:73868052-73868532 16.5297 - 73868302 73868302 46,49,145
+mm10_dna 185250 185251 chr2:73924997-73925519 6.64944 + 73925250 73925250 236,28,36
+mm10_dna 207567 207568 chr2:73947347-73947809 11.7 - 73947567 73947567 46,49,145
+mm10_dna 225796 225797 chr2:73965501-73966069 18.8918 + 73965796 73965796 236,28,36
+mm10_dna 247358 247359 chr2:73987054-73987519 5.75056 - 73987358 73987358 46,49,145
+mm10_dna 258748 258749 chr2:73998449-73999023 4.45417 - 73998748 73998748 46,49,145
+mm10_dna 287927 287928 chr2:74027638-74028176 17.1136 + 74027927 74027927 236,28,36
+mm10_dna 326830 326831 chr2:74066544-74067106 4.26306 + 74066830 74066830 236,28,36
+mm10_dna 507701 507702 chr2:74247519-74247926 19.6437 - 74247701 74247701 46,49,145
+mm10_dna 560454 560455 chr2:74300183-74300724 21.3163 + 74300454 74300454 236,28,36
+mm10_dna 565972 565973 chr2:74305723-74306142 15.4325 - 74305972 74305972 46,49,145
+mm10_dna 915531 915532 chr2:74655214-74655823 11.0865 - 74655531 74655531 46,49,145
+mm10_dna 921783 921784 chr2:74661581-74662231 6.32635 - 74661783 74661783 46,49,145
+mm10_dna 923751 923752 chr2:74663455-74664030 5.5749 - 74663751 74663751 46,49,145
+mm10_dna 933182 933183 chr2:74672881-74673432 11.6759 - 74673182 74673182 46,49,145
+mm10_dna 946486 946487 chr2:74686220-74686785 8.18583 + 74686486 74686486 236,28,36
+mm10_dna 951769 951770 chr2:74691505-74692018 10.8344 + 74691769 74691769 236,28,36
+mm10_dna 954233 954234 chr2:74693923-74694529 6.49186 - 74694233 74694233 46,49,145
+mm10_dna 962401 962402 chr2:74702109-74702707 16.3674 + 74702401 74702401 236,28,36
+mm10_dna 975927 975928 chr2:74715602-74716472 18.4944 + 74715927 74715927 236,28,36
+mm10_dna 1224543 1224544 chr2:74964211-74964829 19.6671 - 74964543 74964543 46,49,145
+mm10_dna 1237548 1237549 chr2:74977360-74977813 6.64332 + 74977548 74977548 236,28,36
diff -r 000000000000 -r db456c398880 test-data/CTCF_peaks_third.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/CTCF_peaks_third.txt Mon Dec 13 15:13:33 2021 +0000
@@ -0,0 +1,67 @@
+PeakID (cmd=annotatePeaks.pl test-data/CTCF_peaks.bed none -gtf test-data/small_simplified.gtf) Chr Start End Strand Peak Score Focus Ratio/Region Size Annotation Detailed Annotation Distance to TSS Nearest PromoterID Entrez ID Nearest Unigene Nearest Refseq Nearest Ensembl Gene Name Gene Alias Gene Description Gene Type
+chr10:96213797-96214300 chr10 96214053 96214053 + 26.3993 NA NA NA NA NA
+chr2:75055077-75055687 chr2 75055370 75055370 - 25.0987 NA Intergenic Intergenic -339374 ENSMUST00000145799 ENSMUSG00000052371 ENSMUST00000145799 Hoxd3os1 antisense
+chr10:95550458-95550913 chr10 95550658 95550658 + 22.4312 NA NA NA NA NA
+chr10:97345244-97345845 chr10 97345535 97345535 + 21.9543 NA NA NA NA NA
+chr2:75150910-75151415 chr2 75151178 75151178 - 21.6232 NA Intergenic Intergenic -435182 ENSMUST00000145799 ENSMUSG00000052371 ENSMUST00000145799 Hoxd3os1 antisense
+chr2:74300183-74300724 chr2 74300455 74300455 + 21.3163 NA Intergenic Intergenic -367855 ENSMUST00000001872 ENSMUSG00000001819 ENSMUST00000001872 Hoxd13 protein_coding
+chr2:75208163-75208613 chr2 75208426 75208426 - 20.9151 NA Intergenic Intergenic -492430 ENSMUST00000145799 ENSMUSG00000052371 ENSMUST00000145799 Hoxd3os1 antisense
+chr10:96033562-96034101 chr10 96033868 96033868 - 20.0255 NA NA NA NA NA
+chr2:74964211-74964829 chr2 74964544 74964544 - 19.6671 NA Intergenic Intergenic -248548 ENSMUST00000145799 ENSMUSG00000052371 ENSMUST00000145799 Hoxd3os1 antisense
+chr2:74247519-74247926 chr2 74247702 74247702 - 19.6437 NA Intergenic Intergenic -420608 ENSMUST00000001872 ENSMUSG00000001819 ENSMUST00000001872 Hoxd13 protein_coding
+chr2:75152384-75152964 chr2 75152697 75152697 - 19.3701 NA Intergenic Intergenic -436701 ENSMUST00000145799 ENSMUSG00000052371 ENSMUST00000145799 Hoxd3os1 antisense
+chr2:73965501-73966069 chr2 73965797 73965797 + 18.8918 NA Intergenic Intergenic -702513 ENSMUST00000001872 ENSMUSG00000001819 ENSMUST00000001872 Hoxd13 protein_coding
+chr2:74715602-74716472 chr2 74715928 74715928 + 18.4944 NA promoter-TSS (ENSMUST00000145799) antisense-promoter-TSS (ENSMUST00000145799) 68 ENSMUST00000145799 ENSMUSG00000052371 ENSMUST00000145799 Hoxd3os1 antisense
+chr2:75786025-75786608 chr2 75786286 75786286 + 17.3776 NA Intergenic Intergenic -1070290 ENSMUST00000145799 ENSMUSG00000052371 ENSMUST00000145799 Hoxd3os1 antisense
+chr10:97375737-97376277 chr10 97375993 97375993 + 17.1427 NA NA NA NA NA
+chr2:74027638-74028176 chr2 74027928 74027928 + 17.1136 NA Intergenic Intergenic -640382 ENSMUST00000001872 ENSMUSG00000001819 ENSMUST00000001872 Hoxd13 protein_coding
+chr10:97712528-97713193 chr10 97712887 97712887 + 16.5834 NA NA NA NA NA
+chr2:73868052-73868532 chr2 73868303 73868303 - 16.5297 NA Intergenic Intergenic -800007 ENSMUST00000001872 ENSMUSG00000001819 ENSMUST00000001872 Hoxd13 protein_coding
+chr2:74702109-74702707 chr2 74702402 74702402 + 16.3674 NA Intergenic Intergenic -1260 ENSMUST00000198895 ENSMUSG00000104570 ENSMUST00000198895 Gm44463 miRNA
+chr10:97312750-97313424 chr10 97313144 97313144 - 16.1617 NA NA NA NA NA
+chr2:75318086-75318603 chr2 75318399 75318399 - 15.8984 NA Intergenic Intergenic -602403 ENSMUST00000145799 ENSMUSG00000052371 ENSMUST00000145799 Hoxd3os1 antisense
+chr10:95557210-95557760 chr10 95557477 95557477 + 15.8474 NA NA NA NA NA
+chr2:75590116-75590691 chr2 75590418 75590418 - 15.844 NA Intergenic Intergenic -874422 ENSMUST00000145799 ENSMUSG00000052371 ENSMUST00000145799 Hoxd3os1 antisense
+chr2:73745562-73746116 chr2 73745859 73745859 - 15.583 NA Intergenic Intergenic -922451 ENSMUST00000001872 ENSMUSG00000001819 ENSMUST00000001872 Hoxd13 protein_coding
+chr2:74305723-74306142 chr2 74305973 74305973 - 15.4325 NA Intergenic Intergenic -362337 ENSMUST00000001872 ENSMUSG00000001819 ENSMUST00000001872 Hoxd13 protein_coding
+chr10:97670243-97670641 chr10 97670412 97670412 - 15.0213 NA NA NA NA NA
+chr2:75137844-75138351 chr2 75138096 75138096 - 14.9999 NA Intergenic Intergenic -422100 ENSMUST00000145799 ENSMUSG00000052371 ENSMUST00000145799 Hoxd3os1 antisense
+chr10:95894657-95895156 chr10 95894942 95894942 - 14.5144 NA NA NA NA NA
+chr10:96131980-96132540 chr10 96132306 96132306 + 14.3011 NA NA NA NA NA
+chr2:75588511-75588998 chr2 75588783 75588783 - 13.9441 NA Intergenic Intergenic -872787 ENSMUST00000145799 ENSMUSG00000052371 ENSMUST00000145799 Hoxd3os1 antisense
+chr2:75312148-75312690 chr2 75312440 75312440 - 12.976 NA Intergenic Intergenic -596444 ENSMUST00000145799 ENSMUSG00000052371 ENSMUST00000145799 Hoxd3os1 antisense
+chr10:96104999-96105605 chr10 96105360 96105360 - 12.8558 NA NA NA NA NA
+chr10:95898384-95898961 chr10 95898657 95898657 + 12.5406 NA NA NA NA NA
+chr10:97704497-97705056 chr10 97704767 97704767 - 12.2858 NA NA NA NA NA
+chr2:73947347-73947809 chr2 73947568 73947568 - 11.7 NA Intergenic Intergenic -720742 ENSMUST00000001872 ENSMUSG00000001819 ENSMUST00000001872 Hoxd13 protein_coding
+chr2:74672881-74673432 chr2 74673183 74673183 - 11.6759 NA Intergenic Intergenic -1830 ENSMUST00000001878 ENSMUSG00000001823 ENSMUST00000001878 Hoxd12 protein_coding
+chr2:75603091-75603554 chr2 75603330 75603330 - 11.5679 NA Intergenic Intergenic -887334 ENSMUST00000145799 ENSMUSG00000052371 ENSMUST00000145799 Hoxd3os1 antisense
+chr2:75605600-75606199 chr2 75605892 75605892 + 11.5138 NA Intergenic Intergenic -889896 ENSMUST00000145799 ENSMUSG00000052371 ENSMUST00000145799 Hoxd3os1 antisense
+chr2:73792224-73792669 chr2 73792431 73792431 + 11.3969 NA Intergenic Intergenic -875879 ENSMUST00000001872 ENSMUSG00000001819 ENSMUST00000001872 Hoxd13 protein_coding
+chr2:74655214-74655823 chr2 74655532 74655532 - 11.0865 NA Intergenic Intergenic -12778 ENSMUST00000001872 ENSMUSG00000001819 ENSMUST00000001872 Hoxd13 protein_coding
+chr2:74691505-74692018 chr2 74691770 74691770 + 10.8344 NA promoter-TSS (ENSMUST00000061745) protein_coding-promoter-TSS (ENSMUST00000061745) -154 ENSMUST00000061745 ENSMUSG00000050368 ENSMUST00000061745 Hoxd10 protein_coding
+chr10:96934538-96935160 chr10 96934834 96934834 - 10.7794 NA NA NA NA NA
+chr2:75781071-75781581 chr2 75781340 75781340 - 10.7504 NA Intergenic Intergenic -1065344 ENSMUST00000145799 ENSMUSG00000052371 ENSMUST00000145799 Hoxd3os1 antisense
+chr10:96182221-96182632 chr10 96182452 96182452 + 9.96549 NA NA NA NA NA
+chr10:95551647-95552156 chr10 95551884 95551884 + 9.8358 NA NA NA NA NA
+chr10:95489648-95490124 chr10 95489849 95489849 + 9.37574 NA NA NA NA NA
+chr10:97160138-97160734 chr10 97160697 97160697 - 9.26797 NA NA NA NA NA
+chr10:95858061-95858620 chr10 95858374 95858374 - 8.91987 NA NA NA NA NA
+chr10:97339748-97340355 chr10 97340070 97340070 - 8.23779 NA NA NA NA NA
+chr2:74686220-74686785 chr2 74686487 74686487 + 8.18583 NA exon (ENSMUST00000048086, exon 2 of 2) protein_coding-exon (ENSMUST00000048086, exon 2 of 2) 3041 ENSMUST00000136302 ENSMUSG00000099521 ENSMUST00000136302 Gm28309 processed_transcript
+chr10:96618332-96618740 chr10 96618519 96618519 + 7.7811 NA NA NA NA NA
+chr2:75563556-75564002 chr2 75563786 75563786 - 6.81984 NA Intergenic Intergenic -847790 ENSMUST00000145799 ENSMUSG00000052371 ENSMUST00000145799 Hoxd3os1 antisense
+chr2:73924997-73925519 chr2 73925251 73925251 + 6.64944 NA Intergenic Intergenic -743059 ENSMUST00000001872 ENSMUSG00000001819 ENSMUST00000001872 Hoxd13 protein_coding
+chr2:74977360-74977813 chr2 74977549 74977549 + 6.64332 NA Intergenic Intergenic -261553 ENSMUST00000145799 ENSMUSG00000052371 ENSMUST00000145799 Hoxd3os1 antisense
+chr2:74693923-74694529 chr2 74694234 74694234 - 6.49186 NA TTS (ENSMUST00000136302) processed_transcript-TTS (ENSMUST00000136302) -1286 ENSMUST00000126966 ENSMUSG00000086077 ENSMUST00000126966 Gm14396 lincRNA
+chr2:75515254-75515778 chr2 75515511 75515511 - 6.45021 NA Intergenic Intergenic -799515 ENSMUST00000145799 ENSMUSG00000052371 ENSMUST00000145799 Hoxd3os1 antisense
+chr2:74661581-74662231 chr2 74661784 74661784 - 6.32635 NA Intergenic Intergenic -6526 ENSMUST00000001872 ENSMUSG00000001819 ENSMUST00000001872 Hoxd13 protein_coding
+chr10:97689400-97689842 chr10 97689621 97689621 - 6.20282 NA NA NA NA NA
+chr10:96080279-96080903 chr10 96080635 96080635 - 6.11901 NA NA NA NA NA
+chr2:73987054-73987519 chr2 73987359 73987359 - 5.75056 NA Intergenic Intergenic -680951 ENSMUST00000001872 ENSMUSG00000001819 ENSMUST00000001872 Hoxd13 protein_coding
+chr2:74663455-74664030 chr2 74663752 74663752 - 5.5749 NA Intergenic Intergenic -4558 ENSMUST00000001872 ENSMUSG00000001819 ENSMUST00000001872 Hoxd13 protein_coding
+chr10:96616623-96617093 chr10 96616855 96616855 - 4.72295 NA NA NA NA NA
+chr2:73998449-73999023 chr2 73998749 73998749 - 4.45417 NA Intergenic Intergenic -669561 ENSMUST00000001872 ENSMUSG00000001819 ENSMUST00000001872 Hoxd13 protein_coding
+chr2:74066544-74067106 chr2 74066831 74066831 + 4.26306 NA Intergenic Intergenic -601479 ENSMUST00000001872 ENSMUSG00000001819 ENSMUST00000001872 Hoxd13 protein_coding
+chr2:75658517-75658909 chr2 75658765 75658765 + 0.26272 NA Intergenic Intergenic -942769 ENSMUST00000145799 ENSMUSG00000052371 ENSMUST00000145799 Hoxd3os1 antisense
+chr10:97768293-97768873 chr10 97768454 97768454 + -1.29559 NA NA NA NA NA
diff -r 000000000000 -r db456c398880 test-data/all_fasta.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/all_fasta.loc Mon Dec 13 15:13:33 2021 +0000
@@ -0,0 +1,7 @@
+# Tab separated with four columns:
+# - value (Galaxy records this in the Galaxy DB)
+# - dbkey (Database key)
+# - name (Galaxy shows this in the UI)
+# - path (the reference file)
+#
+phiX174 phiX174 phiX174 ${__HERE__}/phiX174.fasta
diff -r 000000000000 -r db456c398880 test-data/annotations_default.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotations_default.txt Mon Dec 13 15:13:33 2021 +0000
@@ -0,0 +1,115 @@
+Intergenic chr2 1 74667309 + N 1900000000
+promoter-TSS (ENSMUST00000001872) chr2 74667310 74668410 + P 1
+exon (ENSMUST00000001872, exon 1 of 2) chr2 74668411 74669078 + E 59
+intron (ENSMUST00000001872, intron 1 of 1) chr2 74669079 74669885 + I 158
+exon (ENSMUST00000001872, exon 2 of 2) chr2 74669886 74670136 + E 61
+3' UTR (ENSMUST00000001872, exon 2 of 2) chr2 74670137 74671498 + 3UTR 60
+TTS (ENSMUST00000001872) chr2 74671499 74672599 + TTS 30
+Intergenic--2 chr2 74672600 74674012 + N 1900000000
+promoter-TSS (ENSMUST00000001878) chr2 74674013 74675113 + P 2
+exon (ENSMUST00000001878, exon 1 of 2) chr2 74675114 74675654 + E 63
+intron (ENSMUST00000001878, intron 1 of 1) chr2 74675655 74675813 + I 158
+exon (ENSMUST00000001878, exon 2 of 2) chr2 74675814 74676052 + E 65
+3' UTR (ENSMUST00000001878, exon 2 of 2) chr2 74676053 74677604 + 3UTR 64
+TTS (ENSMUST00000001878) chr2 74677605 74678556 + TTS 31
+promoter-TSS (ENSMUST00000048086) chr2 74678557 74679657 + P 6
+exon (ENSMUST00000048086, exon 1 of 2) chr2 74679658 74679809 + E 83
+intron (ENSMUST00000048086, intron 1 of 1) chr2 74679810 74681322 + I 158
+promoter-TSS (ENSMUST00000142312) chr2 74681323 74682423 + P 20
+exon (ENSMUST00000142312, exon 1 of 2) chr2 74682424 74682445 + E 131
+promoter-TSS (ENSMUST00000136302) chr2 74682446 74683546 + P 17
+intron (ENSMUST00000136302, intron 1 of 2) chr2 74683547 74683902 + I 158
+exon (ENSMUST00000048086, exon 2 of 2) chr2 74683903 74684067 + E 84
+TTS (ENSMUST00000142312) chr2 74684068 74685168 + TTS 49
+intron (ENSMUST00000136302, intron 2 of 2) chr2 74684357 74685168 + I 158
+exon (ENSMUST00000048086, exon 2 of 2)--2 chr2 74685169 74686915 + E 84
+TTS (ENSMUST00000048086) chr2 74686916 74688016 + TTS 35
+intron (ENSMUST00000136302, intron 2 of 2)--2 chr2 74688017 74690923 + I 158
+promoter-TSS (ENSMUST00000061745) chr2 74690924 74692024 + P 9
+exon (ENSMUST00000061745, exon 1 of 2) chr2 74692025 74692724 + E 96
+intron (ENSMUST00000061745, intron 1 of 1) chr2 74692725 74694090 + I 158
+exon (ENSMUST00000061745, exon 2 of 2) chr2 74694091 74694093 + E 98
+TTS (ENSMUST00000136302) chr2 74694094 74694519 + TTS 46
+promoter-TSS (ENSMUST00000126966) chr2 74694520 74695620 + P 15
+promoter-TSS (ENSMUST00000152027) chr2 74695621 74695701 + P 25
+TTS (ENSMUST00000061745) chr2 74695702 74696105 + TTS 38
+TTS (ENSMUST00000190845) chr2 74696106 74696726 - TTS 57
+promoter-TSS (ENSMUST00000059272) chr2 74696727 74697827 + P 8
+promoter-TSS (ENSMUST00000190845) chr2 74697828 74698586 - P 28
+exon (ENSMUST00000059272, exon 1 of 2) chr2 74698587 74698833 + E 92
+intron (ENSMUST00000059272, intron 1 of 1) chr2 74698834 74699179 + I 158
+exon (ENSMUST00000059272, exon 2 of 2) chr2 74699180 74699421 + E 94
+3' UTR (ENSMUST00000059272, exon 2 of 2) chr2 74699422 74700005 + 3UTR 93
+TTS (ENSMUST00000198895) chr2 74700006 74700107 - TTS 58
+TTS (ENSMUST00000059272) chr2 74700108 74701041 + TTS 37
+promoter-TSS (ENSMUST00000198895) chr2 74701042 74702142 - P 29
+Intergenic--3 chr2 74702143 74702247 + N 1900000000
+TTS (ENSMUST00000132326) chr2 74702248 74703348 - TTS 45
+exon (ENSMUST00000132326, exon 2 of 2) chr2 74703349 74703614 - E 120
+promoter-TSS (ENSMUST00000151380) chr2 74703615 74703904 + P 24
+promoter-TSS (ENSMUST00000019749) chr2 74703905 74705005 + P 3
+promoter-TSS (ENSMUST00000074721) chr2 74705006 74705252 + P 10
+5' UTR (ENSMUST00000019749, exon 1 of 2) chr2 74705253 74705547 + 5UTR 66
+exon (ENSMUST00000019749, exon 1 of 2) chr2 74705548 74706118 + E 67
+intron (ENSMUST00000074721, intron 1 of 1) chr2 74706119 74706513 + I 158
+exon (ENSMUST00000019749, exon 2 of 2) chr2 74706514 74706711 + E 69
+TTS (ENSMUST00000151380) chr2 74706712 74707206 + TTS 53
+TTS (ENSMUST00000074721) chr2 74707207 74707832 + TTS 39
+TTS (ENSMUST00000019749) chr2 74707833 74708933 + TTS 32
+intron (ENSMUST00000132326, intron 1 of 1) chr2 74708934 74709042 - I 158
+TTS (ENSMUST00000156342) chr2 74709043 74709192 - TTS 55
+TTS (ENSMUST00000145799) chr2 74709193 74710293 - TTS 52
+intron (ENSMUST00000145799, intron 2 of 2) chr2 74710294 74710926 - I 158
+promoter-TSS (ENSMUST00000111983) chr2 74710927 74712027 + P 14
+promoter-TSS (ENSMUST00000132326) chr2 74712028 74713054 - P 16
+promoter-TSS (ENSMUST00000156342) chr2 74713055 74713526 - P 26
+exon (ENSMUST00000139005, exon 2 of 2) chr2 74713527 74714401 - E 125
+intron (ENSMUST00000145799, intron 1 of 2) chr2 74714402 74715705 - I 158
+exon (ENSMUST00000139005, exon 1 of 2) chr2 74715706 74715895 - E 126
+promoter-TSS (ENSMUST00000145799) chr2 74715896 74716029 - P 23
+promoter-TSS (ENSMUST00000139005) chr2 74716030 74717130 - P 18
+intron (ENSMUST00000111983, intron 1 of 2) chr2 74717131 74720977 + I 158
+promoter-TSS (ENSMUST00000047904) chr2 74720978 74722078 + P 5
+5' UTR (ENSMUST00000047904, exon 1 of 6) chr2 74722079 74722177 + 5UTR 75
+intron (ENSMUST00000047904, intron 1 of 5) chr2 74722178 74722552 + I 158
+5' UTR (ENSMUST00000047904, exon 2 of 6) chr2 74722553 74722631 + 5UTR 76
+intron (ENSMUST00000144040, intron 2 of 6) chr2 74722632 74723331 + I 158
+5' UTR (ENSMUST00000047904, exon 3 of 6) chr2 74723332 74723367 + 5UTR 77
+intron (ENSMUST00000144040, intron 3 of 6) chr2 74723368 74725069 + I 158
+promoter-TSS (ENSMUST00000083566) chr2 74725070 74725727 + P 11
+promoter-TSS (ENSMUST00000053932) chr2 74725728 74726828 + P 7
+promoter-TSS (ENSMUST00000111980) chr2 74726829 74727174 + P 12
+5' UTR (ENSMUST00000047904, exon 5 of 6) chr2 74727175 74727278 + 5UTR 79
+exon (ENSMUST00000047904, exon 5 of 6) chr2 74727279 74727705 + E 80
+intron (ENSMUST00000053932, intron 1 of 3) chr2 74727706 74728261 + I 158
+exon (ENSMUST00000047904, exon 6 of 6) chr2 74728262 74728587 + E 82
+3' UTR (ENSMUST00000047904, exon 6 of 6) chr2 74728588 74728767 + 3UTR 81
+TTS (ENSMUST00000144040) chr2 74728768 74729022 + TTS 50
+TTS (ENSMUST00000111980) chr2 74729023 74729059 + TTS 41
+TTS (ENSMUST00000047904) chr2 74729060 74730160 + TTS 34
+intron (ENSMUST00000053932, intron 2 of 3) chr2 74730161 74731912 + I 158
+promoter-TSS (ENSMUST00000144544) chr2 74731913 74733013 + P 22
+5' UTR (ENSMUST00000144544, exon 1 of 2) chr2 74733014 74733040 + 5UTR 141
+intron (ENSMUST00000144544, intron 1 of 1) chr2 74733041 74735514 + I 158
+promoter-TSS (ENSMUST00000111982) chr2 74735515 74736615 + P 13
+5' UTR (ENSMUST00000111982, exon 1 of 3) chr2 74736616 74736705 + 5UTR 108
+intron (ENSMUST00000111982, intron 1 of 2) chr2 74736706 74738618 + I 158
+promoter-TSS (ENSMUST00000047830) chr2 74738619 74739719 + P 4
+intron (ENSMUST00000047830, intron 1 of 2) chr2 74739720 74743927 + I 158
+5' UTR (ENSMUST00000047830, exon 2 of 3) chr2 74743928 74744011 + 5UTR 71
+exon (ENSMUST00000047830, exon 2 of 3) chr2 74744012 74744113 + E 72
+TTS (ENSMUST00000144544) chr2 74744114 74744673 + TTS 51
+promoter-TSS (ENSMUST00000140666) chr2 74744674 74745774 + P 19
+promoter-TSS (ENSMUST00000190553) chr2 74745775 74745822 + P 27
+5' UTR (ENSMUST00000140666, exon 1 of 2) chr2 74745823 74746156 + 5UTR 127
+exon (ENSMUST00000140666, exon 1 of 2) chr2 74746157 74746220 + E 128
+exon (ENSMUST00000190553, exon 1 of 1) chr2 74746221 74746321 + E 155
+exon (ENSMUST00000047830, exon 3 of 3) chr2 74746322 74746495 + E 74
+TTS (ENSMUST00000140666) chr2 74746496 74747596 + TTS 48
+3' UTR (ENSMUST00000111983, exon 3 of 3) chr2 74747080 74747799 + 3UTR 116
+3' UTR (ENSMUST00000047830, exon 3 of 3) chr2 74747597 74747799 + 3UTR 73
+TTS (ENSMUST00000047830) chr2 74747800 74748900 + TTS 33
+TTS (ENSMUST00000053932) chr2 74748901 74748918 + TTS 36
+TTS (ENSMUST00000111983) chr2 74748919 74749271 + TTS 43
+TTS (ENSMUST00000190553) chr2 74749272 74749442 + TTS 56
+Intergenic--4 chr2 74749443 1500000000 + N 1900000000
diff -r 000000000000 -r db456c398880 test-data/annotations_default_simplified.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotations_default_simplified.txt Mon Dec 13 15:13:33 2021 +0000
@@ -0,0 +1,87 @@
+Intergenic chr2 1 74667309 + N 1900000000
+promoter-TSS (ENSMUST00000001872) chr2 74667310 74668410 + P 1
+exon (ENSMUST00000001872, exon 1 of 2) chr2 74668411 74669078 + E 33
+intron (ENSMUST00000001872, intron 1 of 1) chr2 74669079 74669885 + I 88
+exon (ENSMUST00000001872, exon 2 of 2) chr2 74669886 74670136 + E 35
+3' UTR (ENSMUST00000001872, exon 2 of 2) chr2 74670137 74671498 + 3UTR 34
+TTS (ENSMUST00000001872) chr2 74671499 74672599 + TTS 17
+Intergenic--2 chr2 74672600 74674012 + N 1900000000
+promoter-TSS (ENSMUST00000001878) chr2 74674013 74675113 + P 2
+exon (ENSMUST00000001878, exon 1 of 2) chr2 74675114 74675654 + E 37
+intron (ENSMUST00000001878, intron 1 of 1) chr2 74675655 74675813 + I 88
+exon (ENSMUST00000001878, exon 2 of 2) chr2 74675814 74676052 + E 39
+3' UTR (ENSMUST00000001878, exon 2 of 2) chr2 74676053 74677604 + 3UTR 38
+TTS (ENSMUST00000001878) chr2 74677605 74678556 + TTS 18
+promoter-TSS (ENSMUST00000048086) chr2 74678557 74679657 + P 4
+exon (ENSMUST00000048086, exon 1 of 2) chr2 74679658 74679809 + E 44
+intron (ENSMUST00000048086, intron 1 of 1) chr2 74679810 74681322 + I 88
+promoter-TSS (ENSMUST00000142312) chr2 74681323 74682423 + P 11
+exon (ENSMUST00000142312, exon 1 of 2) chr2 74682424 74682445 + E 69
+promoter-TSS (ENSMUST00000136302) chr2 74682446 74683546 + P 10
+intron (ENSMUST00000136302, intron 1 of 2) chr2 74683547 74683902 + I 88
+exon (ENSMUST00000048086, exon 2 of 2) chr2 74683903 74684067 + E 45
+TTS (ENSMUST00000142312) chr2 74684068 74685168 + TTS 27
+intron (ENSMUST00000136302, intron 2 of 2) chr2 74684357 74685168 + I 88
+exon (ENSMUST00000048086, exon 2 of 2)--2 chr2 74685169 74686915 + E 45
+TTS (ENSMUST00000048086) chr2 74686916 74688016 + TTS 20
+intron (ENSMUST00000136302, intron 2 of 2)--2 chr2 74688017 74690923 + I 88
+promoter-TSS (ENSMUST00000061745) chr2 74690924 74692024 + P 6
+exon (ENSMUST00000061745, exon 1 of 2) chr2 74692025 74692724 + E 51
+intron (ENSMUST00000061745, intron 1 of 1) chr2 74692725 74694090 + I 88
+exon (ENSMUST00000061745, exon 2 of 2) chr2 74694091 74694093 + E 53
+TTS (ENSMUST00000136302) chr2 74694094 74694519 + TTS 26
+promoter-TSS (ENSMUST00000126966) chr2 74694520 74695620 + P 9
+TTS (ENSMUST00000061745) chr2 74695621 74696105 + TTS 22
+TTS (ENSMUST00000190845) chr2 74696106 74696726 - TTS 31
+promoter-TSS (ENSMUST00000059272) chr2 74696727 74697827 + P 5
+promoter-TSS (ENSMUST00000190845) chr2 74697828 74698586 - P 15
+exon (ENSMUST00000059272, exon 1 of 2) chr2 74698587 74698833 + E 47
+intron (ENSMUST00000059272, intron 1 of 1) chr2 74698834 74699179 + I 88
+exon (ENSMUST00000059272, exon 2 of 2) chr2 74699180 74699421 + E 49
+3' UTR (ENSMUST00000059272, exon 2 of 2) chr2 74699422 74700005 + 3UTR 48
+TTS (ENSMUST00000198895) chr2 74700006 74700107 - TTS 32
+TTS (ENSMUST00000059272) chr2 74700108 74701041 + TTS 21
+promoter-TSS (ENSMUST00000198895) chr2 74701042 74702142 - P 16
+Intergenic--3 chr2 74702143 74703614 + N 1900000000
+promoter-TSS (ENSMUST00000151380) chr2 74703615 74703904 + P 14
+promoter-TSS (ENSMUST00000019749) chr2 74703905 74705005 + P 3
+promoter-TSS (ENSMUST00000074721) chr2 74705006 74705252 + P 7
+5' UTR (ENSMUST00000019749, exon 1 of 2) chr2 74705253 74705547 + 5UTR 40
+exon (ENSMUST00000019749, exon 1 of 2) chr2 74705548 74706118 + E 41
+intron (ENSMUST00000074721, intron 1 of 1) chr2 74706119 74706513 + I 88
+exon (ENSMUST00000019749, exon 2 of 2) chr2 74706514 74706711 + E 43
+TTS (ENSMUST00000151380) chr2 74706712 74707206 + TTS 30
+TTS (ENSMUST00000074721) chr2 74707207 74707832 + TTS 23
+TTS (ENSMUST00000019749) chr2 74707833 74708933 + TTS 19
+Intergenic--4 chr2 74708934 74709192 + N 1900000000
+TTS (ENSMUST00000145799) chr2 74709193 74710293 - TTS 29
+intron (ENSMUST00000145799, intron 2 of 2) chr2 74710294 74710926 - I 88
+promoter-TSS (ENSMUST00000111983) chr2 74710927 74712027 + P 8
+promoter-TSS (ENSMUST00000144040) chr2 74712028 74712029 + P 12
+5' UTR (ENSMUST00000111983, exon 1 of 3) chr2 74712030 74712316 + 5UTR 58
+intron (ENSMUST00000111983, intron 1 of 2) chr2 74712317 74714270 + I 88
+exon (ENSMUST00000145799, exon 2 of 3) chr2 74714271 74714401 - E 80
+intron (ENSMUST00000145799, intron 1 of 2) chr2 74714402 74715705 - I 88
+exon (ENSMUST00000145799, exon 1 of 3) chr2 74715706 74715895 - E 81
+promoter-TSS (ENSMUST00000145799) chr2 74715896 74716996 - P 13
+intron (ENSMUST00000111983, intron 1 of 2)--2 chr2 74716997 74722552 + I 88
+exon (ENSMUST00000144040, exon 2 of 7) chr2 74722553 74722631 + E 73
+intron (ENSMUST00000144040, intron 2 of 6) chr2 74722632 74723331 + I 88
+exon (ENSMUST00000144040, exon 3 of 7) chr2 74723332 74723367 + E 74
+intron (ENSMUST00000144040, intron 3 of 6) chr2 74723368 74725462 + I 88
+exon (ENSMUST00000144040, exon 4 of 7) chr2 74725463 74725597 + E 75
+intron (ENSMUST00000144040, intron 4 of 6) chr2 74725598 74725844 + I 88
+exon (ENSMUST00000144040, exon 5 of 7) chr2 74725845 74725978 + E 76
+intron (ENSMUST00000144040, intron 5 of 6) chr2 74725979 74726875 + I 88
+exon (ENSMUST00000144040, exon 6 of 7) chr2 74726876 74726932 + E 77
+intron (ENSMUST00000144040, intron 6 of 6) chr2 74726933 74728358 + I 88
+exon (ENSMUST00000144040, exon 7 of 7) chr2 74728359 74728767 + E 78
+TTS (ENSMUST00000144040) chr2 74728768 74729868 + TTS 28
+intron (ENSMUST00000111983, intron 1 of 2)--3 chr2 74729869 74743927 + I 88
+5' UTR (ENSMUST00000111983, exon 2 of 3) chr2 74743928 74744011 + 5UTR 59
+exon (ENSMUST00000111983, exon 2 of 3) chr2 74744012 74744555 + E 60
+intron (ENSMUST00000111983, intron 2 of 2) chr2 74744556 74746321 + I 88
+exon (ENSMUST00000111983, exon 3 of 3) chr2 74746322 74747079 + E 62
+3' UTR (ENSMUST00000111983, exon 3 of 3) chr2 74747080 74748170 + 3UTR 61
+TTS (ENSMUST00000111983) chr2 74748171 74749271 + TTS 24
+Intergenic--5 chr2 74749272 1500000000 + N 1900000000
diff -r 000000000000 -r db456c398880 test-data/annotations_default_stats.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotations_default_stats.txt Mon Dec 13 15:13:33 2021 +0000
@@ -0,0 +1,8 @@
+Annotation Number of peaks Total size (bp) Log2 Ratio (obs/exp) LogP enrichment (+values depleted)
+3UTR 5.0 4601 8.659 -26.809
+TTS 47.0 16013 10.092 -290.509
+Exon 32.0 8271 10.491 -205.971
+Intron 24.0 28234 8.304 -118.153
+Intergenic 0.0 74668827 -17.479 1369.495
+Promoter 81.0 23880 10.301 -519.342
+5UTR 12.0 1148 11.925 -89.692
diff -r 000000000000 -r db456c398880 test-data/annotations_exon_start.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotations_exon_start.txt Mon Dec 13 15:13:33 2021 +0000
@@ -0,0 +1,110 @@
+Intergenic chr2 1 74667309 + N 1900000000
+promoter-TSS (ENSMUST00000001872) chr2 74667310 74668410 + P 1
+exon (ENSMUST00000001872, exon 1 of 2) chr2 74668411 74669078 + E 59
+intron (ENSMUST00000001872, intron 1 of 1) chr2 74669079 74669885 + I 145
+exon (ENSMUST00000001872, exon 2 of 2) chr2 74669886 74671498 + E 60
+TTS (ENSMUST00000001872) chr2 74671499 74672599 + TTS 30
+Intergenic--2 chr2 74672600 74674012 + N 1900000000
+promoter-TSS (ENSMUST00000001878) chr2 74674013 74675113 + P 2
+exon (ENSMUST00000001878, exon 1 of 2) chr2 74675114 74675654 + E 62
+intron (ENSMUST00000001878, intron 1 of 1) chr2 74675655 74675813 + I 145
+exon (ENSMUST00000001878, exon 2 of 2) chr2 74675814 74677604 + E 63
+TTS (ENSMUST00000001878) chr2 74677605 74678556 + TTS 31
+promoter-TSS (ENSMUST00000048086) chr2 74678557 74679657 + P 6
+exon (ENSMUST00000048086, exon 1 of 2) chr2 74679658 74679809 + E 78
+intron (ENSMUST00000048086, intron 1 of 1) chr2 74679810 74681322 + I 145
+promoter-TSS (ENSMUST00000142312) chr2 74681323 74682423 + P 20
+exon (ENSMUST00000142312, exon 1 of 2) chr2 74682424 74682445 + E 119
+promoter-TSS (ENSMUST00000136302) chr2 74682446 74683546 + P 17
+intron (ENSMUST00000136302, intron 1 of 2) chr2 74683547 74683902 + I 145
+exon (ENSMUST00000048086, exon 2 of 2) chr2 74683903 74684067 + E 79
+TTS (ENSMUST00000142312) chr2 74684068 74685168 + TTS 49
+intron (ENSMUST00000136302, intron 2 of 2) chr2 74684357 74685168 + I 145
+exon (ENSMUST00000048086, exon 2 of 2)--2 chr2 74685169 74686915 + E 79
+TTS (ENSMUST00000048086) chr2 74686916 74688016 + TTS 35
+intron (ENSMUST00000136302, intron 2 of 2)--2 chr2 74688017 74690923 + I 145
+promoter-TSS (ENSMUST00000061745) chr2 74690924 74692024 + P 9
+exon (ENSMUST00000061745, exon 1 of 2) chr2 74692025 74692724 + E 89
+intron (ENSMUST00000061745, intron 1 of 1) chr2 74692725 74694090 + I 145
+exon (ENSMUST00000061745, exon 2 of 2) chr2 74694091 74694093 + E 90
+TTS (ENSMUST00000136302) chr2 74694094 74694519 + TTS 46
+promoter-TSS (ENSMUST00000126966) chr2 74694520 74695620 + P 15
+promoter-TSS (ENSMUST00000152027) chr2 74695621 74695701 + P 25
+TTS (ENSMUST00000061745) chr2 74695702 74696105 + TTS 38
+TTS (ENSMUST00000190845) chr2 74696106 74696726 - TTS 57
+promoter-TSS (ENSMUST00000059272) chr2 74696727 74697827 + P 8
+promoter-TSS (ENSMUST00000190845) chr2 74697828 74698586 - P 28
+exon (ENSMUST00000059272, exon 1 of 2) chr2 74698587 74698833 + E 86
+intron (ENSMUST00000059272, intron 1 of 1) chr2 74698834 74699179 + I 145
+exon (ENSMUST00000059272, exon 2 of 2) chr2 74699180 74700005 + E 87
+TTS (ENSMUST00000198895) chr2 74700006 74700107 - TTS 58
+TTS (ENSMUST00000059272) chr2 74700108 74701041 + TTS 37
+promoter-TSS (ENSMUST00000198895) chr2 74701042 74702142 - P 29
+Intergenic--3 chr2 74702143 74702247 + N 1900000000
+TTS (ENSMUST00000132326) chr2 74702248 74703348 - TTS 45
+exon (ENSMUST00000132326, exon 2 of 2) chr2 74703349 74703614 - E 108
+promoter-TSS (ENSMUST00000151380) chr2 74703615 74703904 + P 24
+promoter-TSS (ENSMUST00000019749) chr2 74703905 74705005 + P 3
+promoter-TSS (ENSMUST00000074721) chr2 74705006 74705252 + P 10
+5' UTR (ENSMUST00000019749, exon 1 of 2) chr2 74705253 74705547 + 5UTR 64
+exon (ENSMUST00000019749, exon 1 of 2) chr2 74705548 74706118 + E 65
+intron (ENSMUST00000074721, intron 1 of 1) chr2 74706119 74706513 + I 145
+exon (ENSMUST00000019749, exon 2 of 2) chr2 74706514 74706711 + E 66
+TTS (ENSMUST00000151380) chr2 74706712 74707206 + TTS 53
+TTS (ENSMUST00000074721) chr2 74707207 74707832 + TTS 39
+TTS (ENSMUST00000019749) chr2 74707833 74708933 + TTS 32
+intron (ENSMUST00000132326, intron 1 of 1) chr2 74708934 74709042 - I 145
+TTS (ENSMUST00000156342) chr2 74709043 74709192 - TTS 55
+TTS (ENSMUST00000145799) chr2 74709193 74710293 - TTS 52
+intron (ENSMUST00000145799, intron 2 of 2) chr2 74710294 74710926 - I 145
+promoter-TSS (ENSMUST00000111983) chr2 74710927 74712027 + P 14
+promoter-TSS (ENSMUST00000132326) chr2 74712028 74713054 - P 16
+promoter-TSS (ENSMUST00000156342) chr2 74713055 74713526 - P 26
+exon (ENSMUST00000139005, exon 2 of 2) chr2 74713527 74714401 - E 113
+intron (ENSMUST00000145799, intron 1 of 2) chr2 74714402 74715705 - I 145
+exon (ENSMUST00000139005, exon 1 of 2) chr2 74715706 74715895 - E 114
+promoter-TSS (ENSMUST00000145799) chr2 74715896 74716029 - P 23
+promoter-TSS (ENSMUST00000139005) chr2 74716030 74717130 - P 18
+intron (ENSMUST00000111983, intron 1 of 2) chr2 74717131 74720977 + I 145
+promoter-TSS (ENSMUST00000047904) chr2 74720978 74722078 + P 5
+5' UTR (ENSMUST00000047904, exon 1 of 6) chr2 74722079 74722177 + 5UTR 71
+intron (ENSMUST00000047904, intron 1 of 5) chr2 74722178 74722552 + I 145
+5' UTR (ENSMUST00000047904, exon 2 of 6) chr2 74722553 74722631 + 5UTR 72
+intron (ENSMUST00000144040, intron 2 of 6) chr2 74722632 74723331 + I 145
+5' UTR (ENSMUST00000047904, exon 3 of 6) chr2 74723332 74723367 + 5UTR 73
+intron (ENSMUST00000144040, intron 3 of 6) chr2 74723368 74725069 + I 145
+promoter-TSS (ENSMUST00000083566) chr2 74725070 74725727 + P 11
+promoter-TSS (ENSMUST00000053932) chr2 74725728 74726828 + P 7
+promoter-TSS (ENSMUST00000111980) chr2 74726829 74727174 + P 12
+5' UTR (ENSMUST00000047904, exon 5 of 6) chr2 74727175 74727278 + 5UTR 75
+exon (ENSMUST00000047904, exon 5 of 6) chr2 74727279 74727705 + E 76
+intron (ENSMUST00000053932, intron 1 of 3) chr2 74727706 74728261 + I 145
+exon (ENSMUST00000047904, exon 6 of 6) chr2 74728262 74728767 + E 77
+TTS (ENSMUST00000144040) chr2 74728768 74729022 + TTS 50
+TTS (ENSMUST00000111980) chr2 74729023 74729059 + TTS 41
+TTS (ENSMUST00000047904) chr2 74729060 74730160 + TTS 34
+intron (ENSMUST00000053932, intron 2 of 3) chr2 74730161 74731912 + I 145
+promoter-TSS (ENSMUST00000144544) chr2 74731913 74733013 + P 22
+5' UTR (ENSMUST00000144544, exon 1 of 2) chr2 74733014 74733040 + 5UTR 128
+intron (ENSMUST00000144544, intron 1 of 1) chr2 74733041 74735514 + I 145
+promoter-TSS (ENSMUST00000111982) chr2 74735515 74736615 + P 13
+5' UTR (ENSMUST00000111982, exon 1 of 3) chr2 74736616 74736705 + 5UTR 98
+intron (ENSMUST00000111982, intron 1 of 2) chr2 74736706 74738618 + I 145
+promoter-TSS (ENSMUST00000047830) chr2 74738619 74739719 + P 4
+intron (ENSMUST00000047830, intron 1 of 2) chr2 74739720 74743927 + I 145
+5' UTR (ENSMUST00000047830, exon 2 of 3) chr2 74743928 74744011 + 5UTR 68
+exon (ENSMUST00000047830, exon 2 of 3) chr2 74744012 74744113 + E 69
+TTS (ENSMUST00000144544) chr2 74744114 74744673 + TTS 51
+promoter-TSS (ENSMUST00000140666) chr2 74744674 74745774 + P 19
+promoter-TSS (ENSMUST00000190553) chr2 74745775 74745822 + P 27
+5' UTR (ENSMUST00000140666, exon 1 of 2) chr2 74745823 74746156 + 5UTR 115
+exon (ENSMUST00000140666, exon 1 of 2) chr2 74746157 74746220 + E 116
+exon (ENSMUST00000190553, exon 1 of 1) chr2 74746221 74746321 + E 142
+exon (ENSMUST00000047830, exon 3 of 3) chr2 74746322 74746495 + E 70
+TTS (ENSMUST00000140666) chr2 74746496 74747596 + TTS 48
+exon (ENSMUST00000047830, exon 3 of 3)--2 chr2 74747597 74747799 + E 70
+TTS (ENSMUST00000047830) chr2 74747800 74748900 + TTS 33
+TTS (ENSMUST00000053932) chr2 74748901 74748918 + TTS 36
+TTS (ENSMUST00000111983) chr2 74748919 74749271 + TTS 43
+TTS (ENSMUST00000190553) chr2 74749272 74749442 + TTS 56
+Intergenic--4 chr2 74749443 1500000000 + N 1900000000
diff -r 000000000000 -r db456c398880 test-data/annotations_exon_start_stats.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotations_exon_start_stats.txt Mon Dec 13 15:13:33 2021 +0000
@@ -0,0 +1,7 @@
+Annotation Number of peaks Total size (bp) Log2 Ratio (obs/exp) LogP enrichment (+values depleted)
+TTS 41.0 16013 9.992 -250.473
+Exon 30.0 12152 9.939 -181.768
+Intron 24.0 28234 8.401 -119.856
+Intergenic 0.0 74668827 -17.395 1282.574
+Promoter 81.0 23880 10.397 -526.320
+5UTR 12.0 1148 12.021 -90.517
diff -r 000000000000 -r db456c398880 test-data/annotations_small_TSSTTS.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotations_small_TSSTTS.txt Mon Dec 13 15:13:33 2021 +0000
@@ -0,0 +1,164 @@
+Intergenic chr2 1 74668259 + N 1900000000
+promoter-TSS (ENSMUST00000001872) chr2 74668260 74668360 + P 1
+exon (ENSMUST00000001872, exon 1 of 2) chr2 74668361 74669078 + E 59
+intron (ENSMUST00000001872, intron 1 of 1) chr2 74669079 74669885 + I 158
+exon (ENSMUST00000001872, exon 2 of 2) chr2 74669886 74670136 + E 61
+3' UTR (ENSMUST00000001872, exon 2 of 2) chr2 74670137 74671548 + 3UTR 60
+TTS (ENSMUST00000001872) chr2 74671549 74671649 + TTS 30
+Intergenic--2 chr2 74671650 74674962 + N 1900000000
+promoter-TSS (ENSMUST00000001878) chr2 74674963 74675063 + P 2
+5' UTR (ENSMUST00000001878, exon 1 of 2) chr2 74675064 74675086 + 5UTR 62
+exon (ENSMUST00000001878, exon 1 of 2) chr2 74675087 74675654 + E 63
+intron (ENSMUST00000001878, intron 1 of 1) chr2 74675655 74675813 + I 158
+exon (ENSMUST00000001878, exon 2 of 2) chr2 74675814 74676052 + E 65
+3' UTR (ENSMUST00000001878, exon 2 of 2) chr2 74676053 74677654 + 3UTR 64
+TTS (ENSMUST00000001878) chr2 74677655 74677755 + TTS 31
+Intergenic--3 chr2 74677756 74679506 + N 1900000000
+promoter-TSS (ENSMUST00000048086) chr2 74679507 74679607 + P 6
+exon (ENSMUST00000048086, exon 1 of 2) chr2 74679608 74679809 + E 83
+intron (ENSMUST00000048086, intron 1 of 1) chr2 74679810 74682272 + I 158
+promoter-TSS (ENSMUST00000142312) chr2 74682273 74682373 + P 20
+5' UTR (ENSMUST00000142312, exon 1 of 2) chr2 74682374 74682392 + 5UTR 130
+exon (ENSMUST00000142312, exon 1 of 2) chr2 74682393 74683167 + E 131
+intron (ENSMUST00000142312, intron 1 of 1) chr2 74683168 74683395 + I 158
+promoter-TSS (ENSMUST00000136302) chr2 74683396 74683496 + P 17
+exon (ENSMUST00000136302, exon 1 of 3) chr2 74683497 74683509 + E 122
+intron (ENSMUST00000136302, intron 1 of 2) chr2 74683510 74683902 + I 158
+exon (ENSMUST00000048086, exon 2 of 2) chr2 74683903 74684117 + E 84
+TTS (ENSMUST00000142312) chr2 74684118 74684218 + TTS 49
+exon (ENSMUST00000048086, exon 2 of 2)--2 chr2 74684219 74686965 + E 84
+TTS (ENSMUST00000048086) chr2 74686966 74687066 + TTS 35
+intron (ENSMUST00000136302, intron 2 of 2) chr2 74687067 74691873 + I 158
+promoter-TSS (ENSMUST00000061745) chr2 74691874 74691974 + P 9
+5' UTR (ENSMUST00000061745, exon 1 of 2) chr2 74691975 74691979 + 5UTR 95
+exon (ENSMUST00000061745, exon 1 of 2) chr2 74691980 74692724 + E 96
+intron (ENSMUST00000061745, intron 1 of 1) chr2 74692725 74694090 + I 158
+exon (ENSMUST00000061745, exon 2 of 2) chr2 74694091 74694143 + E 98
+TTS (ENSMUST00000136302) chr2 74694144 74694244 + TTS 46
+exon (ENSMUST00000061745, exon 2 of 2)--2 chr2 74694245 74694368 + E 98
+3' UTR (ENSMUST00000061745, exon 2 of 2) chr2 74694369 74695054 + 3UTR 97
+TTS (ENSMUST00000061745) chr2 74695055 74695155 + TTS 38
+Intergenic--4 chr2 74695156 74695469 + N 1900000000
+promoter-TSS (ENSMUST00000126966) chr2 74695470 74695570 + P 15
+promoter-TSS (ENSMUST00000152027) chr2 74695571 74695651 + P 25
+exon (ENSMUST00000126966, exon 1 of 2) chr2 74695652 74695732 + E 118
+intron (ENSMUST00000126966, intron 1 of 1) chr2 74695733 74696717 + I 158
+TTS (ENSMUST00000190845) chr2 74696718 74696818 - TTS 57
+exon (ENSMUST00000190845, exon 2 of 2) chr2 74696819 74696841 - E 156
+intron (ENSMUST00000190845, intron 1 of 1) chr2 74696842 74697030 - I 158
+exon (ENSMUST00000126966, exon 2 of 2) chr2 74697031 74697314 + E 119
+TTS (ENSMUST00000126966) chr2 74697315 74697415 + TTS 44
+exon (ENSMUST00000190845, exon 1 of 2) chr2 74697416 74697535 - E 157
+promoter-TSS (ENSMUST00000190845) chr2 74697536 74697636 - P 28
+Intergenic--5 chr2 74697637 74697676 + N 1900000000
+promoter-TSS (ENSMUST00000059272) chr2 74697677 74697777 + P 8
+5' UTR (ENSMUST00000059272, exon 1 of 2) chr2 74697778 74698055 + 5UTR 91
+exon (ENSMUST00000059272, exon 1 of 2) chr2 74698056 74698833 + E 92
+intron (ENSMUST00000059272, intron 1 of 1) chr2 74698834 74699179 + I 158
+exon (ENSMUST00000059272, exon 2 of 2) chr2 74699180 74699421 + E 94
+3' UTR (ENSMUST00000059272, exon 2 of 2) chr2 74699422 74700157 + 3UTR 93
+TTS (ENSMUST00000059272) chr2 74700158 74700258 + TTS 37
+Intergenic--6 chr2 74700259 74700955 + N 1900000000
+TTS (ENSMUST00000198895) chr2 74700956 74701056 - TTS 58
+exon (ENSMUST00000198895, exon 1 of 1) chr2 74701057 74701091 - E 158
+promoter-TSS (ENSMUST00000198895) chr2 74701092 74701192 - P 29
+Intergenic--7 chr2 74701193 74703197 + N 1900000000
+TTS (ENSMUST00000132326) chr2 74703198 74703298 - TTS 45
+exon (ENSMUST00000132326, exon 2 of 2) chr2 74703299 74703622 - E 120
+intron (ENSMUST00000132326, intron 1 of 1) chr2 74703623 74704564 - I 158
+promoter-TSS (ENSMUST00000151380) chr2 74704565 74704665 + P 24
+5' UTR (ENSMUST00000151380, exon 1 of 3) chr2 74704666 74704724 + 5UTR 147
+intron (ENSMUST00000151380, intron 1 of 2) chr2 74704725 74704854 + I 158
+promoter-TSS (ENSMUST00000019749) chr2 74704855 74704955 + P 3
+5' UTR (ENSMUST00000019749, exon 1 of 2) chr2 74704956 74705101 + 5UTR 66
+promoter-TSS (ENSMUST00000074721) chr2 74705102 74705202 + P 10
+5' UTR (ENSMUST00000074721, exon 1 of 2) chr2 74705152 74705202 + 5UTR 99
+5' UTR (ENSMUST00000019749, exon 1 of 2)--2 chr2 74705203 74705547 + 5UTR 66
+exon (ENSMUST00000019749, exon 1 of 2) chr2 74705548 74706118 + E 67
+intron (ENSMUST00000074721, intron 1 of 1) chr2 74706119 74706513 + I 158
+exon (ENSMUST00000019749, exon 2 of 2) chr2 74706514 74706761 + E 69
+TTS (ENSMUST00000151380) chr2 74706762 74706862 + TTS 53
+3' UTR (ENSMUST00000019749, exon 2 of 2) chr2 74706863 74707256 + 3UTR 68
+TTS (ENSMUST00000074721) chr2 74707257 74707357 + TTS 39
+3' UTR (ENSMUST00000019749, exon 2 of 2)--2 chr2 74707358 74707882 + 3UTR 68
+TTS (ENSMUST00000019749) chr2 74707883 74707983 + TTS 32
+intron (ENSMUST00000132326, intron 1 of 1)--2 chr2 74707984 74709992 - I 158
+TTS (ENSMUST00000156342) chr2 74709993 74710093 - TTS 55
+exon (ENSMUST00000156342, exon 2 of 2) chr2 74710094 74710142 - E 153
+TTS (ENSMUST00000145799) chr2 74710143 74710243 - TTS 52
+exon (ENSMUST00000145799, exon 3 of 3) chr2 74710244 74710281 - E 144
+intron (ENSMUST00000145799, intron 2 of 2) chr2 74710282 74711876 - I 158
+promoter-TSS (ENSMUST00000111983) chr2 74711877 74711977 + P 14
+promoter-TSS (ENSMUST00000144040) chr2 74711978 74711979 + P 21
+5' UTR (ENSMUST00000111983, exon 1 of 3) chr2 74711980 74712003 + 5UTR 113
+promoter-TSS (ENSMUST00000132326) chr2 74712004 74712104 - P 16
+5' UTR (ENSMUST00000111983, exon 1 of 3)--2 chr2 74712105 74712316 + 5UTR 113
+exon (ENSMUST00000156342, exon 1 of 2) chr2 74712317 74712475 - E 154
+promoter-TSS (ENSMUST00000156342) chr2 74712476 74712576 - P 26
+intron (ENSMUST00000111983, intron 1 of 2) chr2 74712577 74713075 + I 158
+TTS (ENSMUST00000139005) chr2 74713076 74713176 - TTS 47
+exon (ENSMUST00000139005, exon 2 of 2) chr2 74713177 74714401 - E 125
+intron (ENSMUST00000145799, intron 1 of 2) chr2 74714402 74715705 - I 158
+exon (ENSMUST00000139005, exon 1 of 2) chr2 74715706 74715945 - E 126
+promoter-TSS (ENSMUST00000145799) chr2 74715946 74716046 - P 23
+exon (ENSMUST00000139005, exon 1 of 2)--2 chr2 74716047 74716079 - E 126
+promoter-TSS (ENSMUST00000139005) chr2 74716080 74716180 - P 18
+intron (ENSMUST00000111983, intron 1 of 2)--2 chr2 74716181 74721927 + I 158
+promoter-TSS (ENSMUST00000047904) chr2 74721928 74722028 + P 5
+5' UTR (ENSMUST00000047904, exon 1 of 6) chr2 74722029 74722177 + 5UTR 75
+intron (ENSMUST00000047904, intron 1 of 5) chr2 74722178 74722552 + I 158
+5' UTR (ENSMUST00000047904, exon 2 of 6) chr2 74722553 74722631 + 5UTR 76
+intron (ENSMUST00000144040, intron 2 of 6) chr2 74722632 74723331 + I 158
+5' UTR (ENSMUST00000047904, exon 3 of 6) chr2 74723332 74723367 + 5UTR 77
+intron (ENSMUST00000144040, intron 3 of 6) chr2 74723368 74725462 + I 158
+5' UTR (ENSMUST00000047904, exon 4 of 6) chr2 74725463 74725978 + 5UTR 78
+intron (ENSMUST00000144040, intron 5 of 6) chr2 74725979 74726019 + I 158
+promoter-TSS (ENSMUST00000083566) chr2 74726020 74726120 + P 11
+TTS (ENSMUST00000083566) chr2 74726121 74726187 + TTS 40
+intron (ENSMUST00000144040, intron 5 of 6)--2 chr2 74726188 74726677 + I 158
+promoter-TSS (ENSMUST00000053932) chr2 74726678 74726778 + P 7
+5' UTR (ENSMUST00000053932, exon 1 of 4) chr2 74726779 74726875 + 5UTR 85
+5' UTR (ENSMUST00000047904, exon 5 of 6) chr2 74726876 74727023 + 5UTR 79
+promoter-TSS (ENSMUST00000111980) chr2 74727024 74727124 + P 12
+5' UTR (ENSMUST00000111980, exon 1 of 2) chr2 74727074 74727124 + 5UTR 104
+5' UTR (ENSMUST00000047904, exon 5 of 6)--2 chr2 74727125 74727278 + 5UTR 79
+exon (ENSMUST00000047904, exon 5 of 6) chr2 74727279 74727705 + E 80
+intron (ENSMUST00000053932, intron 1 of 3) chr2 74727706 74728261 + I 158
+exon (ENSMUST00000047904, exon 6 of 6) chr2 74728262 74728587 + E 82
+3' UTR (ENSMUST00000047904, exon 6 of 6) chr2 74728588 74728817 + 3UTR 81
+TTS (ENSMUST00000144040) chr2 74728818 74728918 + TTS 50
+3' UTR (ENSMUST00000047904, exon 6 of 6)--2 chr2 74728919 74729072 + 3UTR 81
+TTS (ENSMUST00000111980) chr2 74729073 74729109 + TTS 41
+TTS (ENSMUST00000047904) chr2 74729110 74729210 + TTS 34
+intron (ENSMUST00000053932, intron 2 of 3) chr2 74729211 74732862 + I 158
+promoter-TSS (ENSMUST00000144544) chr2 74732863 74732963 + P 22
+5' UTR (ENSMUST00000144544, exon 1 of 2) chr2 74732964 74733040 + 5UTR 141
+intron (ENSMUST00000144544, intron 1 of 1) chr2 74733041 74736464 + I 158
+promoter-TSS (ENSMUST00000111982) chr2 74736465 74736565 + P 13
+5' UTR (ENSMUST00000111982, exon 1 of 3) chr2 74736566 74736705 + 5UTR 108
+intron (ENSMUST00000111982, intron 1 of 2) chr2 74736706 74739568 + I 158
+promoter-TSS (ENSMUST00000047830) chr2 74739569 74739669 + P 4
+5' UTR (ENSMUST00000047830, exon 1 of 3) chr2 74739670 74739703 + 5UTR 70
+intron (ENSMUST00000047830, intron 1 of 2) chr2 74739704 74743927 + I 158
+5' UTR (ENSMUST00000047830, exon 2 of 3) chr2 74743928 74744011 + 5UTR 71
+exon (ENSMUST00000047830, exon 2 of 3) chr2 74744012 74744163 + E 72
+TTS (ENSMUST00000144544) chr2 74744164 74744264 + TTS 51
+exon (ENSMUST00000047830, exon 2 of 3)--2 chr2 74744265 74744555 + E 72
+intron (ENSMUST00000111983, intron 2 of 2) chr2 74744556 74745623 + I 158
+promoter-TSS (ENSMUST00000140666) chr2 74745624 74745724 + P 19
+promoter-TSS (ENSMUST00000190553) chr2 74745725 74745772 + P 27
+5' UTR (ENSMUST00000140666, exon 1 of 2) chr2 74745773 74746156 + 5UTR 127
+exon (ENSMUST00000140666, exon 1 of 2) chr2 74746157 74746220 + E 128
+exon (ENSMUST00000190553, exon 1 of 1) chr2 74746221 74746321 + E 155
+exon (ENSMUST00000047830, exon 3 of 3) chr2 74746322 74746545 + E 74
+TTS (ENSMUST00000140666) chr2 74746546 74746646 + TTS 48
+exon (ENSMUST00000047830, exon 3 of 3)--2 chr2 74746647 74747079 + E 74
+3' UTR (ENSMUST00000047830, exon 3 of 3) chr2 74747080 74747849 + 3UTR 73
+3' UTR (ENSMUST00000111983, exon 3 of 3) chr2 74747080 74747849 + 3UTR 116
+TTS (ENSMUST00000047830) chr2 74747850 74747950 + TTS 33
+TTS (ENSMUST00000053932) chr2 74747951 74747968 + TTS 36
+3' UTR (ENSMUST00000111983, exon 3 of 3)--2 chr2 74747969 74748220 + 3UTR 116
+TTS (ENSMUST00000111983) chr2 74748221 74748321 + TTS 43
+exon (ENSMUST00000190553, exon 1 of 1)--2 chr2 74748322 74748391 + E 155
+TTS (ENSMUST00000190553) chr2 74748392 74748492 + TTS 56
+Intergenic--8 chr2 74748493 1500000000 + N 1900000000
diff -r 000000000000 -r db456c398880 test-data/annotations_small_TSSTTS_stats.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/annotations_small_TSSTTS_stats.txt Mon Dec 13 15:13:33 2021 +0000
@@ -0,0 +1,8 @@
+Annotation Number of peaks Total size (bp) Log2 Ratio (obs/exp) LogP enrichment (+values depleted)
+3UTR 13.0 7531 9.326 -73.660
+TTS 38.0 2546 12.438 -296.106
+Exon 48.0 13188 10.403 -307.101
+Intron 36.0 43852 8.254 -176.111
+Intergenic 0.0 74676379 -17.650 1393.260
+Promoter 38.0 2757 12.324 -293.081
+5UTR 28.0 3111 11.709 -203.812
diff -r 000000000000 -r db456c398880 test-data/chr2_subset.fa.gz
Binary file test-data/chr2_subset.fa.gz has changed
diff -r 000000000000 -r db456c398880 test-data/fake_phix_peaks.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fake_phix_peaks.bed Mon Dec 13 15:13:33 2021 +0000
@@ -0,0 +1,1 @@
+phiX174 1400 1500
diff -r 000000000000 -r db456c398880 test-data/fake_phix_peaks.subset.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fake_phix_peaks.subset.bed Mon Dec 13 15:13:33 2021 +0000
@@ -0,0 +1,2 @@
+phiX174 1200 1300
+phiX174 1550 1800
diff -r 000000000000 -r db456c398880 test-data/generate_tests.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/generate_tests.sh Mon Dec 13 15:13:33 2021 +0000
@@ -0,0 +1,60 @@
+#! /usr/bin/bash
+## Generate input data:
+if [ ! -e test-data/small_simplified.gtf ]; then
+ wget http://ftp.ensembl.org/pub/release-102/gtf/mus_musculus/Mus_musculus.GRCm38.102.gtf.gz -O /tmp/Mus_musculus.GRCm38.102.gtf.gz
+ zcat /tmp/Mus_musculus.GRCm38.102.gtf.gz | head -n 5 > test-data/small.gtf
+ zcat /tmp/Mus_musculus.GRCm38.102.gtf.gz | awk -v OFS="\t" -v start=74667792 -v end=74748393 '$1 == "2" && $5 > start && $4 < end{print "chr"$0}' >> test-data/small.gtf
+ # annotatePeaks.pl gives different results every time, so the GTF needs to be simplified.
+ cat test-data/small.gtf | grep -v -P "ENSMUST00000152027|ENSMUST00000156342|ENSMUST00000139005|ENSMUST00000144544|ENSMUST00000111982|ENSMUST00000140666|ENSMUST00000190553|ENSMUST00000132326|ENSMUST00000047830|ENSMUST00000047904|ENSMUST00000111980|ENSMUSG00000065500|ENSMUSG00000100642" > test-data/small_simplified.gtf
+fi
+if [ ! -e test-data/CTCF_peaks.bed ]; then
+ wget https://raw.githubusercontent.com/lldelisle/scriptsForWilleminEtAl2021/main/CTCF/E12_Limbs_Wt_CTCF_colored.bed -O test-data/CTCF_peaks.bed
+fi
+if [ ! -e test-data/CTCF_peaks_shifted.bed ]; then
+ cat test-data/CTCF_peaks.bed | grep "chr2" | awk -v OFS="\t" '$3<75000000 && $2>73740000{$1="mm10_dna"; $2-=73740000; $3-=73740000; print}' > test-data/CTCF_peaks_shifted.bed
+fi
+# chr2_subset.fa was downloaded from UCSC
+# https://genome.ucsc.edu/cgi-bin/hgc?hgsid=1234982067_JnS4z30UVCNarTg26Ztd1Oh6nfu6&g=htcGetDna2&table=&i=mixed&o=56694975&l=56694975&r=56714605&getDnaPos=chr2%3A73740000-75000000&db=mm10&hgSeq.cdsExon=1&hgSeq.padding5=0&hgSeq.padding3=0&hgSeq.casing=upper&hgSeq.maskRepeats=on&boolshad.hgSeq.maskRepeats=0&hgSeq.repMasking=lower&boolshad.hgSeq.revComp=0&submit=get+DNA
+
+. <(planemo conda_env homer_gtf_to_annotation.xml)
+echo "$(which homer)"
+## homer_gtf_to_annotation
+## First test
+parseGTF.pl test-data/small.gtf ann -features exon start_codon stop_codon > /tmp/annotations.txt
+assignGenomeAnnotation /tmp/annotations.txt /tmp/annotations.txt -prioritize test-data/annotations_default.txt > test-data/annotations_default_stats.txt
+## Second test
+parseGTF.pl test-data/small.gtf ann -features exon start_codon > /tmp/annotations2.txt
+assignGenomeAnnotation /tmp/annotations2.txt /tmp/annotations2.txt -prioritize test-data/annotations_exon_start.txt > test-data/annotations_exon_start_stats.txt
+## Third test
+parseGTF.pl test-data/small.gtf ann -features exon start_codon stop_codon -annTSSstartOffset -50 -annTSSendOffset 50 -annTTSstartOffset -50 -annTTSendOffset 50 > /tmp/annotations3.txt
+assignGenomeAnnotation /tmp/annotations3.txt /tmp/annotations3.txt -prioritize test-data/annotations_small_TSSTTS.txt > test-data/annotations_small_TSSTTS_stats.txt
+
+## For annotatePeaks.pl
+parseGTF.pl test-data/small_simplified.gtf ann -features exon start_codon stop_codon > /tmp/annotations.txt
+assignGenomeAnnotation /tmp/annotations.txt /tmp/annotations.txt -prioritize test-data/annotations_default_simplified.txt
+
+## homer_annotatePeaks
+## First test
+annotatePeaks.pl test-data/CTCF_peaks.bed none -gtf test-data/small_simplified.gtf -ann test-data/annotations_default_simplified.txt > test-data/CTCF_peaks_first.txt
+## Second test
+annotatePeaks.pl test-data/CTCF_peaks.bed none -ann test-data/annotations_default.txt > test-data/CTCF_peaks_second.txt
+## Third test
+annotatePeaks.pl test-data/CTCF_peaks.bed none -gtf test-data/small_simplified.gtf > test-data/CTCF_peaks_third.txt
+## Fourth test
+annotatePeaks.pl test-data/fake_phix_peaks.bed test-data/phiX174.fasta -CpG > test-data/phiXcpg.txt
+## Fifth test
+annotatePeaks.pl test-data/fake_phix_peaks.bed none > test-data/phiX_nothing.txt
+
+## findMotifsGenome
+# ! Genome preparsing is giving different results...
+findMotifsGenome.pl test-data/fake_phix_peaks.bed test-data/phiX174.fasta fake_phix_peaks_bed_motif
+mv fake_phix_peaks_bed_motif test-data/motif_test1
+# Because genome preparsing gives varying results, has_text had to be used for the other outputs
+# gunzip -c test-data/chr2_subset.fa.gz > test-data/chr2_subset.fa
+# findMotifsGenome.pl test-data/CTCF_peaks_shifted.bed test-data/chr2_subset.fa CTCF_peaks_shifted_bed_motif
+# mv CTCF_peaks_shifted_bed_motif test-data/motif_test2
+# findMotifsGenome.pl test-data/CTCF_peaks_shifted.bed test-data/chr2_subset.fa CTCF_peaks_shifted_bed_motif -mask
+# mv CTCF_peaks_shifted_bed_motif test-data/motif_test3
+# findMotifsGenome.pl test-data/CTCF_peaks_shifted.bed test-data/chr2_subset.fa CTCF_peaks_shifted_bed_motif -mset plants -nomotif
+# mv CTCF_peaks_shifted_bed_motif test-data/motif_test4
+
diff -r 000000000000 -r db456c398880 test-data/motif_test1/homerMotifs.all.motifs
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/motif_test1/homerMotifs.all.motifs Mon Dec 13 15:13:33 2021 +0000
@@ -0,0 +1,165 @@
+>CTAATGAGCT 1-CTAATGAGCT 10.296193 -2.906976 0 T:1.0(100.00%),B:219.7(5.48%),P:1e-1 Tpos:101.0,Tstd:0.0,Bpos:96.2,Bstd:70.8,StrandBias:10.0,Multiplicity:1.00
+0.100 0.700 0.100 0.100
+0.100 0.100 0.100 0.700
+0.700 0.100 0.100 0.100
+0.700 0.100 0.100 0.100
+0.100 0.100 0.100 0.700
+0.100 0.100 0.700 0.100
+0.700 0.100 0.100 0.100
+0.100 0.100 0.700 0.100
+0.100 0.700 0.100 0.100
+0.100 0.100 0.100 0.700
+>ATCAAGATGA 2-ATCAAGATGA 10.296193 -2.906976 0 T:1.0(100.00%),B:219.8(5.48%),P:1e-1 Tpos:113.0,Tstd:0.0,Bpos:94.6,Bstd:75.6,StrandBias:10.0,Multiplicity:1.00
+0.700 0.100 0.100 0.100
+0.100 0.100 0.100 0.700
+0.100 0.700 0.100 0.100
+0.700 0.100 0.100 0.100
+0.700 0.100 0.100 0.100
+0.100 0.100 0.700 0.100
+0.700 0.100 0.100 0.100
+0.100 0.100 0.100 0.700
+0.100 0.100 0.700 0.100
+0.700 0.100 0.100 0.100
+>GTTGCTGCCA 3-GTTGCTGCCA 10.296193 -2.906976 0 T:1.0(100.00%),B:219.9(5.49%),P:1e-1 Tpos:140.0,Tstd:0.0,Bpos:103.4,Bstd:83.3,StrandBias:10.0,Multiplicity:1.00
+0.100 0.100 0.700 0.100
+0.100 0.100 0.100 0.700
+0.100 0.100 0.100 0.700
+0.100 0.100 0.700 0.100
+0.100 0.700 0.100 0.100
+0.100 0.100 0.100 0.700
+0.100 0.100 0.700 0.100
+0.100 0.700 0.100 0.100
+0.100 0.700 0.100 0.100
+0.700 0.100 0.100 0.100
+>CCTGAGACTG 4-CCTGAGACTG 10.296193 -2.902420 0 T:1.0(100.00%),B:220.0(5.49%),P:1e-1 Tpos:181.0,Tstd:0.0,Bpos:96.7,Bstd:84.1,StrandBias:10.0,Multiplicity:1.00
+0.100 0.700 0.100 0.100
+0.100 0.700 0.100 0.100
+0.100 0.100 0.100 0.700
+0.100 0.100 0.700 0.100
+0.700 0.100 0.100 0.100
+0.100 0.100 0.700 0.100
+0.700 0.100 0.100 0.100
+0.100 0.700 0.100 0.100
+0.100 0.100 0.100 0.700
+0.100 0.100 0.700 0.100
+>CGAGGCTAAC 5-CGAGGCTAAC 10.296193 -2.897885 0 T:1.0(100.00%),B:221.5(5.53%),P:1e-1 Tpos:90.0,Tstd:0.0,Bpos:103.4,Bstd:66.4,StrandBias:10.0,Multiplicity:1.00
+0.100 0.700 0.100 0.100
+0.100 0.100 0.700 0.100
+0.700 0.100 0.100 0.100
+0.100 0.100 0.700 0.100
+0.100 0.100 0.700 0.100
+0.100 0.700 0.100 0.100
+0.100 0.100 0.100 0.700
+0.700 0.100 0.100 0.100
+0.700 0.100 0.100 0.100
+0.100 0.700 0.100 0.100
+>ATGAGCTTAATC 1-ATGAGCTTAATC 12.355432 -2.920769 0 T:1.0(100.00%),B:216.0(5.39%),P:1e-1 Tpos:105.0,Tstd:0.0,Bpos:97.7,Bstd:71.4,StrandBias:10.0,Multiplicity:1.00
+0.700 0.100 0.100 0.100
+0.100 0.100 0.100 0.700
+0.100 0.100 0.700 0.100
+0.700 0.100 0.100 0.100
+0.100 0.100 0.700 0.100
+0.100 0.700 0.100 0.100
+0.100 0.100 0.100 0.700
+0.100 0.100 0.100 0.700
+0.700 0.100 0.100 0.100
+0.700 0.100 0.100 0.100
+0.100 0.100 0.100 0.700
+0.100 0.700 0.100 0.100
+>TGCCATCTCAAA 2-TGCCATCTCAAA 12.355432 -2.920769 0 T:1.0(100.00%),B:216.2(5.39%),P:1e-1 Tpos:146.0,Tstd:0.0,Bpos:105.7,Bstd:83.1,StrandBias:10.0,Multiplicity:1.00
+0.100 0.100 0.100 0.700
+0.100 0.100 0.700 0.100
+0.100 0.700 0.100 0.100
+0.100 0.700 0.100 0.100
+0.700 0.100 0.100 0.100
+0.100 0.100 0.100 0.700
+0.100 0.700 0.100 0.100
+0.100 0.100 0.100 0.700
+0.100 0.700 0.100 0.100
+0.700 0.100 0.100 0.100
+0.700 0.100 0.100 0.100
+0.700 0.100 0.100 0.100
+>CGAGGCTAACCC 3-CGAGGCTAACCC 12.355432 -2.916150 0 T:1.0(100.00%),B:217.8(5.43%),P:1e-1 Tpos:91.0,Tstd:0.0,Bpos:101.8,Bstd:65.4,StrandBias:10.0,Multiplicity:1.00
+0.100 0.700 0.100 0.100
+0.100 0.100 0.700 0.100
+0.700 0.100 0.100 0.100
+0.100 0.100 0.700 0.100
+0.100 0.100 0.700 0.100
+0.100 0.700 0.100 0.100
+0.100 0.100 0.100 0.700
+0.700 0.100 0.100 0.100
+0.700 0.100 0.100 0.100
+0.100 0.700 0.100 0.100
+0.100 0.700 0.100 0.100
+0.100 0.700 0.100 0.100
+>CTCCTGAGACTG 4-CTCCTGAGACTG 12.355432 -2.911553 0 T:1.0(100.00%),B:218.1(5.44%),P:1e-1 Tpos:180.0,Tstd:0.0,Bpos:95.7,Bstd:83.7,StrandBias:10.0,Multiplicity:1.00
+0.100 0.700 0.100 0.100
+0.100 0.100 0.100 0.700
+0.100 0.700 0.100 0.100
+0.100 0.700 0.100 0.100
+0.100 0.100 0.100 0.700
+0.100 0.100 0.700 0.100
+0.700 0.100 0.100 0.100
+0.100 0.100 0.700 0.100
+0.700 0.100 0.100 0.100
+0.100 0.700 0.100 0.100
+0.100 0.100 0.100 0.700
+0.100 0.100 0.700 0.100
+>GATGATGCTCGT 5-GATGATGCTCGT 12.355432 -2.906976 0 T:1.0(100.00%),B:219.8(5.48%),P:1e-1 Tpos:119.0,Tstd:0.0,Bpos:93.3,Bstd:77.5,StrandBias:10.0,Multiplicity:1.00
+0.100 0.100 0.700 0.100
+0.700 0.100 0.100 0.100
+0.100 0.100 0.100 0.700
+0.100 0.100 0.700 0.100
+0.700 0.100 0.100 0.100
+0.100 0.100 0.100 0.700
+0.100 0.100 0.700 0.100
+0.100 0.700 0.100 0.100
+0.100 0.100 0.100 0.700
+0.100 0.700 0.100 0.100
+0.100 0.100 0.700 0.100
+0.100 0.100 0.100 0.700
+>CCCTAATG 1-CCCTAATG 8.236954 -2.897885 0 T:1.0(100.00%),B:221.6(5.53%),P:1e-1 Tpos:98.0,Tstd:0.0,Bpos:97.0,Bstd:70.2,StrandBias:10.0,Multiplicity:1.00
+0.100 0.700 0.100 0.100
+0.100 0.700 0.100 0.100
+0.100 0.700 0.100 0.100
+0.100 0.100 0.100 0.700
+0.700 0.100 0.100 0.100
+0.700 0.100 0.100 0.100
+0.100 0.100 0.100 0.700
+0.100 0.100 0.700 0.100
+>TGCTGCCA 2-TGCTGCCA 8.236954 -2.897885 0 T:1.0(100.00%),B:221.8(5.53%),P:1e-1 Tpos:141.0,Tstd:0.0,Bpos:102.6,Bstd:84.0,StrandBias:10.0,Multiplicity:1.00
+0.100 0.100 0.100 0.700
+0.100 0.100 0.700 0.100
+0.100 0.700 0.100 0.100
+0.100 0.100 0.100 0.700
+0.100 0.100 0.700 0.100
+0.100 0.700 0.100 0.100
+0.100 0.700 0.100 0.100
+0.700 0.100 0.100 0.100
+>GAGACTGA 3-GAGACTGA 8.236954 -2.897885 0 T:1.0(100.00%),B:221.9(5.54%),P:1e-1 Tpos:183.0,Tstd:0.0,Bpos:97.7,Bstd:84.4,StrandBias:10.0,Multiplicity:1.00
+0.100 0.100 0.700 0.100
+0.700 0.100 0.100 0.100
+0.100 0.100 0.700 0.100
+0.700 0.100 0.100 0.100
+0.100 0.700 0.100 0.100
+0.100 0.100 0.100 0.700
+0.100 0.100 0.700 0.100
+0.700 0.100 0.100 0.100
+>CTTAATCA 4-CTTAATCA 8.236954 -2.888876 0 T:1.0(100.00%),B:223.5(5.58%),P:1e-1 Tpos:108.0,Tstd:0.0,Bpos:94.6,Bstd:75.1,StrandBias:10.0,Multiplicity:1.00
+0.100 0.700 0.100 0.100
+0.100 0.100 0.100 0.700
+0.100 0.100 0.100 0.700
+0.700 0.100 0.100 0.100
+0.700 0.100 0.100 0.100
+0.100 0.100 0.100 0.700
+0.100 0.700 0.100 0.100
+0.700 0.100 0.100 0.100
+>GATGATGC 5-GATGATGC 8.236954 -2.888876 0 T:1.0(100.00%),B:223.5(5.58%),P:1e-1 Tpos:117.0,Tstd:0.0,Bpos:93.3,Bstd:78.0,StrandBias:10.0,Multiplicity:1.00
+0.100 0.100 0.700 0.100
+0.700 0.100 0.100 0.100
+0.100 0.100 0.100 0.700
+0.100 0.100 0.700 0.100
+0.700 0.100 0.100 0.100
+0.100 0.100 0.100 0.700
+0.100 0.100 0.700 0.100
+0.100 0.700 0.100 0.100
diff -r 000000000000 -r db456c398880 test-data/motif_test1/homerMotifs.motifs10
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/motif_test1/homerMotifs.motifs10 Mon Dec 13 15:13:33 2021 +0000
@@ -0,0 +1,55 @@
+>CTAATGAGCT 1-CTAATGAGCT 10.296193 -2.906976 0 T:1.0(100.00%),B:219.7(5.48%),P:1e-1 Tpos:101.0,Tstd:0.0,Bpos:96.2,Bstd:70.8,StrandBias:10.0,Multiplicity:1.00
+0.100 0.700 0.100 0.100
+0.100 0.100 0.100 0.700
+0.700 0.100 0.100 0.100
+0.700 0.100 0.100 0.100
+0.100 0.100 0.100 0.700
+0.100 0.100 0.700 0.100
+0.700 0.100 0.100 0.100
+0.100 0.100 0.700 0.100
+0.100 0.700 0.100 0.100
+0.100 0.100 0.100 0.700
+>ATCAAGATGA 2-ATCAAGATGA 10.296193 -2.906976 0 T:1.0(100.00%),B:219.8(5.48%),P:1e-1 Tpos:113.0,Tstd:0.0,Bpos:94.6,Bstd:75.6,StrandBias:10.0,Multiplicity:1.00
+0.700 0.100 0.100 0.100
+0.100 0.100 0.100 0.700
+0.100 0.700 0.100 0.100
+0.700 0.100 0.100 0.100
+0.700 0.100 0.100 0.100
+0.100 0.100 0.700 0.100
+0.700 0.100 0.100 0.100
+0.100 0.100 0.100 0.700
+0.100 0.100 0.700 0.100
+0.700 0.100 0.100 0.100
+>GTTGCTGCCA 3-GTTGCTGCCA 10.296193 -2.906976 0 T:1.0(100.00%),B:219.9(5.49%),P:1e-1 Tpos:140.0,Tstd:0.0,Bpos:103.4,Bstd:83.3,StrandBias:10.0,Multiplicity:1.00
+0.100 0.100 0.700 0.100
+0.100 0.100 0.100 0.700
+0.100 0.100 0.100 0.700
+0.100 0.100 0.700 0.100
+0.100 0.700 0.100 0.100
+0.100 0.100 0.100 0.700
+0.100 0.100 0.700 0.100
+0.100 0.700 0.100 0.100
+0.100 0.700 0.100 0.100
+0.700 0.100 0.100 0.100
+>CCTGAGACTG 4-CCTGAGACTG 10.296193 -2.902420 0 T:1.0(100.00%),B:220.0(5.49%),P:1e-1 Tpos:181.0,Tstd:0.0,Bpos:96.7,Bstd:84.1,StrandBias:10.0,Multiplicity:1.00
+0.100 0.700 0.100 0.100
+0.100 0.700 0.100 0.100
+0.100 0.100 0.100 0.700
+0.100 0.100 0.700 0.100
+0.700 0.100 0.100 0.100
+0.100 0.100 0.700 0.100
+0.700 0.100 0.100 0.100
+0.100 0.700 0.100 0.100
+0.100 0.100 0.100 0.700
+0.100 0.100 0.700 0.100
+>CGAGGCTAAC 5-CGAGGCTAAC 10.296193 -2.897885 0 T:1.0(100.00%),B:221.5(5.53%),P:1e-1 Tpos:90.0,Tstd:0.0,Bpos:103.4,Bstd:66.4,StrandBias:10.0,Multiplicity:1.00
+0.100 0.700 0.100 0.100
+0.100 0.100 0.700 0.100
+0.700 0.100 0.100 0.100
+0.100 0.100 0.700 0.100
+0.100 0.100 0.700 0.100
+0.100 0.700 0.100 0.100
+0.100 0.100 0.100 0.700
+0.700 0.100 0.100 0.100
+0.700 0.100 0.100 0.100
+0.100 0.700 0.100 0.100
diff -r 000000000000 -r db456c398880 test-data/motif_test1/homerMotifs.motifs12
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/motif_test1/homerMotifs.motifs12 Mon Dec 13 15:13:33 2021 +0000
@@ -0,0 +1,65 @@
+>ATGAGCTTAATC 1-ATGAGCTTAATC 12.355432 -2.920769 0 T:1.0(100.00%),B:216.0(5.39%),P:1e-1 Tpos:105.0,Tstd:0.0,Bpos:97.7,Bstd:71.4,StrandBias:10.0,Multiplicity:1.00
+0.700 0.100 0.100 0.100
+0.100 0.100 0.100 0.700
+0.100 0.100 0.700 0.100
+0.700 0.100 0.100 0.100
+0.100 0.100 0.700 0.100
+0.100 0.700 0.100 0.100
+0.100 0.100 0.100 0.700
+0.100 0.100 0.100 0.700
+0.700 0.100 0.100 0.100
+0.700 0.100 0.100 0.100
+0.100 0.100 0.100 0.700
+0.100 0.700 0.100 0.100
+>TGCCATCTCAAA 2-TGCCATCTCAAA 12.355432 -2.920769 0 T:1.0(100.00%),B:216.2(5.39%),P:1e-1 Tpos:146.0,Tstd:0.0,Bpos:105.7,Bstd:83.1,StrandBias:10.0,Multiplicity:1.00
+0.100 0.100 0.100 0.700
+0.100 0.100 0.700 0.100
+0.100 0.700 0.100 0.100
+0.100 0.700 0.100 0.100
+0.700 0.100 0.100 0.100
+0.100 0.100 0.100 0.700
+0.100 0.700 0.100 0.100
+0.100 0.100 0.100 0.700
+0.100 0.700 0.100 0.100
+0.700 0.100 0.100 0.100
+0.700 0.100 0.100 0.100
+0.700 0.100 0.100 0.100
+>CGAGGCTAACCC 3-CGAGGCTAACCC 12.355432 -2.916150 0 T:1.0(100.00%),B:217.8(5.43%),P:1e-1 Tpos:91.0,Tstd:0.0,Bpos:101.8,Bstd:65.4,StrandBias:10.0,Multiplicity:1.00
+0.100 0.700 0.100 0.100
+0.100 0.100 0.700 0.100
+0.700 0.100 0.100 0.100
+0.100 0.100 0.700 0.100
+0.100 0.100 0.700 0.100
+0.100 0.700 0.100 0.100
+0.100 0.100 0.100 0.700
+0.700 0.100 0.100 0.100
+0.700 0.100 0.100 0.100
+0.100 0.700 0.100 0.100
+0.100 0.700 0.100 0.100
+0.100 0.700 0.100 0.100
+>CTCCTGAGACTG 4-CTCCTGAGACTG 12.355432 -2.911553 0 T:1.0(100.00%),B:218.1(5.44%),P:1e-1 Tpos:180.0,Tstd:0.0,Bpos:95.7,Bstd:83.7,StrandBias:10.0,Multiplicity:1.00
+0.100 0.700 0.100 0.100
+0.100 0.100 0.100 0.700
+0.100 0.700 0.100 0.100
+0.100 0.700 0.100 0.100
+0.100 0.100 0.100 0.700
+0.100 0.100 0.700 0.100
+0.700 0.100 0.100 0.100
+0.100 0.100 0.700 0.100
+0.700 0.100 0.100 0.100
+0.100 0.700 0.100 0.100
+0.100 0.100 0.100 0.700
+0.100 0.100 0.700 0.100
+>GATGATGCTCGT 5-GATGATGCTCGT 12.355432 -2.906976 0 T:1.0(100.00%),B:219.8(5.48%),P:1e-1 Tpos:119.0,Tstd:0.0,Bpos:93.3,Bstd:77.5,StrandBias:10.0,Multiplicity:1.00
+0.100 0.100 0.700 0.100
+0.700 0.100 0.100 0.100
+0.100 0.100 0.100 0.700
+0.100 0.100 0.700 0.100
+0.700 0.100 0.100 0.100
+0.100 0.100 0.100 0.700
+0.100 0.100 0.700 0.100
+0.100 0.700 0.100 0.100
+0.100 0.100 0.100 0.700
+0.100 0.700 0.100 0.100
+0.100 0.100 0.700 0.100
+0.100 0.100 0.100 0.700
diff -r 000000000000 -r db456c398880 test-data/motif_test1/homerMotifs.motifs8
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/motif_test1/homerMotifs.motifs8 Mon Dec 13 15:13:33 2021 +0000
@@ -0,0 +1,45 @@
+>CCCTAATG 1-CCCTAATG 8.236954 -2.897885 0 T:1.0(100.00%),B:221.6(5.53%),P:1e-1 Tpos:98.0,Tstd:0.0,Bpos:97.0,Bstd:70.2,StrandBias:10.0,Multiplicity:1.00
+0.100 0.700 0.100 0.100
+0.100 0.700 0.100 0.100
+0.100 0.700 0.100 0.100
+0.100 0.100 0.100 0.700
+0.700 0.100 0.100 0.100
+0.700 0.100 0.100 0.100
+0.100 0.100 0.100 0.700
+0.100 0.100 0.700 0.100
+>TGCTGCCA 2-TGCTGCCA 8.236954 -2.897885 0 T:1.0(100.00%),B:221.8(5.53%),P:1e-1 Tpos:141.0,Tstd:0.0,Bpos:102.6,Bstd:84.0,StrandBias:10.0,Multiplicity:1.00
+0.100 0.100 0.100 0.700
+0.100 0.100 0.700 0.100
+0.100 0.700 0.100 0.100
+0.100 0.100 0.100 0.700
+0.100 0.100 0.700 0.100
+0.100 0.700 0.100 0.100
+0.100 0.700 0.100 0.100
+0.700 0.100 0.100 0.100
+>GAGACTGA 3-GAGACTGA 8.236954 -2.897885 0 T:1.0(100.00%),B:221.9(5.54%),P:1e-1 Tpos:183.0,Tstd:0.0,Bpos:97.7,Bstd:84.4,StrandBias:10.0,Multiplicity:1.00
+0.100 0.100 0.700 0.100
+0.700 0.100 0.100 0.100
+0.100 0.100 0.700 0.100
+0.700 0.100 0.100 0.100
+0.100 0.700 0.100 0.100
+0.100 0.100 0.100 0.700
+0.100 0.100 0.700 0.100
+0.700 0.100 0.100 0.100
+>CTTAATCA 4-CTTAATCA 8.236954 -2.888876 0 T:1.0(100.00%),B:223.5(5.58%),P:1e-1 Tpos:108.0,Tstd:0.0,Bpos:94.6,Bstd:75.1,StrandBias:10.0,Multiplicity:1.00
+0.100 0.700 0.100 0.100
+0.100 0.100 0.100 0.700
+0.100 0.100 0.100 0.700
+0.700 0.100 0.100 0.100
+0.700 0.100 0.100 0.100
+0.100 0.100 0.100 0.700
+0.100 0.700 0.100 0.100
+0.700 0.100 0.100 0.100
+>GATGATGC 5-GATGATGC 8.236954 -2.888876 0 T:1.0(100.00%),B:223.5(5.58%),P:1e-1 Tpos:117.0,Tstd:0.0,Bpos:93.3,Bstd:78.0,StrandBias:10.0,Multiplicity:1.00
+0.100 0.100 0.700 0.100
+0.700 0.100 0.100 0.100
+0.100 0.100 0.100 0.700
+0.100 0.100 0.700 0.100
+0.700 0.100 0.100 0.100
+0.100 0.100 0.100 0.700
+0.100 0.100 0.700 0.100
+0.100 0.700 0.100 0.100
diff -r 000000000000 -r db456c398880 test-data/motif_test1/homerResults.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/motif_test1/homerResults.html Mon Dec 13 15:13:33 2021 +0000
@@ -0,0 +1,489 @@
+
fake_phix_peaks_bed_motif/ - Homer de novo Motif Results
+
+Homer de novo Motif Results (fake_phix_peaks_bed_motif/)
+Known Motif Enrichment Results
+Gene Ontology Enrichment Results
+If Homer is having trouble matching a motif to a known motif, try copy/pasting the matrix file into
+STAMP
+More information on motif finding results: HOMER
+ | Description of Results
+ | Tips
+
+Total target sequences = 1
+Total background sequences = 4009
+* - possible false positive
+
+Rank | Motif | P-value | log P-pvalue | % of Targets | % of Background |
+STD(Bg STD) |
+Best Match/Details | Motif File |
+1
+* |
+ | 1e-1 | -2.921e+00 | 100.00% | 5.39% | 0.0bp (71.4bp) | Oc/dmmpmm(Noyes_hd)/fly(0.654) More Information | Similar Motifs Found | motif file (matrix) |
+2
+* |
+ | 1e-1 | -2.921e+00 | 100.00% | 5.39% | 0.0bp (83.1bp) | RFX1/RFX1_YPD/[](Harbison)/Yeast(0.645) More Information | Similar Motifs Found | motif file (matrix) |
+3
+* |
+ | 1e-1 | -2.916e+00 | 100.00% | 5.43% | 0.0bp (65.4bp) | PH0015.1_Crx/Jaspar(0.629) More Information | Similar Motifs Found | motif file (matrix) |
+4
+* |
+ | 1e-1 | -2.912e+00 | 100.00% | 5.44% | 0.0bp (83.7bp) | RAV1(2)(AP2/EREBP)/Arabidopsis thaliana/AthaMap(0.661) More Information | Similar Motifs Found | motif file (matrix) |
+5
+* |
+ | 1e-1 | -2.907e+00 | 100.00% | 5.48% | 0.0bp (75.6bp) | SNRNP70K(RRM)/Drosophila_melanogaster-RNCMPT00143-PBM/HughesRNA(0.785) More Information | Similar Motifs Found | motif file (matrix) |
+6
+* |
+ | 1e-1 | -2.907e+00 | 100.00% | 5.49% | 0.0bp (83.3bp) | DEL1(E2FDP)/colamp-DEL1-DAP-Seq(GSE60143)/Homer(0.657) More Information | Similar Motifs Found | motif file (matrix) |
+7
+* |
+ | 1e-1 | -2.898e+00 | 100.00% | 5.53% | 0.0bp (70.2bp) | HNRNPA1L2(RRM)/Homo_sapiens-RNCMPT00023-PBM/HughesRNA(0.812) More Information | Similar Motifs Found | motif file (matrix) |
+8
+* |
+ | 1e-1 | -2.889e+00 | 100.00% | 5.58% | 0.0bp (78.0bp) | AGL42/MA1201.1/Jaspar(0.903) More Information | Similar Motifs Found | motif file (matrix) |
+
+
+
diff -r 000000000000 -r db456c398880 test-data/motif_test1/homerResults/motif1.info.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/motif_test1/homerResults/motif1.info.html Mon Dec 13 15:13:33 2021 +0000
@@ -0,0 +1,1753 @@
+Motif 1
+Information for 1-ATGAGCTTAATC (Motif 1)
+
+
+
+Reverse Opposite:
+
+
+p-value: | 1e-1 |
+log p-value: | -2.921e+00 |
+Information Content per bp: | 1.530 |
+Number of Target Sequences with motif | 1.0 |
+Percentage of Target Sequences with motif | 100.00% |
+Number of Background Sequences with motif | 216.0 |
+Percentage of Background Sequences with motif | 5.39% |
+Average Position of motif in Targets | 105.0 +/- 0.0bp |
+Average Position of motif in Background | 97.7 +/- 71.4bp |
+Strand Bias (log2 ratio + to - strand density) | 10.0 |
+Multiplicity (# of sites on avg that occur together) | 1.00 |
+Motif File: | file (matrix) reverse opposite |
+SVG Files for Logos: | forward logo reverse opposite |
+
+Matches to Known Motifs
+
+
+Oc/dmmpmm(Noyes_hd)/fly
+
+
+Match Rank: | 1 |
+Score: | 0.65 |
+ Offset: | 4 |
+ Orientation: | forward strand |
+Alignment: | ATGAGCTTAATC-- ----NNTTAATCCN |
+ |
+
+
+
+
+
+ |
+ |
+
+PHO2(MacIsaac)/Yeast
+
+
+Match Rank: | 2 |
+Score: | 0.65 |
+ Offset: | 5 |
+ Orientation: | reverse strand |
+Alignment: | ATGAGCTTAATC -----CTTAAT- |
+ |
+
+
+
+
+
+ |
+ |
+
+oc/dmmpmm(Noyes)/fly
+
+
+Match Rank: | 3 |
+Score: | 0.65 |
+ Offset: | 6 |
+ Orientation: | forward strand |
+Alignment: | ATGAGCTTAATC- ------TTAATCC |
+ |
+
+
+
+
+
+ |
+ |
+
+Gsc/dmmpmm(Noyes_hd)/fly
+
+
+Match Rank: | 4 |
+Score: | 0.64 |
+ Offset: | 4 |
+ Orientation: | forward strand |
+Alignment: | ATGAGCTTAATC-- ----NNTTAATCCN |
+ |
+
+
+
+
+
+ |
+ |
+
+bcd/dmmpmm(Noyes_hd)/fly
+
+
+Match Rank: | 5 |
+Score: | 0.64 |
+ Offset: | 4 |
+ Orientation: | forward strand |
+Alignment: | ATGAGCTTAATC-- ----GGTTAATCCN |
+ |
+
+
+
+
+
+ |
+ |
+
+OTX2/MA0712.1/Jaspar
+
+
+Match Rank: | 6 |
+Score: | 0.64 |
+ Offset: | 6 |
+ Orientation: | forward strand |
+Alignment: | ATGAGCTTAATC-- ------TTAATCCT |
+ |
+
+
+
+
+
+ |
+ |
+
+OTX1/MA0711.1/Jaspar
+
+
+Match Rank: | 7 |
+Score: | 0.63 |
+ Offset: | 6 |
+ Orientation: | forward strand |
+Alignment: | ATGAGCTTAATC-- ------TTAATCCG |
+ |
+
+
+
+
+
+ |
+ |
+
+ESRRB/MA0141.3/Jaspar
+
+
+Match Rank: | 8 |
+Score: | 0.63 |
+ Offset: | -1 |
+ Orientation: | reverse strand |
+Alignment: | -ATGAGCTTAATC NATGACCTTGA-- |
+ |
+
+
+
+
+
+ |
+ |
+
+GSC2/MA0891.1/Jaspar
+
+
+Match Rank: | 9 |
+Score: | 0.63 |
+ Offset: | 5 |
+ Orientation: | forward strand |
+Alignment: | ATGAGCTTAATC--- -----CCTAATCCGC |
+ |
+
+
+
+
+
+ |
+ |
+
+Pitx1/MA0682.1/Jaspar
+
+
+Match Rank: | 10 |
+Score: | 0.63 |
+ Offset: | 6 |
+ Orientation: | forward strand |
+Alignment: | ATGAGCTTAATC-- ------TTAATCCC |
+ |
+
+
+
+
+
+ |
+ |
+
+
+