changeset 0:418e4d0fe0bd draft

planemo upload for repository https://github.com/lldelisle/tools-lldelisle/tree/master/tools/fromgtfTobed12 commit 1aaffda5b95e0389e315179345642c0d005867c1
author lldelisle
date Fri, 04 Nov 2022 15:37:12 +0000
parents
children 6fd4b3b90220
files fromgtfTobed12.py fromgtfTobed12.xml test-data/Homo_sapiens.GRCh38.95_491firstLines.gtf.gz test-data/test.bed test-data/testMergeExons.bed test-data/testMergeNotUCSC.bed test-data/testWithGenes.bed
diffstat 7 files changed, 528 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fromgtfTobed12.py	Fri Nov 04 15:37:12 2022 +0000
@@ -0,0 +1,150 @@
+import argparse
+import sys
+import warnings
+
+import gffutils
+
+warnings.filterwarnings("ignore", message="It appears you have a gene feature"
+                        " in your GTF file. You may want to use the "
+                        "`disable_infer_genes` option to speed up database "
+                        "creation")
+warnings.filterwarnings("ignore", message="It appears you have a transcript "
+                        "feature in your GTF file. You may want to use the "
+                        "`disable_infer_transcripts` option to speed up "
+                        "database creation")
+# In gffutils v0.10 the wording of these warnings changed:
+warnings.filterwarnings("ignore", message="It appears you have a gene feature"
+                        " in your GTF file. You may want to use the "
+                        "`disable_infer_genes=True` option to speed up "
+                        "database creation")
+warnings.filterwarnings("ignore", message="It appears you have a transcript "
+                        "feature in your GTF file. You may want to use the "
+                        "`disable_infer_transcripts=True` option to speed up "
+                        "database creation")
+
+
+def convert_gtf_to_bed(fn, fo, useGene, mergeTranscripts,
+                       mergeTranscriptsAndOverlappingExons, ucsc):
+    db = gffutils.create_db(fn, ':memory:')
+    # For each transcript:
+    prefered_name = "transcript_name"
+    if useGene or mergeTranscripts or mergeTranscriptsAndOverlappingExons:
+        prefered_name = "gene_name"
+    if mergeTranscripts or mergeTranscriptsAndOverlappingExons:
+        all_items = db.features_of_type("gene", order_by='start')
+    else:
+        all_items = db.features_of_type("transcript", order_by='start')
+    for tr in all_items:
+        # The name is the name of the transcript/gene, if it exists
+        try:
+            # First try to have it directly on the feature
+            trName = tr.attributes[prefered_name][0]
+        except KeyError:
+            # Else try to guess the name of the transcript/gene from exons:
+            try:
+                trName = set([e.attributes[prefered_name][0]
+                              for e in
+                              db.children(tr,
+                                          featuretype='exon',
+                                          order_by='start')]).pop()
+            except KeyError:
+                # Else take the feature id (transcript or gene id)
+                trName = tr.id
+        # If the cds is defined in the gtf,
+        # use it to define the thick start and end
+        # GTF uses 1-based closed intervals and
+        # BED uses 0-based half-open intervals, so
+        # I need to subtract one from each start
+        try:
+            # In case of multiple CDS (when there is one entry per gene)
+            # I use the first one to get the start
+            # and the last one to get the end (order_by=-start)
+            cds_start = next(db.children(tr,
+                                         featuretype='CDS',
+                                         order_by='start')).start - 1
+            cds_end = next(db.children(tr,
+                                       featuretype='CDS',
+                                       order_by='-start')).end
+        except StopIteration:
+            # If the CDS is not defined, the thick start and thick end
+            # are both set to the feature start, as proposed here:
+            # https://genome.ucsc.edu/FAQ/FAQformat.html#format1
+            cds_start = tr.start - 1
+            cds_end = tr.start - 1
+        # Get all exons starts and lengths
+        if mergeTranscriptsAndOverlappingExons:
+            # We merge overlapping exons:
+            exons_starts = []
+            exons_length = []
+            current_start = -1
+            current_end = None
+            for e in db.children(tr, featuretype='exon', order_by='start'):
+                if current_start == -1:
+                    current_start = e.start - 1
+                    current_end = e.end
+                else:
+                    if e.start > current_end:
+                        # This is a non-overlapping exon
+                        # We store the previous exon:
+                        exons_starts.append(current_start)
+                        exons_length.append(current_end - current_start)
+                        # We set the current:
+                        current_start = e.start - 1
+                        current_end = e.end
+                    else:
+                        # This is an overlapping exon
+                        # We update current_end if necessary
+                        current_end = max(current_end, e.end)
+            if current_start != -1:
+                # There is a last exon to store:
+                exons_starts.append(current_start)
+                exons_length.append(current_end - current_start)
+        else:
+            exons_starts = [e.start - 1
+                            for e in
+                            db.children(tr, featuretype='exon',
+                                        order_by='start')]
+            exons_length = [len(e)
+                            for e in
+                            db.children(tr, featuretype='exon',
+                                        order_by='start')]
+        # Rewrite the chromosome name if needed:
+        chrom = tr.chrom
+        if ucsc and chrom[0:3] != 'chr':
+            chrom = 'chr' + chrom
+        fo.write("%s\t%d\t%d\t%s\t%d\t%s\t%d\t%d\t%s\t%d\t%s\t%s\n" %
+                 (chrom, tr.start - 1, tr.end, trName, 0, tr.strand,
+                  cds_start, cds_end, "0", len(exons_starts),
+                  ",".join([str(ex_l) for ex_l in exons_length]),
+                  ",".join([str(s - (tr.start - 1)) for s in exons_starts])))
+
+
+argp = argparse.ArgumentParser(
+    description=("Convert a gtf to a bed12 with one entry"
+                 " per transcript/gene"))
+argp.add_argument('input', default=None,
+                  help="Input gtf file (can be gzip).")
+argp.add_argument('--output', default=sys.stdout,
+                  type=argparse.FileType('w'),
+                  help="Output bed12 file.")
+argp.add_argument('--useGene', action="store_true",
+                  help="Use the gene name instead of the "
+                       "transcript name.")
+argp.add_argument('--ucscformat', action="store_true",
+                  help="If you want that all chromosome names "
+                       "begin with 'chr'.")
+group = argp.add_mutually_exclusive_group()
+group.add_argument('--mergeTranscripts', action="store_true",
+                   help="Merge all transcripts into a single "
+                        "entry to have one line per gene.")
+group.add_argument('--mergeTranscriptsAndOverlappingExons',
+                   action="store_true",
+                   help="Merge all transcripts into a single "
+                        "entry to have one line per gene and merge"
+                        " overlapping exons.")
+
+args = argp.parse_args()
+convert_gtf_to_bed(args.input, args.output, args.useGene,
+                   args.mergeTranscripts,
+                   args.mergeTranscriptsAndOverlappingExons,
+                   args.ucscformat)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fromgtfTobed12.xml	Fri Nov 04 15:37:12 2022 +0000
@@ -0,0 +1,76 @@
+<tool id="fromgtfTobed12" name="fromgtftobed12" version="0.11.1+galaxy0">
+  <description> Convert a gtf to a bed12.</description>
+  <requirements>
+    <requirement type="package" version="0.11.1">gffutils</requirement>
+  </requirements>
+  <stdio>
+    <!-- Anything other than zero is an error -->
+    <exit_code range="1:" />
+    <exit_code range=":-1" />
+    <!-- In case the return code has not been set properly, check stderr too -->
+    <regex match="Error:" />
+    <regex match="Exception:" />
+  </stdio>
+  <command>
+<![CDATA[
+        python3 $__tool_directory__/fromgtfTobed12.py
+        $useGene
+        $mergeTranscripts
+        $ucscformat
+        --output $output
+        $input
+]]>
+  </command>
+  <inputs>
+    <param name="input" multiple="false" type="data" format="gtf" label="Select the gtf to convert."/>
+    <param argument="--useGene" type="boolean" checked="False" truevalue="--useGene" falsevalue="" label="Uses the gene name instead of the transcript name."/>
+    <param name="mergeTranscripts" type="select" label="Do you want to merge all transcripts of a gene in a single line?">
+      <option value="" selected="true">No</option>
+      <option value="--mergeTranscripts">Yes</option>
+      <option value="--mergeTranscriptsAndOverlappingExons">Yes and merge overlapping exons</option>
+    </param>
+    <param argument="--ucscformat" type="boolean" checked="True" truevalue="--ucscformat" falsevalue="" label="If you want that all chromosome names begin with 'chr'."/>
+  </inputs>
+  
+  <outputs>
+    <data format="bed" name="output" label="$input.name as bed12"/>
+  </outputs>
+
+  <tests>
+    <test>
+      <param name="input" value="Homo_sapiens.GRCh38.95_491firstLines.gtf.gz"/>
+      <param name="ucscformat" value="--ucscformat"/>
+      <output name="output" file="test.bed"/>
+    </test>
+    <test>
+      <param name="input" value="Homo_sapiens.GRCh38.95_491firstLines.gtf.gz"/>
+      <param name="ucscformat" value="--ucscformat"/>
+      <param name="useGene" value="--useGene"/>
+      <output name="output" file="testWithGenes.bed"/>
+    </test>
+    <test>
+      <param name="input" value="Homo_sapiens.GRCh38.95_491firstLines.gtf.gz"/>
+      <param name="mergeTranscripts" value="--mergeTranscripts"/>
+      <param name="useGene" value="--useGene"/>
+      <param name="ucscformat" value=""/>
+      <output name="output" file="testMergeNotUCSC.bed"/>
+    </test>
+    <test>
+      <param name="input" value="Homo_sapiens.GRCh38.95_491firstLines.gtf.gz"/>
+      <param name="mergeTranscripts" value="--mergeTranscriptsAndOverlappingExons"/>
+      <param name="useGene" value="--useGene"/>
+      <param name="ucscformat" value=""/>
+      <output name="output" file="testMergeExons.bed"/>
+    </test>
+  </tests>
+  <help><![CDATA[
+    This tool uses gffutils to convert gtf to bed12. One line per transcript (or one line per gene when transcripts are merged).
+    It will use as names transcript_name or gene_name when available.
+]]>  </help>
+  <citations>
+  <citation type="bibtex">@online{gffutils,
+  url = {https://pythonhosted.org/gffutils/contents.html}
+  }
+  </citation>
+  </citations>
+</tool>
Binary file test-data/Homo_sapiens.GRCh38.95_491firstLines.gtf.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test.bed	Fri Nov 04 15:37:12 2022 +0000
@@ -0,0 +1,105 @@
+chr1	11868	14409	DDX11L1-202	0	+	11868	11868	0	3	359,109,1189	0,744,1352
+chr1	12009	13670	DDX11L1-201	0	+	12009	12009	0	6	48,49,85,78,154,218	0,169,603,965,1211,1443
+chr1	14403	29570	WASH7P-201	0	-	14403	14403	0	11	98,34,152,159,198,136,137,147,99,154,37	0,601,1392,2203,2454,2829,3202,3511,3864,10334,15130
+chr1	17368	17436	MIR6859-1-201	0	-	17368	17368	0	1	68	0
+chr1	29553	31097	MIR1302-2HG-202	0	+	29553	29553	0	3	486,104,122	0,1010,1422
+chr1	30266	31109	MIR1302-2HG-201	0	+	30266	30266	0	2	401,134	0,709
+chr1	30365	30503	MIR1302-2-201	0	+	30365	30365	0	1	138	0
+chr1	34553	36081	FAM138A-201	0	-	34553	34553	0	3	621,205,361	0,723,1167
+chr1	35244	36073	FAM138A-202	0	-	35244	35244	0	2	237,353	0,476
+chr1	52472	53312	OR4G4P-201	0	+	52472	52472	0	1	840	0
+chr1	57597	64116	OR4G11P-202	0	+	57597	57597	0	3	56,157,1201	0,1102,5318
+chr1	62948	63887	OR4G11P-201	0	+	62948	62948	0	1	939	0
+chr1	65418	71585	OR4F5-202	0	+	65564	70005	0	3	15,54,2549	0,101,3618
+chr1	69054	70108	OR4F5-201	0	+	69090	70005	0	1	1054	0
+chr1	89294	120932	AL627309.1-201	0	-	89294	89294	0	4	2335,150,105,158	0,2796,23405,31480
+chr1	89550	91105	AL627309.3-201	0	-	89550	89550	0	2	500,819	0,736
+chr1	92229	129217	AL627309.1-202	0	-	92229	92229	0	4	11,105,212,163	0,20470,28491,36825
+chr1	110952	129173	AL627309.1-203	0	-	110952	110952	0	3	405,105,119	0,1747,18102
+chr1	120724	133723	AL627309.1-205	0	-	120724	120724	0	4	145,59,169,350	0,149,8330,12649
+chr1	129080	133566	AL627309.1-204	0	-	129080	129080	0	2	143,193	0,4293
+chr1	131024	134836	CICP27-201	0	+	131024	131024	0	1	3812	0
+chr1	135140	135895	AL627309.6-201	0	-	135140	135140	0	1	755	0
+chr1	137681	137965	AL627309.7-201	0	-	137681	137681	0	1	284	0
+chr1	139789	140339	AL627309.2-201	0	-	139789	139789	0	2	58,265	0,285
+chr1	141473	149707	AL627309.5-201	0	-	141473	141473	0	2	1538,3322	0,4912
+chr1	142807	146831	AL627309.5-202	0	-	142807	142807	0	3	204,124,190	0,3578,3834
+chr1	146385	173862	AL627309.5-203	0	-	146385	146385	0	8	124,65,529,59,66,216,132,110	0,9381,17877,19498,21714,22663,26171,27367
+chr1	157783	157887	RNU6-1100P-201	0	-	157783	157783	0	1	104	0
+chr1	160445	161525	AL627309.4-201	0	+	160445	160445	0	2	245,212	0,868
+chr1	165888	168767	AL627309.5-204	0	-	165888	165888	0	3	54,66,158	0,2211,2721
+chr1	182695	184174	FO538757.1-201	0	+	182695	182695	0	5	51,85,78,162,194	0,436,798,1044,1285
+chr1	185216	195411	WASH9P-201	0	-	185216	185216	0	10	134,69,153,159,202,136,137,146,112,149	0,274,1100,1912,2159,2538,2913,3222,3574,10046
+chr1	187890	187958	MIR6859-2-201	0	-	187890	187890	0	1	68	0
+chr1	257863	264733	AP006222.1-201	0	-	257863	257863	0	2	1162,130	0,6740
+chr1	257912	268816	AP006222.1-202	0	-	257912	257912	0	4	1113,85,902,150	0,3637,9390,10754
+chr1	258143	359681	AP006222.1-203	0	-	258143	258143	0	4	882,902,135,337	0,98541,99905,101201
+chr1	258523	268816	AP006222.1-204	0	-	258523	258523	0	3	502,902,150	0,8779,10143
+chr1	258567	259024	AP006222.1-205	0	-	258567	258567	0	1	457	0
+chr1	263014	297502	AP006222.1-206	0	-	263014	263014	0	4	5190,150,105,158	0,5652,26251,34330
+chr1	347981	348366	RPL23AP24-201	0	-	347981	347981	0	1	385	0
+chr1	358856	365704	AL732372.1-201	0	+	358856	358856	0	2	73,534	0,6314
+chr1	358871	365510	AL732372.1-202	0	+	358871	358871	0	2	86,340	0,6299
+chr1	360056	366052	AL732372.1-203	0	+	360056	360056	0	2	112,882	0,5114
+chr1	365388	366151	AL732372.2-201	0	-	365388	365388	0	2	304,133	0,630
+chr1	365394	368450	AL732372.2-202	0	-	365394	365394	0	2	298,200	0,2856
+chr1	365614	379972	AL732372.2-203	0	-	365614	365614	0	3	78,180,204	0,7529,14154
+chr1	373181	485208	AL732372.2-204	0	-	373181	373181	0	3	142,102,169	0,6587,111858
+chr1	439869	440232	WBP1LP7-201	0	+	439869	439869	0	1	363	0
+chr1	450702	451697	OR4F29-201	0	-	450742	451678	0	1	995	0
+chr1	476363	497259	AL732372.2-205	0	-	476363	476363	0	3	582,169,151	0,8676,20745
+chr1	484831	495476	AL732372.2-206	0	-	484831	484831	0	3	377,58,200	0,10160,10445
+chr1	485025	485208	AL732372.2-207	0	-	485025	485025	0	1	183	0
+chr1	485065	489553	AL732372.2-208	0	-	485065	485065	0	2	143,193	0,4295
+chr1	487100	489906	CICP7-201	0	+	487100	487100	0	2	2287,190	0,2616
+chr1	491224	493241	AL732372.3-201	0	-	491224	491224	0	2	765,474	0,1543
+chr1	494381	496605	AL732372.2-209	0	-	494381	494381	0	2	205,342	0,1882
+chr1	494463	502508	AL732372.2-210	0	-	494463	494463	0	5	435,58,191,65,44	0,528,2645,7092,8001
+chr1	494474	495368	AL732372.2-211	0	-	494474	494474	0	3	424,58,92	0,517,802
+chr1	494610	499175	AL732372.2-212	0	-	494610	494610	0	3	288,58,492	0,381,4073
+chr1	494770	498976	AL732372.2-213	0	-	494770	494770	0	5	128,58,191,58,293	0,221,2338,3628,3913
+chr1	497133	498456	AL732372.2-214	0	-	497133	497133	0	3	166,233,58	0,939,1265
+chr1	497204	502598	AL732372.2-215	0	-	497204	497204	0	6	24,233,58,65,57,134	0,868,1194,4351,4982,5260
+chr1	497209	502873	AL732372.2-216	0	-	497209	497209	0	4	90,58,65,409	0,1189,4346,5255
+chr1	497239	499002	AL732372.2-217	0	-	497239	497239	0	4	60,259,58,319	0,807,1159,1444
+chr1	497244	502598	AL732372.2-218	0	-	497244	497244	0	5	55,233,58,65,134	0,828,1154,4311,5220
+chr1	497274	498976	AL732372.2-219	0	-	497274	497274	0	2	25,578	0,1124
+chr1	498280	499175	AL732372.2-220	0	-	498280	498280	0	3	25,58,492	0,118,403
+chr1	498983	501607	AL732372.2-221	0	-	498983	498983	0	2	386,52	0,2572
+chr1	501587	517252	AL732372.2-222	0	-	501587	501587	0	5	33,94,124,65,68	0,1274,3392,12771,15597
+chr1	501603	517225	AL732372.2-223	0	-	501603	501603	0	5	17,197,124,65,70	0,861,3376,12755,15552
+chr1	504469	514413	AL732372.2-224	0	-	504469	504469	0	2	464,55	0,9889
+chr1	504864	522928	AL732372.2-225	0	-	504864	504864	0	4	239,65,82,70	0,9494,12320,17994
+chr1	516375	516479	RF00026.90-201	0	-	516375	516375	0	1	104	0
+chr1	586070	612813	AL669831.3-201	0	-	586070	586070	0	6	288,135,128,180,102,73	0,750,8558,15327,21884,26670
+chr1	586277	588453	AL669831.3-202	0	-	586277	586277	0	3	81,135,337	0,543,1839
+chr1	586944	720194	AL669831.3-203	0	-	586944	586944	0	4	11,105,212,163	0,116740,124766,133087
+chr1	587628	594768	AC114498.1-201	0	+	587628	587628	0	2	73,534	0,6606
+chr1	587667	594574	AC114498.1-202	0	+	587667	587667	0	2	62,340	0,6567
+chr1	594190	633129	AL669831.3-204	0	-	594190	594190	0	5	566,180,102,88,86	0,7207,13764,34728,38853
+chr1	594197	631204	AL669831.3-205	0	-	594197	594197	0	6	559,180,102,124,88,74	0,7200,13757,18543,34721,36933
+chr1	594307	598551	AL669831.3-206	0	-	594307	594307	0	2	449,1253	0,2991
+chr1	594307	827769	AL669831.3-207	0	-	594307	594307	0	4	449,180,212,100	0,7090,117403,233362
+chr1	594307	827796	AL669831.3-208	0	-	594307	594307	0	5	449,180,102,33,127	0,7090,13647,104619,233362
+chr1	594457	733064	AL669831.3-209	0	-	594457	594457	0	8	299,180,102,33,158,169,191,84	0,6940,13497,104469,117307,125574,137559,138523
+chr1	601435	720200	AL669831.3-210	0	-	601435	601435	0	3	142,102,169	0,6519,118596
+chr1	627376	631150	AL669831.3-211	0	-	627376	627376	0	4	447,263,88,20	0,584,1542,3754
+chr1	629061	629433	MTND1P23-201	0	+	629061	629061	0	1	372	0
+chr1	629639	630683	MTND2P28-201	0	+	629639	629639	0	1	1044	0
+chr1	631073	632616	MTCO1P12-201	0	+	631073	631073	0	1	1543	0
+chr1	632324	632413	MIR6723-201	0	-	632324	632324	0	1	89	0
+chr1	632756	633438	MTCO2P12-201	0	+	632756	632756	0	1	682	0
+chr1	633534	633741	MTATP8P1-201	0	+	633534	633534	0	1	207	0
+chr1	633695	634376	MTATP6P1-201	0	+	633695	633695	0	1	681	0
+chr1	634375	634922	MTCO3P12-201	0	+	634375	634375	0	1	547	0
+chr1	674841	675265	WBP1LP6-201	0	+	674841	674841	0	1	424	0
+chr1	685678	686673	OR4F16-201	0	-	685718	686654	0	1	995	0
+chr1	701935	720150	AL669831.3-212	0	-	701935	701935	0	3	405,105,119	0,1749,18096
+chr1	711866	732212	AL669831.3-213	0	-	711866	711866	0	3	56,169,196	0,8165,20150
+chr1	720023	720206	AL669831.3-214	0	-	720023	720023	0	1	183	0
+chr1	720052	724564	AL669831.3-215	0	-	720052	720052	0	2	148,207	0,4305
+chr1	722091	724903	CICP3-201	0	+	722091	722091	0	2	2269,186	0,2626
+chr1	725884	778626	AL669831.1-201	0	-	725884	725884	0	16	3920,58,191,171,197,98,124,65,157,525,59,66,216,132,110,343	0,7422,9538,17295,18310,18843,20810,30192,33082,38838,40444,42663,43612,47091,48286,52399
+chr1	758232	758336	RNU6-1199P-201	0	-	758232	758232	0	1	104	0
+chr1	760910	761989	AL669831.2-201	0	+	760910	760910	0	2	244,212	0,867
+chr1	764722	774280	AL669831.1-202	0	-	764722	764722	0	5	78,104,59,66,110	0,421,1606,3825,9448
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/testMergeExons.bed	Fri Nov 04 15:37:12 2022 +0000
@@ -0,0 +1,46 @@
+1	11868	14409	DDX11L1	0	+	11868	11868	0	4	359,109,78,1189	0,744,1106,1352
+1	14403	29570	WASH7P	0	-	14403	14403	0	11	98,34,152,159,198,136,137,147,99,154,37	0,601,1392,2203,2454,2829,3202,3511,3864,10334,15130
+1	17368	17436	MIR6859-1	0	-	17368	17368	0	1	68	0
+1	29553	31109	MIR1302-2HG	0	+	29553	29553	0	3	486,401,134	0,713,1422
+1	30365	30503	MIR1302-2	0	+	30365	30365	0	1	138	0
+1	34553	36081	FAM138A	0	-	34553	34553	0	3	621,237,361	0,691,1167
+1	52472	53312	OR4G4P	0	+	52472	52472	0	1	840	0
+1	57597	64116	OR4G11P	0	+	57597	57597	0	3	56,157,1201	0,1102,5318
+1	65418	71585	OR4F5	0	+	65564	70005	0	3	15,54,2549	0,101,3618
+1	89294	133723	AL627309.1	0	-	89294	89294	0	7	2335,150,405,105,212,169,350	0,2796,21658,23405,31426,39760,44079
+1	89550	91105	AL627309.3	0	-	89550	89550	0	2	500,819	0,736
+1	131024	134836	CICP27	0	+	131024	131024	0	1	3812	0
+1	135140	135895	AL627309.6	0	-	135140	135140	0	1	755	0
+1	137681	137965	AL627309.7	0	-	137681	137681	0	1	284	0
+1	139789	140339	AL627309.2	0	-	139789	139789	0	2	58,265	0,285
+1	141473	173862	AL627309.5	0	-	141473	141473	0	10	1538,3322,65,529,59,66,158,216,132,110	0,4912,14293,22789,24410,26626,27136,27575,31083,32279
+1	157783	157887	RNU6-1100P	0	-	157783	157783	0	1	104	0
+1	160445	161525	AL627309.4	0	+	160445	160445	0	2	245,212	0,868
+1	182695	184174	FO538757.1	0	+	182695	182695	0	5	51,85,78,162,194	0,436,798,1044,1285
+1	185216	195411	WASH9P	0	-	185216	185216	0	10	134,69,153,159,202,136,137,146,112,149	0,274,1100,1912,2159,2538,2913,3222,3574,10046
+1	187890	187958	MIR6859-2	0	-	187890	187890	0	1	68	0
+1	257863	359681	AP006222.1	0	-	257863	257863	0	9	1162,85,5190,150,105,158,902,135,337	0,3686,5151,10803,31402,39481,98821,100185,101481
+1	347981	348366	RPL23AP24	0	-	347981	347981	0	1	385	0
+1	358856	366052	AL732372.1	0	+	358856	358856	0	3	101,112,882	0,1200,6314
+1	365388	522928	AL732372.2	0	-	365388	365388	0	22	304,133,200,180,204,582,377,193,517,58,200,342,191,259,971,65,57,491,634,65,111,70	0,630,2862,7755,14380,110975,119443,123972,128993,129603,129888,130875,131720,132658,133010,136167,136798,137076,139081,148970,151767,157470
+1	439869	440232	WBP1LP7	0	+	439869	439869	0	1	363	0
+1	450702	451697	OR4F29	0	-	450742	451678	0	1	995	0
+1	487100	489906	CICP7	0	+	487100	487100	0	2	2287,190	0,2616
+1	491224	493241	AL732372.3	0	-	491224	491224	0	2	765,474	0,1543
+1	516375	516479	RF00026	0	-	516375	516375	0	1	104	0
+1	586070	827796	AL669831.3	0	-	586070	586070	0	22	288,135,337,566,1253,180,102,124,447,263,88,74,86,33,405,105,212,183,207,196,84,127	0,750,2046,8120,11228,15327,21884,26670,41306,41890,42848,45060,46973,112856,115865,117614,125640,133953,138287,145946,146910,241599
+1	587628	594768	AC114498.1	0	+	587628	587628	0	2	101,534	0,6606
+1	629061	629433	MTND1P23	0	+	629061	629061	0	1	372	0
+1	629639	630683	MTND2P28	0	+	629639	629639	0	1	1044	0
+1	631073	632616	MTCO1P12	0	+	631073	631073	0	1	1543	0
+1	632324	632413	MIR6723	0	-	632324	632324	0	1	89	0
+1	632756	633438	MTCO2P12	0	+	632756	632756	0	1	682	0
+1	633534	633741	MTATP8P1	0	+	633534	633534	0	1	207	0
+1	633695	634376	MTATP6P1	0	+	633695	633695	0	1	681	0
+1	634375	634922	MTCO3P12	0	+	634375	634375	0	1	547	0
+1	674841	675265	WBP1LP6	0	+	674841	674841	0	1	424	0
+1	685678	686673	OR4F16	0	-	685718	686654	0	1	995	0
+1	722091	724903	CICP3	0	+	722091	722091	0	2	2269,186	0,2626
+1	725884	778626	AL669831.1	0	-	725884	725884	0	16	3920,58,191,171,197,98,124,65,157,525,59,66,216,132,110,343	0,7422,9538,17295,18310,18843,20810,30192,33082,38838,40444,42663,43612,47091,48286,52399
+1	758232	758336	RNU6-1199P	0	-	758232	758232	0	1	104	0
+1	760910	761989	AL669831.2	0	+	760910	760910	0	2	244,212	0,867
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/testMergeNotUCSC.bed	Fri Nov 04 15:37:12 2022 +0000
@@ -0,0 +1,46 @@
+1	11868	14409	DDX11L1	0	+	11868	11868	0	9	359,48,49,109,85,78,1189,154,218	0,141,310,744,744,1106,1352,1352,1584
+1	14403	29570	WASH7P	0	-	14403	14403	0	11	98,34,152,159,198,136,137,147,99,154,37	0,601,1392,2203,2454,2829,3202,3511,3864,10334,15130
+1	17368	17436	MIR6859-1	0	-	17368	17368	0	1	68	0
+1	29553	31109	MIR1302-2HG	0	+	29553	29553	0	5	486,401,104,122,134	0,713,1010,1422,1422
+1	30365	30503	MIR1302-2	0	+	30365	30365	0	1	138	0
+1	34553	36081	FAM138A	0	-	34553	34553	0	5	621,237,205,361,353	0,691,723,1167,1167
+1	52472	53312	OR4G4P	0	+	52472	52472	0	1	840	0
+1	57597	64116	OR4G11P	0	+	57597	57597	0	4	56,157,1201,939	0,1102,5318,5351
+1	65418	71585	OR4F5	0	+	65564	70005	0	4	15,54,2549,1054	0,101,3618,3636
+1	89294	133723	AL627309.1	0	-	89294	89294	0	17	2335,150,11,405,105,105,105,212,145,158,59,163,119,169,143,350,193	0,2796,2935,21658,23405,23405,23405,31426,31430,31480,31579,39760,39760,39760,39786,44079,44079
+1	89550	91105	AL627309.3	0	-	89550	89550	0	2	500,819	0,736
+1	131024	134836	CICP27	0	+	131024	131024	0	1	3812	0
+1	135140	135895	AL627309.6	0	-	135140	135140	0	1	755	0
+1	137681	137965	AL627309.7	0	-	137681	137681	0	1	284	0
+1	139789	140339	AL627309.2	0	-	139789	139789	0	2	58,265	0,285
+1	141473	173862	AL627309.5	0	-	141473	141473	0	16	1538,204,3322,124,124,190,65,529,59,54,66,66,158,216,132,110	0,1334,4912,4912,4912,5168,14293,22789,24410,24415,26626,26626,27136,27575,31083,32279
+1	157783	157887	RNU6-1100P	0	-	157783	157783	0	1	104	0
+1	160445	161525	AL627309.4	0	+	160445	160445	0	2	245,212	0,868
+1	182695	184174	FO538757.1	0	+	182695	182695	0	5	51,85,78,162,194	0,436,798,1044,1285
+1	185216	195411	WASH9P	0	-	185216	185216	0	10	134,69,153,159,202,136,137,146,112,149	0,274,1100,1912,2159,2538,2913,3222,3574,10046
+1	187890	187958	MIR6859-2	0	-	187890	187890	0	1	68	0
+1	257863	359681	AP006222.1	0	-	257863	257863	0	18	1162,1113,882,502,457,85,5190,130,902,902,150,150,150,105,158,902,135,337	0,49,280,660,704,3686,5151,6740,9439,9439,10803,10803,10803,31402,39481,98821,100185,101481
+1	347981	348366	RPL23AP24	0	-	347981	347981	0	1	385	0
+1	358856	366052	AL732372.1	0	+	358856	358856	0	6	73,86,112,534,340,882	0,15,1200,6314,6314,6314
+1	365388	522928	AL732372.2	0	-	365388	365388	0	82	304,298,78,133,200,180,142,204,102,582,377,183,169,169,143,193,205,435,424,288,128,58,58,58,58,58,200,92,342,151,191,191,166,24,90,60,55,25,259,233,233,233,25,58,58,58,58,58,58,578,58,492,293,319,492,386,65,65,65,65,52,33,17,57,44,134,409,134,197,94,464,239,124,124,65,65,55,65,70,68,82,70	0,6,226,630,2862,7755,7793,14380,14380,110975,119443,119637,119651,119651,119677,123972,128993,129075,129086,129222,129382,129603,129603,129603,129603,129603,129888,129888,130875,131720,131720,131720,131745,131816,131821,131851,131856,131886,132658,132684,132684,132684,132892,133010,133010,133010,133010,133010,133010,133010,133010,133295,133295,133295,133295,133595,136167,136167,136167,136167,136167,136199,136215,136798,137076,137076,137076,137076,137076,137473,139081,139476,139591,139591,148970,148970,148970,148970,151767,151796,151796,157470
+1	439869	440232	WBP1LP7	0	+	439869	439869	0	1	363	0
+1	450702	451697	OR4F29	0	-	450742	451678	0	1	995	0
+1	487100	489906	CICP7	0	+	487100	487100	0	2	2287,190	0,2616
+1	491224	493241	AL732372.3	0	-	491224	491224	0	2	765,474	0,1543
+1	516375	516479	RF00026	0	-	516375	516375	0	1	104	0
+1	586070	827796	AL669831.3	0	-	586070	586070	0	59	288,81,135,135,11,337,566,559,449,449,449,299,128,1253,180,180,180,180,180,180,142,102,102,102,102,102,102,73,124,447,263,88,88,88,74,20,86,33,33,405,105,105,212,212,158,56,183,163,169,169,119,169,148,207,191,196,84,100,127	0,207,750,750,874,2046,8120,8127,8237,8237,8237,8387,8558,11228,15327,15327,15327,15327,15327,15327,15365,21884,21884,21884,21884,21884,21884,26670,26670,41306,41890,42848,42848,42848,45060,45060,46973,112856,112856,115865,117614,117614,125640,125640,125694,125796,133953,133961,133961,133961,133961,133961,133982,138287,145946,145946,146910,241599,241599
+1	587628	594768	AC114498.1	0	+	587628	587628	0	4	73,62,534,340	0,39,6606,6606
+1	629061	629433	MTND1P23	0	+	629061	629061	0	1	372	0
+1	629639	630683	MTND2P28	0	+	629639	629639	0	1	1044	0
+1	631073	632616	MTCO1P12	0	+	631073	631073	0	1	1543	0
+1	632324	632413	MIR6723	0	-	632324	632324	0	1	89	0
+1	632756	633438	MTCO2P12	0	+	632756	632756	0	1	682	0
+1	633534	633741	MTATP8P1	0	+	633534	633534	0	1	207	0
+1	633695	634376	MTATP6P1	0	+	633695	633695	0	1	681	0
+1	634375	634922	MTCO3P12	0	+	634375	634375	0	1	547	0
+1	674841	675265	WBP1LP6	0	+	674841	674841	0	1	424	0
+1	685678	686673	OR4F16	0	-	685718	686654	0	1	995	0
+1	722091	724903	CICP3	0	+	722091	722091	0	2	2269,186	0,2626
+1	725884	778626	AL669831.1	0	-	725884	725884	0	21	3920,58,191,171,197,98,124,65,157,525,78,104,59,59,66,66,216,132,110,110,343	0,7422,9538,17295,18310,18843,20810,30192,33082,38838,38838,39259,40444,40444,42663,42663,43612,47091,48286,48286,52399
+1	758232	758336	RNU6-1199P	0	-	758232	758232	0	1	104	0
+1	760910	761989	AL669831.2	0	+	760910	760910	0	2	244,212	0,867
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/testWithGenes.bed	Fri Nov 04 15:37:12 2022 +0000
@@ -0,0 +1,105 @@
+chr1	11868	14409	DDX11L1	0	+	11868	11868	0	3	359,109,1189	0,744,1352
+chr1	12009	13670	DDX11L1	0	+	12009	12009	0	6	48,49,85,78,154,218	0,169,603,965,1211,1443
+chr1	14403	29570	WASH7P	0	-	14403	14403	0	11	98,34,152,159,198,136,137,147,99,154,37	0,601,1392,2203,2454,2829,3202,3511,3864,10334,15130
+chr1	17368	17436	MIR6859-1	0	-	17368	17368	0	1	68	0
+chr1	29553	31097	MIR1302-2HG	0	+	29553	29553	0	3	486,104,122	0,1010,1422
+chr1	30266	31109	MIR1302-2HG	0	+	30266	30266	0	2	401,134	0,709
+chr1	30365	30503	MIR1302-2	0	+	30365	30365	0	1	138	0
+chr1	34553	36081	FAM138A	0	-	34553	34553	0	3	621,205,361	0,723,1167
+chr1	35244	36073	FAM138A	0	-	35244	35244	0	2	237,353	0,476
+chr1	52472	53312	OR4G4P	0	+	52472	52472	0	1	840	0
+chr1	57597	64116	OR4G11P	0	+	57597	57597	0	3	56,157,1201	0,1102,5318
+chr1	62948	63887	OR4G11P	0	+	62948	62948	0	1	939	0
+chr1	65418	71585	OR4F5	0	+	65564	70005	0	3	15,54,2549	0,101,3618
+chr1	69054	70108	OR4F5	0	+	69090	70005	0	1	1054	0
+chr1	89294	120932	AL627309.1	0	-	89294	89294	0	4	2335,150,105,158	0,2796,23405,31480
+chr1	89550	91105	AL627309.3	0	-	89550	89550	0	2	500,819	0,736
+chr1	92229	129217	AL627309.1	0	-	92229	92229	0	4	11,105,212,163	0,20470,28491,36825
+chr1	110952	129173	AL627309.1	0	-	110952	110952	0	3	405,105,119	0,1747,18102
+chr1	120724	133723	AL627309.1	0	-	120724	120724	0	4	145,59,169,350	0,149,8330,12649
+chr1	129080	133566	AL627309.1	0	-	129080	129080	0	2	143,193	0,4293
+chr1	131024	134836	CICP27	0	+	131024	131024	0	1	3812	0
+chr1	135140	135895	AL627309.6	0	-	135140	135140	0	1	755	0
+chr1	137681	137965	AL627309.7	0	-	137681	137681	0	1	284	0
+chr1	139789	140339	AL627309.2	0	-	139789	139789	0	2	58,265	0,285
+chr1	141473	149707	AL627309.5	0	-	141473	141473	0	2	1538,3322	0,4912
+chr1	142807	146831	AL627309.5	0	-	142807	142807	0	3	204,124,190	0,3578,3834
+chr1	146385	173862	AL627309.5	0	-	146385	146385	0	8	124,65,529,59,66,216,132,110	0,9381,17877,19498,21714,22663,26171,27367
+chr1	157783	157887	RNU6-1100P	0	-	157783	157783	0	1	104	0
+chr1	160445	161525	AL627309.4	0	+	160445	160445	0	2	245,212	0,868
+chr1	165888	168767	AL627309.5	0	-	165888	165888	0	3	54,66,158	0,2211,2721
+chr1	182695	184174	FO538757.1	0	+	182695	182695	0	5	51,85,78,162,194	0,436,798,1044,1285
+chr1	185216	195411	WASH9P	0	-	185216	185216	0	10	134,69,153,159,202,136,137,146,112,149	0,274,1100,1912,2159,2538,2913,3222,3574,10046
+chr1	187890	187958	MIR6859-2	0	-	187890	187890	0	1	68	0
+chr1	257863	264733	AP006222.1	0	-	257863	257863	0	2	1162,130	0,6740
+chr1	257912	268816	AP006222.1	0	-	257912	257912	0	4	1113,85,902,150	0,3637,9390,10754
+chr1	258143	359681	AP006222.1	0	-	258143	258143	0	4	882,902,135,337	0,98541,99905,101201
+chr1	258523	268816	AP006222.1	0	-	258523	258523	0	3	502,902,150	0,8779,10143
+chr1	258567	259024	AP006222.1	0	-	258567	258567	0	1	457	0
+chr1	263014	297502	AP006222.1	0	-	263014	263014	0	4	5190,150,105,158	0,5652,26251,34330
+chr1	347981	348366	RPL23AP24	0	-	347981	347981	0	1	385	0
+chr1	358856	365704	AL732372.1	0	+	358856	358856	0	2	73,534	0,6314
+chr1	358871	365510	AL732372.1	0	+	358871	358871	0	2	86,340	0,6299
+chr1	360056	366052	AL732372.1	0	+	360056	360056	0	2	112,882	0,5114
+chr1	365388	366151	AL732372.2	0	-	365388	365388	0	2	304,133	0,630
+chr1	365394	368450	AL732372.2	0	-	365394	365394	0	2	298,200	0,2856
+chr1	365614	379972	AL732372.2	0	-	365614	365614	0	3	78,180,204	0,7529,14154
+chr1	373181	485208	AL732372.2	0	-	373181	373181	0	3	142,102,169	0,6587,111858
+chr1	439869	440232	WBP1LP7	0	+	439869	439869	0	1	363	0
+chr1	450702	451697	OR4F29	0	-	450742	451678	0	1	995	0
+chr1	476363	497259	AL732372.2	0	-	476363	476363	0	3	582,169,151	0,8676,20745
+chr1	484831	495476	AL732372.2	0	-	484831	484831	0	3	377,58,200	0,10160,10445
+chr1	485025	485208	AL732372.2	0	-	485025	485025	0	1	183	0
+chr1	485065	489553	AL732372.2	0	-	485065	485065	0	2	143,193	0,4295
+chr1	487100	489906	CICP7	0	+	487100	487100	0	2	2287,190	0,2616
+chr1	491224	493241	AL732372.3	0	-	491224	491224	0	2	765,474	0,1543
+chr1	494381	496605	AL732372.2	0	-	494381	494381	0	2	205,342	0,1882
+chr1	494463	502508	AL732372.2	0	-	494463	494463	0	5	435,58,191,65,44	0,528,2645,7092,8001
+chr1	494474	495368	AL732372.2	0	-	494474	494474	0	3	424,58,92	0,517,802
+chr1	494610	499175	AL732372.2	0	-	494610	494610	0	3	288,58,492	0,381,4073
+chr1	494770	498976	AL732372.2	0	-	494770	494770	0	5	128,58,191,58,293	0,221,2338,3628,3913
+chr1	497133	498456	AL732372.2	0	-	497133	497133	0	3	166,233,58	0,939,1265
+chr1	497204	502598	AL732372.2	0	-	497204	497204	0	6	24,233,58,65,57,134	0,868,1194,4351,4982,5260
+chr1	497209	502873	AL732372.2	0	-	497209	497209	0	4	90,58,65,409	0,1189,4346,5255
+chr1	497239	499002	AL732372.2	0	-	497239	497239	0	4	60,259,58,319	0,807,1159,1444
+chr1	497244	502598	AL732372.2	0	-	497244	497244	0	5	55,233,58,65,134	0,828,1154,4311,5220
+chr1	497274	498976	AL732372.2	0	-	497274	497274	0	2	25,578	0,1124
+chr1	498280	499175	AL732372.2	0	-	498280	498280	0	3	25,58,492	0,118,403
+chr1	498983	501607	AL732372.2	0	-	498983	498983	0	2	386,52	0,2572
+chr1	501587	517252	AL732372.2	0	-	501587	501587	0	5	33,94,124,65,68	0,1274,3392,12771,15597
+chr1	501603	517225	AL732372.2	0	-	501603	501603	0	5	17,197,124,65,70	0,861,3376,12755,15552
+chr1	504469	514413	AL732372.2	0	-	504469	504469	0	2	464,55	0,9889
+chr1	504864	522928	AL732372.2	0	-	504864	504864	0	4	239,65,82,70	0,9494,12320,17994
+chr1	516375	516479	RF00026	0	-	516375	516375	0	1	104	0
+chr1	586070	612813	AL669831.3	0	-	586070	586070	0	6	288,135,128,180,102,73	0,750,8558,15327,21884,26670
+chr1	586277	588453	AL669831.3	0	-	586277	586277	0	3	81,135,337	0,543,1839
+chr1	586944	720194	AL669831.3	0	-	586944	586944	0	4	11,105,212,163	0,116740,124766,133087
+chr1	587628	594768	AC114498.1	0	+	587628	587628	0	2	73,534	0,6606
+chr1	587667	594574	AC114498.1	0	+	587667	587667	0	2	62,340	0,6567
+chr1	594190	633129	AL669831.3	0	-	594190	594190	0	5	566,180,102,88,86	0,7207,13764,34728,38853
+chr1	594197	631204	AL669831.3	0	-	594197	594197	0	6	559,180,102,124,88,74	0,7200,13757,18543,34721,36933
+chr1	594307	598551	AL669831.3	0	-	594307	594307	0	2	449,1253	0,2991
+chr1	594307	827769	AL669831.3	0	-	594307	594307	0	4	449,180,212,100	0,7090,117403,233362
+chr1	594307	827796	AL669831.3	0	-	594307	594307	0	5	449,180,102,33,127	0,7090,13647,104619,233362
+chr1	594457	733064	AL669831.3	0	-	594457	594457	0	8	299,180,102,33,158,169,191,84	0,6940,13497,104469,117307,125574,137559,138523
+chr1	601435	720200	AL669831.3	0	-	601435	601435	0	3	142,102,169	0,6519,118596
+chr1	627376	631150	AL669831.3	0	-	627376	627376	0	4	447,263,88,20	0,584,1542,3754
+chr1	629061	629433	MTND1P23	0	+	629061	629061	0	1	372	0
+chr1	629639	630683	MTND2P28	0	+	629639	629639	0	1	1044	0
+chr1	631073	632616	MTCO1P12	0	+	631073	631073	0	1	1543	0
+chr1	632324	632413	MIR6723	0	-	632324	632324	0	1	89	0
+chr1	632756	633438	MTCO2P12	0	+	632756	632756	0	1	682	0
+chr1	633534	633741	MTATP8P1	0	+	633534	633534	0	1	207	0
+chr1	633695	634376	MTATP6P1	0	+	633695	633695	0	1	681	0
+chr1	634375	634922	MTCO3P12	0	+	634375	634375	0	1	547	0
+chr1	674841	675265	WBP1LP6	0	+	674841	674841	0	1	424	0
+chr1	685678	686673	OR4F16	0	-	685718	686654	0	1	995	0
+chr1	701935	720150	AL669831.3	0	-	701935	701935	0	3	405,105,119	0,1749,18096
+chr1	711866	732212	AL669831.3	0	-	711866	711866	0	3	56,169,196	0,8165,20150
+chr1	720023	720206	AL669831.3	0	-	720023	720023	0	1	183	0
+chr1	720052	724564	AL669831.3	0	-	720052	720052	0	2	148,207	0,4305
+chr1	722091	724903	CICP3	0	+	722091	722091	0	2	2269,186	0,2626
+chr1	725884	778626	AL669831.1	0	-	725884	725884	0	16	3920,58,191,171,197,98,124,65,157,525,59,66,216,132,110,343	0,7422,9538,17295,18310,18843,20810,30192,33082,38838,40444,42663,43612,47091,48286,52399
+chr1	758232	758336	RNU6-1199P	0	-	758232	758232	0	1	104	0
+chr1	760910	761989	AL669831.2	0	+	760910	760910	0	2	244,212	0,867
+chr1	764722	774280	AL669831.1	0	-	764722	764722	0	5	78,104,59,66,110	0,421,1606,3825,9448