Mercurial > repos > lldelisle > fromgtftobed12
annotate fromgtfTobed12.py @ 1:6fd4b3b90220 draft default tip
planemo upload for repository https://github.com/lldelisle/tools-lldelisle/tree/master/tools/fromgtfTobed12 commit 15b8c2cc83708044413a152322bcbfca8a74d29a
author | lldelisle |
---|---|
date | Fri, 03 Nov 2023 14:13:51 +0000 |
parents | 418e4d0fe0bd |
children |
rev | line source |
---|---|
0
418e4d0fe0bd
planemo upload for repository https://github.com/lldelisle/tools-lldelisle/tree/master/tools/fromgtfTobed12 commit 1aaffda5b95e0389e315179345642c0d005867c1
lldelisle
parents:
diff
changeset
|
1 import argparse |
418e4d0fe0bd
planemo upload for repository https://github.com/lldelisle/tools-lldelisle/tree/master/tools/fromgtfTobed12 commit 1aaffda5b95e0389e315179345642c0d005867c1
lldelisle
parents:
diff
changeset
|
2 import sys |
418e4d0fe0bd
planemo upload for repository https://github.com/lldelisle/tools-lldelisle/tree/master/tools/fromgtfTobed12 commit 1aaffda5b95e0389e315179345642c0d005867c1
lldelisle
parents:
diff
changeset
|
3 import warnings |
418e4d0fe0bd
planemo upload for repository https://github.com/lldelisle/tools-lldelisle/tree/master/tools/fromgtfTobed12 commit 1aaffda5b95e0389e315179345642c0d005867c1
lldelisle
parents:
diff
changeset
|
4 |
418e4d0fe0bd
planemo upload for repository https://github.com/lldelisle/tools-lldelisle/tree/master/tools/fromgtfTobed12 commit 1aaffda5b95e0389e315179345642c0d005867c1
lldelisle
parents:
diff
changeset
|
5 import gffutils |
418e4d0fe0bd
planemo upload for repository https://github.com/lldelisle/tools-lldelisle/tree/master/tools/fromgtfTobed12 commit 1aaffda5b95e0389e315179345642c0d005867c1
lldelisle
parents:
diff
changeset
|
6 |
418e4d0fe0bd
planemo upload for repository https://github.com/lldelisle/tools-lldelisle/tree/master/tools/fromgtfTobed12 commit 1aaffda5b95e0389e315179345642c0d005867c1
lldelisle
parents:
diff
changeset
|
# gffutils emits a warning when the GTF already contains gene (resp.
# transcript) features, suggesting the `disable_infer_genes` (resp.
# `disable_infer_transcripts`) option. These warnings are silenced here.
# The end of the message was reworded in gffutils v0.10
# (`disable_infer_genes` became `disable_infer_genes=True`), so only the
# beginning of each message is given: `message` is a regular expression
# which only needs to match the start of the warning text, hence a single
# filter per feature type covers both the old and the new wording.
warnings.filterwarnings(
    "ignore",
    message="It appears you have a gene feature in your GTF file")
warnings.filterwarnings(
    "ignore",
    message="It appears you have a transcript feature in your GTF file")
418e4d0fe0bd
planemo upload for repository https://github.com/lldelisle/tools-lldelisle/tree/master/tools/fromgtfTobed12 commit 1aaffda5b95e0389e315179345642c0d005867c1
lldelisle
parents:
diff
changeset
|
24 |
418e4d0fe0bd
planemo upload for repository https://github.com/lldelisle/tools-lldelisle/tree/master/tools/fromgtfTobed12 commit 1aaffda5b95e0389e315179345642c0d005867c1
lldelisle
parents:
diff
changeset
|
25 |
1
6fd4b3b90220
planemo upload for repository https://github.com/lldelisle/tools-lldelisle/tree/master/tools/fromgtfTobed12 commit 15b8c2cc83708044413a152322bcbfca8a74d29a
lldelisle
parents:
0
diff
changeset
|
def _feature_name(db, feature, attribute):
    """Return the value of `attribute` to use as BED name for `feature`.

    The attribute is first looked up on the feature itself, then on its
    exons. If it cannot be found, the feature id is returned.
    """
    try:
        return feature.attributes[attribute][0]
    except (KeyError, IndexError):
        # Not available on the feature: try to guess it from the exons.
        pass
    try:
        names = [exon.attributes[attribute][0]
                 for exon in db.children(feature, featuretype='exon',
                                         order_by='start')]
    except (KeyError, IndexError):
        # At least one exon does not have the attribute.
        return feature.id
    # When exons disagree the name of the first exon (by start) is used so
    # that the result does not depend on set ordering.
    return names[0] if names else feature.id


def _thick_bounds(db, feature):
    """Return (thickStart, thickEnd) of `feature` in BED coordinates.

    The interval spans from the smallest CDS start to the largest CDS end
    among the children of `feature`. If there is no CDS, both values are
    set to the start of the feature as proposed here:
    https://genome.ucsc.edu/FAQ/FAQformat.html#format1
    """
    cds_starts = []
    cds_ends = []
    for cds in db.children(feature, featuretype='CDS'):
        cds_starts.append(cds.start)
        cds_ends.append(cds.end)
    if not cds_starts:
        return feature.start - 1, feature.start - 1
    # When there is one entry per gene the CDS of several transcripts are
    # pooled: the CDS with the largest start is not necessarily the one
    # with the largest end, so the maximum of the ends is used.
    return min(cds_starts) - 1, max(cds_ends)


def _exon_blocks(db, feature, merge_overlapping):
    """Return (starts, lengths) of the exons of `feature`.

    Starts are 0-based absolute positions sorted increasingly. If
    `merge_overlapping` is true, exons which overlap are fused into a
    single block (exons which only touch each other are kept separate).
    """
    # A single query guarantees that starts and lengths are paired.
    exons = [(exon.start - 1, exon.end)
             for exon in db.children(feature, featuretype='exon',
                                     order_by='start')]
    if merge_overlapping:
        merged = []
        for start, end in exons:
            if merged and start < merged[-1][1]:
                # This is an overlapping exon:
                # we extend the current block if necessary
                merged[-1][1] = max(merged[-1][1], end)
            else:
                # This is a non-overlapping exon: we start a new block
                merged.append([start, end])
        exons = merged
    return ([start for start, _ in exons],
            [end - start for start, end in exons])


def convert_gtf_to_bed(fn, fo, preferedName, mergeTranscripts,
                       mergeTranscriptsAndOverlappingExons, ucsc):
    """Write the content of a gtf as bed12 with one line per feature.

    Parameters:
        fn: input gtf, given to gffutils.create_db.
        fo: output, any object with a `write` method accepting str.
        preferedName: name of the attribute used for the 4th column.
            If None, "gene_name" is used when transcripts are merged
            and "transcript_name" otherwise.
        mergeTranscripts: if true, one line is written per gene instead
            of one line per transcript (all exons are reported).
        mergeTranscriptsAndOverlappingExons: same as mergeTranscripts
            but overlapping exons are fused.
        ucsc: if true, 'chr' is prepended to the chromosome names which
            do not already begin with 'chr'.
    """
    db = gffutils.create_db(fn, ':memory:')
    per_gene = mergeTranscripts or mergeTranscriptsAndOverlappingExons
    if preferedName is not None:
        prefered_name = preferedName
    elif per_gene:
        prefered_name = "gene_name"
    else:
        prefered_name = "transcript_name"
    if per_gene:
        all_items = db.features_of_type("gene", order_by='start')
    else:
        all_items = db.features_of_type("transcript", order_by='start')
    # For each transcript/gene:
    for tr in all_items:
        # The name would be the name of the transcript/gene if it exists
        trName = _feature_name(db, tr, prefered_name)
        # The gtf uses 1-based closed intervals and
        # bed uses 0-based half-open intervals so
        # one is removed from each start.
        # If the cds is defined in the gtf,
        # it is used to define the thick start and end
        cds_start, cds_end = _thick_bounds(db, tr)
        # Get all exons starts and lengths
        exons_starts, exons_length = _exon_blocks(
            db, tr, mergeTranscriptsAndOverlappingExons)
        # Rewrite the chromosome name if needed:
        chrom = tr.chrom
        if ucsc and chrom[0:3] != 'chr':
            chrom = 'chr' + chrom
        # Block starts are relative to the start of the feature
        fo.write("%s\t%d\t%d\t%s\t%d\t%s\t%d\t%d\t%s\t%d\t%s\t%s\n" %
                 (chrom, tr.start - 1, tr.end, trName, 0, tr.strand,
                  cds_start, cds_end, "0", len(exons_starts),
                  ",".join([str(ex_l) for ex_l in exons_length]),
                  ",".join([str(s - (tr.start - 1)) for s in exons_starts])))
418e4d0fe0bd
planemo upload for repository https://github.com/lldelisle/tools-lldelisle/tree/master/tools/fromgtfTobed12 commit 1aaffda5b95e0389e315179345642c0d005867c1
lldelisle
parents:
diff
changeset
|
123 |
418e4d0fe0bd
planemo upload for repository https://github.com/lldelisle/tools-lldelisle/tree/master/tools/fromgtfTobed12 commit 1aaffda5b95e0389e315179345642c0d005867c1
lldelisle
parents:
diff
changeset
|
124 |
418e4d0fe0bd
planemo upload for repository https://github.com/lldelisle/tools-lldelisle/tree/master/tools/fromgtfTobed12 commit 1aaffda5b95e0389e315179345642c0d005867c1
lldelisle
parents:
diff
changeset
|
# Command-line driver: declare the options, parse them, run the conversion.
argp = argparse.ArgumentParser(
    description="Convert a gtf to a bed12 with one entry per transcript/gene"
)

# Source gtf (positional) and destination (stdout unless --output is given).
argp.add_argument(
    "input",
    default=None,
    help="Input gtf file (can be gzip).",
)
argp.add_argument(
    "--output",
    type=argparse.FileType("w"),
    default=sys.stdout,
    help="Output bed12 file.",
)

# Switches affecting chromosome naming and the name written in the bed.
argp.add_argument(
    "--ucscformat",
    action="store_true",
    help="If you want that all chromosome names begin with 'chr'.",
)
argp.add_argument(
    "--preferedName",
    default=None,
    help="Name to use for bed output.",
)

# The two merge modes are alternatives: argparse rejects a command line
# that sets both of them.
group = argp.add_mutually_exclusive_group()
group.add_argument(
    "--mergeTranscripts",
    action="store_true",
    help=(
        "Merge all transcripts into a single entry "
        "to have one line per gene."
    ),
)
group.add_argument(
    "--mergeTranscriptsAndOverlappingExons",
    action="store_true",
    help=(
        "Merge all transcripts into a single entry "
        "to have one line per gene and merge overlapping exons."
    ),
)

args = argp.parse_args()

# Arguments are passed positionally, so their order must match the
# signature of convert_gtf_to_bed.
convert_gtf_to_bed(
    args.input,
    args.output,
    args.preferedName,
    args.mergeTranscripts,
    args.mergeTranscriptsAndOverlappingExons,
    args.ucscformat,
)