annotate gff_to_bed_converter.py @ 0:696e702ebf74 draft

"planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
author petr-novak
date Mon, 09 May 2022 08:26:30 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
1 #!/usr/bin/env python
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
2 from __future__ import print_function
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
3
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
4 import sys
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
5
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
6 from galaxy.datatypes.util.gff_util import parse_gff_attributes
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
7
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
8
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
def get_bed_line(chrom, name, strand, blocks):
    """Return a newline-terminated BED line for the given feature.

    Args:
        chrom: Chromosome / sequence name written to BED column 1.
        name: Feature name written to BED column 4.
        strand: Strand symbol written to BED column 6.
        blocks: Non-empty sequence of ``(start, end)`` integer pairs, already
            in the coordinates that should appear in the output. The pairs
            may be supplied in any order.

    Returns:
        A 6-column BED line when there is exactly one block; otherwise a
        12-column BED line whose blocks are listed in ascending start order.
        The score and itemRgb columns are always written as ``0``.

    Raises:
        ValueError: If ``blocks`` is empty.
    """
    if not blocks:
        # Without this guard an empty list would produce a line whose start
        # is a huge sentinel and whose end is -1.
        raise ValueError("at least one block is required to build a BED line")

    if len(blocks) == 1:
        # Use simple BED format if there is only a single block:
        #   chrom, chromStart, chromEnd, name, score, strand
        start, end = blocks[0]
        return "%s\t%i\t%i\t%s\t0\t%s\n" % (chrom, start, end, name, strand)

    # BED12 expects blockStarts in ascending order with the first one equal
    # to 0, but the caller hands blocks over in file order (which may be
    # descending, e.g. for minus-strand features). Sort a copy so the
    # caller's list is left untouched.
    ordered = sorted(blocks, key=tuple)

    # Transcript extent: smallest start (first after sorting), largest end.
    t_start = ordered[0][0]
    t_end = max(block_end for _, block_end in ordered)

    # Block starts are relative to the transcript start; sizes are lengths.
    block_starts = [str(block_start - t_start) for block_start, _ in ordered]
    block_sizes = [str(block_end - block_start) for block_start, block_end in ordered]

    # BED format: chrom, chromStart, chromEnd, name, score, strand,
    #   thickStart, thickEnd, itemRgb, blockCount, blockSizes, blockStarts
    #
    # Render the complete feature with thick blocks. There's no clear way to
    # do better unless the block names are analyzed, and making everything
    # thick makes more sense than making everything thin.
    return "%s\t%i\t%i\t%s\t0\t%s\t%i\t%i\t0\t%i\t%s\t%s\n" % (
        chrom,
        t_start,
        t_end,
        name,
        strand,
        t_start,
        t_end,
        len(block_starts),
        ",".join(block_sizes),
        ",".join(block_starts),
    )
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
60
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
61
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
def _parse_gff_line(line):
    """Split one GFF data line into the pieces needed for BED output.

    Returns a ``(chrom, feature_type, coords, strand, transcript_id)`` tuple,
    where ``coords`` is ``[start - 1, end]`` and ``transcript_id`` is ``None``
    when the attribute is absent. Raises (``IndexError``, ``ValueError`` or
    whatever ``parse_gff_attributes`` raises) if the line is malformed.
    """
    # GFF columns: seqid, source, type, start, end, score, strand, phase, attributes
    elems = line.split("\t")
    # Only the start is shifted: the code converts the GFF start to a
    # 0-based BED start and keeps the end as is.
    coords = [int(elems[3]) - 1, int(elems[4])]
    strand = elems[6]
    if strand not in ("+", "-"):
        # Anything other than an explicit strand is written as forward.
        strand = "+"
    # parse_gff_attributes is imported from Galaxy; presumably it returns a
    # dict-like mapping of attribute name -> value (only .get() is relied on).
    attributes = parse_gff_attributes(elems[8])
    return elems[0], elems[2], coords, strand, attributes.get("transcript_id", None)


def __main__():
    """Convert the GFF file ``sys.argv[1]`` into the BED file ``sys.argv[2]``.

    Consecutive lines that share a ``transcript_id`` attribute are merged
    into one multi-block BED entry named after that ID. Lines without a
    ``transcript_id`` are written as single-block entries named after the
    GFF feature type (column 3, spaces replaced by underscores). Blank lines,
    comment lines and lines that cannot be parsed are skipped and counted.
    A one-line summary is printed to stdout when done.
    """
    input_name = sys.argv[1]
    output_name = sys.argv[2]
    skipped_lines = 0
    first_skipped_line = 0
    # Stays 0 for an empty input file, so the summary does not over-count.
    line_number = 0
    cur_transcript_chrome = None
    cur_transcript_id = None
    cur_transcript_strand = None
    cur_transcripts_blocks = []  # [start, end] for each block.

    with open(output_name, "w") as out, open(input_name) as in_fh:
        for line_number, line in enumerate(in_fh, 1):
            line = line.rstrip("\r\n")

            record = None
            if line and not line.startswith("#"):
                try:
                    record = _parse_gff_line(line)
                except Exception:
                    # Best effort: the attribute parser is external, so any
                    # parsing failure marks the line as invalid. Only parsing
                    # is guarded; output errors below are not swallowed.
                    record = None

            if record is None:
                skipped_lines += 1
                if not first_skipped_line:
                    first_skipped_line = line_number
                continue

            chrom, feature_type, coords, strand, t_id = record

            if t_id and t_id == cur_transcript_id:
                # Line is element of the current transcript and will be a
                # block in its BED entry.
                cur_transcripts_blocks.append(coords)
                continue

            # The current transcript (if any) ends here; write it exactly once.
            if cur_transcript_id:
                out.write(
                    get_bed_line(
                        cur_transcript_chrome,
                        cur_transcript_id,
                        cur_transcript_strand,
                        cur_transcripts_blocks,
                    )
                )

            if t_id:
                # Start new transcript.
                cur_transcript_chrome = chrom
                cur_transcript_id = t_id
                cur_transcript_strand = strand
                cur_transcripts_blocks = [coords]
            else:
                # No transcript ID: forget the transcript just written so it
                # is not emitted again later, then write this line on its own.
                cur_transcript_chrome = None
                cur_transcript_id = None
                cur_transcript_strand = None
                cur_transcripts_blocks = []
                # Replace any spaces in the name with underscores so UCSC
                # will not complain.
                name = feature_type.replace(" ", "_")
                out.write(get_bed_line(chrom, name, strand, [coords]))

        # Write last transcript.
        if cur_transcript_id:
            out.write(
                get_bed_line(
                    cur_transcript_chrome,
                    cur_transcript_id,
                    cur_transcript_strand,
                    cur_transcripts_blocks,
                )
            )

    info_msg = "%i lines converted to BED. " % (line_number - skipped_lines)
    if skipped_lines > 0:
        info_msg += "Skipped %d blank/comment/invalid lines starting with line #%d." % (
            skipped_lines,
            first_skipped_line,
        )
    print(info_msg)
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
157
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
158
696e702ebf74 "planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff changeset
# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    __main__()