Mercurial > repos > petr-novak > various_galaxy_tools
annotate gff_to_bed_converter.py @ 0:696e702ebf74 draft
"planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
author | petr-novak |
---|---|
date | Mon, 09 May 2022 08:26:30 +0000 |
parents | |
children |
rev | line source |
---|---|
0
696e702ebf74
"planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff
changeset
|
1 #!/usr/bin/env python |
696e702ebf74
"planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff
changeset
|
2 from __future__ import print_function |
696e702ebf74
"planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff
changeset
|
3 |
696e702ebf74
"planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff
changeset
|
4 import sys |
696e702ebf74
"planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff
changeset
|
5 |
696e702ebf74
"planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff
changeset
|
6 from galaxy.datatypes.util.gff_util import parse_gff_attributes |
696e702ebf74
"planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff
changeset
|
7 |
696e702ebf74
"planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff
changeset
|
8 |
696e702ebf74
"planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff
changeset
|
def get_bed_line(chrom, name, strand, blocks):
    """Return one newline-terminated BED line for a feature.

    Args:
        chrom: Chromosome / sequence name written to the first column.
        name: Feature name written to the BED ``name`` column.
        strand: Strand string written to the BED ``strand`` column.
        blocks: Non-empty sequence of ``(start, end)`` integer pairs, one per
            block. Coordinates are written as given; no offset is applied
            here.

    Returns:
        A 6-column BED line when there is exactly one block, otherwise a
        12-column BED line whose blocks are ordered by ascending start.

    Raises:
        ValueError: If ``blocks`` is empty (there is nothing to describe).
    """
    if not blocks:
        raise ValueError("get_bed_line() requires at least one block")

    if len(blocks) == 1:
        # Use simple BED format if there is only a single block:
        #   chrom, chromStart, chromEnd, name, score, strand
        start, end = blocks[0]
        return "%s\t%i\t%i\t%s\t0\t%s\n" % (chrom, start, end, name, strand)

    # BED12 expects the first blockStart to be 0 and blocks listed in
    # ascending order; the input order is not guaranteed (e.g. exons listed
    # in descending order), so sort before deriving anything.
    ordered = sorted((block_start, block_end) for block_start, block_end in blocks)

    # Transcript extent: smallest start and largest end over all blocks.
    t_start = ordered[0][0]
    t_end = max(block_end for _, block_end in ordered)

    # Block starts are relative to the transcript start; sizes are absolute.
    block_starts = [str(block_start - t_start) for block_start, _ in ordered]
    block_sizes = [str(block_end - block_start) for block_start, block_end in ordered]

    #
    # Bed format: chrom, chromStart, chromEnd, name, score, strand,
    #   thickStart, thickEnd, itemRgb, blockCount, blockSizes, blockStarts
    #
    # Render the complete feature with thick blocks. There's no clear way to
    # tell thick from thin without analyzing the block names, and making
    # everything thick makes more sense than making everything thin.
    #
    return "%s\t%i\t%i\t%s\t0\t%s\t%i\t%i\t0\t%i\t%s\t%s\n" % (
        chrom,
        t_start,
        t_end,
        name,
        strand,
        t_start,
        t_end,
        len(ordered),
        ",".join(block_sizes),
        ",".join(block_starts),
    )
696e702ebf74
"planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff
changeset
|
60 |
696e702ebf74
"planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff
changeset
|
61 |
696e702ebf74
"planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff
changeset
|
def __main__():
    """Convert a GFF file to BED and print a one-line summary.

    Reads the input path from ``sys.argv[1]`` and writes BED to the path in
    ``sys.argv[2]``. Consecutive lines sharing a ``transcript_id`` attribute
    are merged into a single multi-block BED entry; lines without a
    ``transcript_id`` are written as their own single-block entry, named
    after the third GFF column. Blank lines, ``#`` comment lines and lines
    that fail to parse are counted as skipped rather than aborting the run.
    """
    input_name = sys.argv[1]
    output_name = sys.argv[2]
    skipped_lines = 0
    first_skipped_line = 0
    # 1-based number of the last line read; stays 0 for an empty input so the
    # summary does not claim that a line was converted.
    line_number = 0
    cur_transcript_chrome = None
    cur_transcript_id = None
    cur_transcript_strand = None
    cur_transcripts_blocks = []  # (start, end) for each block.
    with open(output_name, "w") as out, open(input_name) as in_fh:
        for line_number, line in enumerate(in_fh, 1):
            line = line.rstrip("\r\n")
            if not line or line.startswith("#"):
                # Blank or comment line.
                skipped_lines += 1
                if not first_skipped_line:
                    first_skipped_line = line_number
                continue

            try:
                # GFF format: chrom source, name, chromStart, chromEnd, score, strand, attributes
                elems = line.split("\t")
                # The GFF start is decremented by one for the BED output; the
                # end is used as-is.
                coords = [int(elems[3]) - 1, int(elems[4])]
                strand = elems[6]
                if strand not in ["+", "-"]:
                    strand = "+"
                attributes = parse_gff_attributes(elems[8])
                t_id = attributes.get("transcript_id", None)

                if not t_id:
                    #
                    # No transcript ID, so write last transcript and write current line as its own line.
                    #
                    if cur_transcript_id:
                        out.write(
                            get_bed_line(
                                cur_transcript_chrome,
                                cur_transcript_id,
                                cur_transcript_strand,
                                cur_transcripts_blocks,
                            )
                        )
                        # The pending transcript is now on disk; forget it so
                        # it is not written a second time at the next
                        # transcript boundary or at end of file.
                        cur_transcript_chrome = None
                        cur_transcript_id = None
                        cur_transcript_strand = None
                        cur_transcripts_blocks = []

                    # Replace any spaces in the name with underscores so UCSC will not complain.
                    name = elems[2].replace(" ", "_")
                    out.write(get_bed_line(elems[0], name, strand, [coords]))
                    continue

                # There is a transcript ID, so process line at transcript level.
                if t_id == cur_transcript_id:
                    # Line is element of transcript and will be a block in the BED entry.
                    cur_transcripts_blocks.append(coords)
                    continue

                #
                # Line is part of new transcript; write previous transcript and start
                # new transcript.
                #
                if cur_transcript_id:
                    out.write(
                        get_bed_line(
                            cur_transcript_chrome,
                            cur_transcript_id,
                            cur_transcript_strand,
                            cur_transcripts_blocks,
                        )
                    )

                # Start new transcript.
                cur_transcript_chrome = elems[0]
                cur_transcript_id = t_id
                cur_transcript_strand = strand
                cur_transcripts_blocks = [coords]
            except Exception:
                # Deliberately broad: any malformed line (too few columns,
                # non-numeric coordinates, unparsable attributes) is counted
                # and skipped so one bad line does not abort the conversion.
                skipped_lines += 1
                if not first_skipped_line:
                    first_skipped_line = line_number

        # Write last transcript.
        if cur_transcript_id:
            out.write(
                get_bed_line(cur_transcript_chrome, cur_transcript_id, cur_transcript_strand, cur_transcripts_blocks)
            )
    info_msg = "%i lines converted to BED. " % (line_number - skipped_lines)
    if skipped_lines > 0:
        info_msg += "Skipped %d blank/comment/invalid lines starting with line #%d." % (
            skipped_lines,
            first_skipped_line,
        )
    print(info_msg)
696e702ebf74
"planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff
changeset
|
157 |
696e702ebf74
"planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff
changeset
|
158 |
696e702ebf74
"planemo upload commit 0f6eca49bafc3c946189d793161a7f81d595e1a1-dirty"
petr-novak
parents:
diff
changeset
|
# Run the conversion only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    __main__()