Mercurial > repos > vipints > fml_mergeloci
annotate fml_gff_groomer/scripts/gff_loci_merge.py @ 0:79726c328621 default tip
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
author | vipints |
---|---|
date | Tue, 07 Jun 2011 17:29:24 -0400 |
parents | |
children |
rev | line source |
---|---|
0
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
1 #!/usr/bin/env python |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
2 # |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
3 # This program is free software; you can redistribute it and/or modify |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
4 # it under the terms of the GNU General Public License as published by |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
5 # the Free Software Foundation; either version 3 of the License, or |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
6 # (at your option) any later version. |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
7 # |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
8 # Written (W) 2010 Vipin T Sreedharan, Friedrich Miescher Laboratory of the Max Planck Society |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
9 # Copyright (C) 2010 Friedrich Miescher Laboratory of the Max Planck Society |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
10 # |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
11 # Description : merge transcripts that share a locus into a single gene and report them as alternatively spliced forms of that gene. |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
12 |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
def display_content(final_dict):
    """Print a summary of the 'gene' entries counted in a GFF file.

    final_dict -- dict holding, under the key 'gff_source_type', a mapping
    of (source, feature type) tuples to their number of occurrences.

    One header line is printed, followed by one line per (source, 'gene')
    combination in sorted order; other feature types are not reported.
    Raises KeyError when 'gff_source_type' is missing from final_dict.
    """

    # print() with a single parenthesised argument produces the same output
    # under Python 2 (print statement) and Python 3 (print function).
    print("\tUnique combination of Source(s), Feature type(s) and corresponding count:")
    for sftype, cnt in sorted(final_dict['gff_source_type'].items()):
        if sftype[1] == 'gene':
            print('\t' + str(cnt) + '\t' + str(sftype[0]) + ', ' + str(sftype[1]))
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
19 |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
def available_limits(gff_file):
    """Figure out the available feature types from the given GFF file.

    gff_file -- path of the GFF file to scan.

    Returns a dict with the keys 'gff_id', 'gff_source_type', 'gff_source'
    and 'gff_type'.  Each value is a plain dict mapping a tuple built from
    the corresponding GFF column(s) (0; 1 and 2; 1; 2) to the number of
    lines carrying that combination.

    Lines starting with '#', blank lines and single-column lines (FASTA
    sequence embedded in the GFF file) are skipped.  Any other line that
    does not have exactly nine tab separated columns raises AssertionError
    with the offending line as its argument.
    """

    filter_info = dict(gff_id=[0], gff_source_type=[1, 2],
                       gff_source=[1], gff_type=[2])
    cur_limits = dict((filter_key, collections.defaultdict(int))
                      for filter_key in filter_info)

    try:
        gff_handle = open(gff_file, 'rU')
    except ValueError:
        # Python >= 3.11 rejects the 'U' flag; plain text mode already
        # translates every newline convention there.
        gff_handle = open(gff_file, 'r')

    # The context manager closes the handle even when a malformed line
    # aborts the scan.
    with gff_handle:
        for line in gff_handle:
            record = line.strip('\n\r')
            if not record.strip():
                continue  # blank line: nothing to count
            if record[0] == "#":
                continue  # comment / directive line
            parts = [p.strip() for p in line.split('\t')]
            if len(parts) == 1 and re.search(r'\w+', parts[0]):
                continue  ## GFF files with FASTA sequence together
            if len(parts) != 9:
                # Raised explicitly (instead of via an assert statement) so
                # the check survives running under "python -O".
                raise AssertionError(line)
            for filter_key, cur_indexes in filter_info.items():
                cur_id = tuple([parts[i] for i in cur_indexes])
                cur_limits[filter_key][cur_id] += 1

    # get rid of the default dicts
    return dict((key, dict(value_dict))
                for key, value_dict in cur_limits.items())
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
44 |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
def GFFWriter(merged_info, genes, transcripts, exons, utr5, cds, utr3, out_file):
    """Write GFF3 file with merged feature description.

    merged_info -- dict mapping (chr, source, strand) tuples to a dict of
                   (start, stop) intervals, each holding a list of the
                   original gene ids merged into that interval.
    genes       -- not used here; kept so existing callers keep working.
    transcripts -- dict mapping an original gene id to a list of transcript
                   dicts with the keys 'type', 'start', 'stop' and 'ID'.
    exons, utr5, cds, utr3 -- dicts mapping a transcript ID to a list of
                   dicts with the keys 'start' and 'stop'.
    out_file    -- path of the GFF3 file to create (overwritten if present).

    For every (chr, source, strand) group the intervals are written in
    sorted order and named Gene_<chr>_<NNNNN>, the counter starting at 1
    for each group.  Below each gene line come its transcripts, each
    followed by its five_prime_UTR, CDS, three_prime_UTR and exon lines.
    """

    # (GFF3 type label, lookup table) in the order the child features are
    # emitted below their transcript line.
    child_tables = (('five_prime_UTR', utr5), ('CDS', cds),
                    ('three_prime_UTR', utr3), ('exon', exons))

    # 'with' guarantees the handle is closed even if a record is malformed
    # (for instance a transcript dict without an 'ID' key).
    with open(out_file, 'w') as out_fh:
        for ginfo, regions in merged_info.items():
            seqid, source, strand = ginfo[0], ginfo[1], ginfo[2]
            gene_cnt = 1
            for interval, features in sorted(regions.items()):  # master gene feature
                gene_name = 'Gene_' + seqid + '_' + str(gene_cnt).zfill(5)
                out_fh.write('\t'.join([
                    seqid, source, 'gene', str(interval[0]), str(interval[1]),
                    '.', strand, '.',
                    'ID=' + gene_name + ';Name=' + gene_name]) + '\n')
                for geneid in features:  # corresponding transcript info
                    if geneid not in transcripts:
                        continue
                    for tinfo in transcripts[geneid]:  # transcript feature line
                        out_fh.write('\t'.join([
                            seqid, source, tinfo['type'],
                            str(tinfo['start']), str(tinfo['stop']),
                            '.', strand, '.',
                            'ID=' + tinfo['ID'] + ';Parent=' + gene_name]) + '\n')
                        for label, table in child_tables:
                            if tinfo['ID'] not in table:
                                continue
                            for child in table[tinfo['ID']]:
                                out_fh.write('\t'.join([
                                    seqid, source, label,
                                    str(child['start']), str(child['stop']),
                                    '.', strand, '.',
                                    'Parent=' + tinfo['ID']]) + '\n')
                gene_cnt += 1
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
67 |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
def _is_single_exon_gene(gid, transcripts, exons):
    """True when gene gid has exactly one transcript made of exactly one exon.

    Raises KeyError when the gene has no transcripts, or its only
    transcript has no 'ID' or no exons recorded.
    """
    models = transcripts[gid]
    return len(models) == 1 and len(exons[models[0]['ID']]) == 1


def _move_cluster(loci, old_key, new_key, gid):
    """Re-key the cluster stored at old_key to new_key and add gid to it.

    If a cluster already exists at new_key its members are kept and the
    moved members are appended after them, so no gene id is dropped.
    """
    members = loci.pop(old_key)
    members.append(gid)
    if new_key in loci:
        loci[new_key].extend(members)
    else:
        loci[new_key] = members


def UniqLoci(genes, transcripts, exons):
    """determine unique location where features annotated multiple times

    genes       -- dict mapping a gene id to a dict with the keys 'chr',
                   'source', 'strand', 'start' and 'stop'.
    transcripts -- dict mapping a gene id to a list of transcript dicts
                   (only the 'ID' key is read here).
    exons       -- dict mapping a transcript ID to its list of exons.

    Returns a dict mapping (chr, source, strand) to a dict of
    (start, stop) intervals, each holding the list of gene ids assigned to
    that interval.

    A gene whose coordinates equal an existing interval joins it.
    Otherwise the existing intervals of the same (chr, source, strand) are
    scanned for the first one related to the gene; those for which
    interval_stop - gene_start or gene_stop - interval_start is below 150
    are ignored.  A related interval is widened to include the gene,
    unless the single-transcript/single-exon test marks the pair as
    non-coding, in which case the gene gets an interval of its own.  A
    gene with no related interval, or whose transcript/exon information
    is missing, also gets its own interval.
    """

    uniq_loci = dict()
    for gid, parts in genes.items():
        gene_info = (parts['chr'], parts['source'], parts['strand'])
        start, stop = int(parts['start']), int(parts['stop'])

        if gene_info not in uniq_loci:
            uniq_loci[gene_info] = {(start, stop): [gid]}
            continue

        ## same contig, orientation, source: look for merging transcripts based on the nearby location
        loci = uniq_loci[gene_info]
        if (start, stop) in loci:
            # identical boundaries (exon, CDS or intron content may still vary)
            loci[(start, stop)].append(gid)
            continue

        # heuristic approach to include closely related regions in a single master locus
        got_a_range = False
        # Iterate over a snapshot: the dict is re-keyed inside the loop.
        for floc in list(loci):
            ## TODO boundary spanning length in same orientation for genes of each species will be great.
            if (floc[1] - start) < 150 or (stop - floc[0]) < 150:
                continue

            starts_inside = floc[0] <= start and start < floc[1]
            stops_inside = floc[0] < stop and stop <= floc[1]
            engulfs = floc[0] > start and floc[1] < stop
            if not (starts_inside or stops_inside or engulfs):
                continue

            try:
                if starts_inside or stops_inside:
                    # is the candidate itself a small, possibly non-coding, model?
                    non_coding = _is_single_exon_gene(gid, transcripts, exons)
                else:
                    # the candidate is longer than the interval: check every
                    # member already stored there instead (all of them are
                    # evaluated, hence the list and not a generator)
                    non_coding = any([_is_single_exon_gene(member, transcripts, exons)
                                      for member in loci[floc]])
            except (LookupError, TypeError):
                # no transcripts or exons defined: stop scanning and fall
                # through to creating a separate interval below
                break

            if non_coding:
                # create a new interval for the non-coding type entry
                loci[(start, stop)] = [gid]
            elif starts_inside:
                if stop > floc[1]:
                    # longer downstream end: widen the interval
                    _move_cluster(loci, floc, (floc[0], stop), gid)
                else:
                    loci[floc].append(gid)
            elif stops_inside:
                # candidate starts further upstream: widen the interval
                _move_cluster(loci, floc, (start, floc[1]), gid)
            else:
                # candidate extends the interval on both sides
                _move_cluster(loci, floc, (start, stop), gid)
            got_a_range = True
            break

        if not got_a_range:
            loci[(start, stop)] = [gid]

    return uniq_loci
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
139 |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
140 def ParseGFF(gff_file): |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
141 """feature extraction from provided GFF file""" |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
142 |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
143 gff_handle = open(gff_file, 'rU') |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
144 genes, transcripts, exons, utr5, cds, utr3 = dict(), dict(), dict(), dict(), dict(), dict() |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
145 for gff_line in gff_handle: |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
146 parts = gff_line.strip('\n\r').split('\t') |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
147 if gff_line[0] == '#' or gff_line[0] == '>':continue |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
148 if len(parts) == 1:continue ## Some centers in the world create GFF files with FASTA sequence together |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
149 if len(parts) != 9:sys.stdout.write('Warning: Found invalid GFF line\n' + gff_line.strip('\n\r') + '\n');continue |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
150 if parts[3] == '' or parts[4] == '':sys.stdout.write('Warning: Found missing coordinate in GFF line\n' + gff_line.strip('\n\r') + '\n');continue |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
151 if parts[2] == 'gene': |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
152 gene_info = dict() |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
153 gene_info['start'] = int(parts[3]) |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
154 gene_info['stop'] = int(parts[4]) |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
155 gene_info['chr'] = parts[0] |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
156 gene_info['source'] = parts[1] |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
157 gene_info['strand'] = parts[6] |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
158 gid = '' |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
159 for attr in parts[-1].split(';'): |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
160 if attr == '':continue ## GFF line may end with a ';' symbol |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
161 attr = attr.split('=') |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
162 if attr[0] == 'ID':gid=attr[1];continue |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
163 gene_info[attr[0]] = attr[1] |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
164 if gid != '': genes[gid] = gene_info |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
165 if parts[2] == 'mRNA' or parts[2] == 'transcript' or parts[2] == 'ncRNA' or parts[2] == 'tRNA' or parts[2] == 'snRNA' or parts[2] == 'scRNA' or parts[2] == 'snoRNA' or parts[2] == 'snlRNA' or parts[2] == 'rRNA' or parts[2] == 'miRNA': |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
166 mrna_info = dict() |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
167 mrna_info['start'] = int(parts[3]) |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
168 mrna_info['stop'] = int(parts[4]) |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
169 mrna_info['chr'] = parts[0] |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
170 mrna_info['strand'] = parts[6] |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
171 mrna_info['type'] = parts[2] |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
172 gid = '' |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
173 for attr in parts[-1].split(';'): |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
174 if attr == '':continue ## GFF line may end with a ';' symbol |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
175 attr = attr.split('=') |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
176 if attr[0] == 'Parent':gid=attr[1];continue |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
177 mrna_info[attr[0]] = attr[1] |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
178 if gid in transcripts: |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
179 transcripts[gid].append(mrna_info) |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
180 else: |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
181 transcripts[gid] = [mrna_info] |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
182 if parts[2] == 'exon': |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
183 exon_info = dict() |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
184 exon_info['start'] = int(parts[3]) |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
185 exon_info['stop'] = int(parts[4]) |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
186 exon_info['chr'] = parts[0] |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
187 exon_info['strand'] = parts[6] |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
188 tid = '' |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
189 for attr in parts[-1].split(';'): |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
190 if attr == '':continue ## GFF line may end with a ';' symbol |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
191 attr = attr.split('=') |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
192 if attr[0] == 'Parent':tid=attr[1];continue |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
193 exon_info[attr[0]] = attr[1] |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
194 if tid in exons: |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
195 exons[tid].append(exon_info) |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
196 else: |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
197 exons[tid] = [exon_info] |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
198 if parts[2] == 'five_prime_UTR': |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
199 utr5_info = dict() |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
200 utr5_info['start'] = int(parts[3]) |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
201 utr5_info['stop'] = int(parts[4]) |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
202 utr5_info['chr'] = parts[0] |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
203 utr5_info['strand'] = parts[6] |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
204 tid = '' |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
205 for attr in parts[-1].split(';'): |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
206 if attr == '':continue ## GFF line may end with a ';' symbol |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
207 attr = attr.split('=') |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
208 if attr[0] == 'Parent':tid=attr[1];continue |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
209 utr5_info[attr[0]] = attr[1] |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
210 if tid in utr5: |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
211 utr5[tid].append(utr5_info) |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
212 else: |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
213 utr5[tid] = [utr5_info] |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
214 if parts[2] == 'CDS': |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
215 cds_info = dict() |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
216 cds_info['start'] = int(parts[3]) |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
217 cds_info['stop'] = int(parts[4]) |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
218 cds_info['chr'] = parts[0] |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
219 cds_info['strand'] = parts[6] |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
220 tid = '' |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
221 for attr in parts[-1].split(';'): |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
222 if attr == '':continue |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
223 attr = attr.split('=') |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
224 if attr[0] == 'Parent':tid=attr[1];continue |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
225 cds_info[attr[0]] = attr[1] |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
226 if tid in cds: |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
227 cds[tid].append(cds_info) |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
228 else: |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
229 cds[tid] = [cds_info] |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
230 if parts[2] == 'three_prime_UTR': |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
231 utr3_info = dict() |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
232 utr3_info['start'] = int(parts[3]) |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
233 utr3_info['stop'] = int(parts[4]) |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
234 utr3_info['chr'] = parts[0] |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
235 utr3_info['strand'] = parts[6] |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
236 tid = '' |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
237 for attr in parts[-1].split(';'): |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
238 if attr == '':continue |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
239 attr = attr.split('=') |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
240 if attr[0] == 'Parent':tid=attr[1];continue |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
241 utr3_info[attr[0]] = attr[1] |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
242 if tid in utr3: |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
243 utr3[tid].append(utr3_info) |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
244 else: |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
245 utr3[tid] = [utr3_info] |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
246 gff_handle.close() |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
247 return genes, transcripts, exons, utr5, cds, utr3 |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
248 |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
249 import re, sys |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
250 import time |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
251 import collections |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
252 |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
if __name__=='__main__':
    # Command-line driver for MergeLoci.
    #
    # Usage: gff_loci_merge.py <gff file> <result file>
    #
    # Steps, as reported on stdout:
    #   1. read the two command-line arguments,
    #   2. show the feature distribution of the input file,
    #   3. parse the input and merge features from similar locations,
    #   4. write the merged annotation to the result file,
    #   5. show the feature distribution of the result file.
    #
    # Every print uses the single-argument parenthesised form, which emits
    # the same text under the Python 2 print statement.

    stime = time.asctime( time.localtime(time.time()) )
    print('-------------------------------------------------------')
    print('MergeLoci started on ' + stime)
    print('-------------------------------------------------------')

    # Only a missing positional argument is expected here, so catch exactly
    # IndexError instead of swallowing every exception (a bare except would
    # also trap KeyboardInterrupt and SystemExit).
    try:
        gff_file = sys.argv[1]
        out_file = sys.argv[2]
    except IndexError:
        sys.stderr.write("Missing GFF3 file, result file. terminating...\n")
        sys.stderr.write("USAGE: gff_loci_merge.py <gff file> <result file>\n")
        sys.exit(-1)

    print('--------')
    # The installation prefix is replaced by 'GALAXYDIR' in the displayed path.
    print('Level: 1- ' + 'Reading GFF file: ' + re.sub(r'/home/galaxy/galaxy-2.1.2009', r'GALAXYDIR', gff_file))
    print('--------')

    print('--------')
    print('Level: 2- ' + 'BEFORE processing, Merging feature distribution in GFF file')
    print('--------')
    # initial feature distribution in file
    final_dict = available_limits(gff_file)
    display_content(final_dict)

    # determine the whole content from GFF file
    genes, transcripts, exons, utr5, cds, utr3 = ParseGFF(gff_file)

    print('--------')
    print('Level: 3- ' + 'Start merging feature(s) from similar locations...')
    print('--------')
    # determine the same gene loci on specific chromosome based on the same source
    merged_regions = UniqLoci(genes, transcripts, exons)
    print('\tDone.')

    print('--------')
    print('Level: 4- ' + 'Writing merged feature annotation to GFF format...')
    print('--------')
    # write new GFF file with merged loci information for gene feature
    GFFWriter(merged_regions, genes, transcripts, exons, utr5, cds, utr3, out_file)
    print('\tDone.')

    # after processing display the feature distribution in the result file
    print('--------')
    print('Level: 5- ' + 'Merged feature(s) summary from GFF file')
    print('--------')
    final_dict = available_limits(out_file)
    display_content(final_dict)

    print('')  # blank separator line
    print('\tMerged result file: ' + re.sub(r'/home/galaxy/galaxy-2.1.2009', r'GALAXYDIR', out_file))

    stime = time.asctime( time.localtime(time.time()) )
    print('-------------------------------------------------------')
    print('MergeLoci finished at ' + stime)
    print('-------------------------------------------------------')