Mercurial > repos > pravs > peptidegenomiccoordinate
comparison peptideGenomicCoordinate.py @ 1:561b62936e2c draft
planemo upload
author | pravs |
---|---|
date | Mon, 09 Apr 2018 16:53:05 -0400 |
parents | |
children | 1b61f7aafb61 |
comparison
equal
deleted
inserted
replaced
0:58c4333afe1f | 1:561b62936e2c |
---|---|
1 | |
2 # | |
3 # Author: Praveen Kumar | |
4 # University of Minnesota | |
5 # | |
6 # Get peptide's genomic coordinate from the protein's genomic mapping sqlite file (which is derived from the https://toolshed.g2.bx.psu.edu/view/galaxyp/translate_bed/038ecf54cbec) | |
7 # | |
8 # python peptideGenomicCoordinate.py <peptide_list> <mz_to_sqlite DB> <genomic mapping file DB> <output.bed> | |
9 # | |
10 # | |
11 | |
12 def main(): | |
13 import sys | |
14 import sqlite3 | |
15 conn = sqlite3.connect(sys.argv[2]) | |
16 c = conn.cursor() | |
17 c.execute("DROP table if exists novel") | |
18 conn.commit() | |
19 c.execute("CREATE TABLE novel(peptide text)") | |
20 pepfile = open(sys.argv[1],"r") | |
21 | |
22 for seq in pepfile.readlines(): | |
23 seq = seq.strip() | |
24 c.execute('INSERT INTO novel VALUES ("'+seq+'")') | |
25 conn.commit() | |
26 | |
27 c.execute("SELECT distinct psm.sequence, ps.id, ps.sequence from db_sequence ps, psm_entries psm, novel n, proteins_by_peptide pbp where psm.sequence = n.peptide and pbp.peptide_ref = psm.id and pbp.id = ps.id") | |
28 rows = c.fetchall() | |
29 | |
30 conn1 = sqlite3.connect(sys.argv[3]) | |
31 c1 = conn1.cursor() | |
32 | |
33 outfh = open(sys.argv[4], "w") | |
34 | |
35 master_dict = {} | |
36 for each in rows: | |
37 peptide = each[0] | |
38 acc = each[1] | |
39 acc_seq = each[2] | |
40 | |
41 c1.execute("SELECT chrom,start,end,name,strand,cds_start,cds_end FROM feature_cds_map map WHERE map.name = '"+acc+"'") | |
42 coordinates = c1.fetchall() | |
43 | |
44 if len(coordinates) != 0: | |
45 pep_start = 0 | |
46 pep_end = 0 | |
47 flag = 0 | |
48 splice_flag = 0 | |
49 spliced_peptide = [] | |
50 for each_entry in coordinates: | |
51 chromosome = each_entry[0] | |
52 start = int(each_entry[1]) | |
53 end = int(each_entry[2]) | |
54 strand = each_entry[4] | |
55 cds_start = int(each_entry[5]) | |
56 cds_end = int(each_entry[6]) | |
57 pep_pos_start = (acc_seq.find(peptide)*3) | |
58 pep_pos_end = pep_pos_start + (len(peptide)*3) | |
59 if (pep_pos_start >= cds_start) and (pep_pos_end <= cds_end): | |
60 if strand == "+": | |
61 pep_start = start + pep_pos_start - cds_start | |
62 pep_end = start + pep_pos_end - cds_start | |
63 pep_thick_start = 0 | |
64 pep_thick_end = len(peptide) | |
65 flag == 1 | |
66 else: | |
67 pep_end = end - pep_pos_start + cds_start | |
68 pep_start = end - pep_pos_end + cds_start | |
69 pep_thick_start = 0 | |
70 pep_thick_end = len(peptide) | |
71 flag == 1 | |
72 spliced_peptide = [] | |
73 splice_flag = 0 | |
74 else: | |
75 if flag == 0: | |
76 if strand == "+": | |
77 if (pep_pos_start >= cds_start) and (pep_pos_start <= cds_end) and (pep_pos_end > cds_end): | |
78 pep_start = start + pep_pos_start - cds_start | |
79 pep_end = end | |
80 pep_thick_start = 0 | |
81 pep_thick_end = (pep_end-pep_start) | |
82 spliced_peptide.append([pep_start,pep_end,pep_thick_start,pep_thick_end]) | |
83 splice_flag = splice_flag + 1 | |
84 if splice_flag == 2: | |
85 flag = 1 | |
86 elif (pep_pos_end >= cds_start) and (pep_pos_end <= cds_end) and (pep_pos_start < cds_start): | |
87 pep_start = start | |
88 pep_end = start + pep_pos_end - cds_start | |
89 pep_thick_start = (len(peptide)*3)-(pep_end-pep_start) | |
90 pep_thick_end = (len(peptide)*3) | |
91 spliced_peptide.append([pep_start,pep_end,pep_thick_start,pep_thick_end]) | |
92 splice_flag = splice_flag + 1 | |
93 if splice_flag == 2: | |
94 flag = 1 | |
95 else: | |
96 pass | |
97 else: | |
98 if (pep_pos_start >= cds_start) and (pep_pos_start <= cds_end) and (pep_pos_end >= cds_end): | |
99 pep_start = start | |
100 pep_end = end - pep_pos_start - cds_start | |
101 pep_thick_start = 0 | |
102 pep_thick_end = (pep_end-pep_start) | |
103 spliced_peptide.append([pep_start,pep_end,pep_thick_start,pep_thick_end]) | |
104 splice_flag = splice_flag + 1 | |
105 if splice_flag == 2: | |
106 flag = 1 | |
107 elif (pep_pos_end >= cds_start) and (pep_pos_end <= cds_end) and (pep_pos_start <= cds_start): | |
108 pep_start = end - pep_pos_end + cds_start | |
109 pep_end = end | |
110 pep_thick_start = (len(peptide)*3)-(pep_end-pep_start) | |
111 pep_thick_end = (len(peptide)*3) | |
112 spliced_peptide.append([pep_start,pep_end,pep_thick_start,pep_thick_end]) | |
113 splice_flag = splice_flag + 1 | |
114 if splice_flag == 2: | |
115 flag = 1 | |
116 else: | |
117 pass | |
118 if len(spliced_peptide) == 0: | |
119 if strand == "+": | |
120 bed_line = [chromosome, str(pep_start), str(pep_end), peptide, "255", strand, str(pep_start), str(pep_end), "0", "1", str(pep_end-pep_start), "0"] | |
121 else: | |
122 bed_line = [chromosome, str(pep_start), str(pep_end), peptide, "255", strand, str(pep_start), str(pep_end), "0", "1", str(pep_end-pep_start), "0"] | |
123 outfh.write("\t".join(bed_line)+"\n") | |
124 else: | |
125 if strand == "+": | |
126 pep_entry = spliced_peptide | |
127 pep_start = min([pep_entry[0][0], pep_entry[1][0]]) | |
128 pep_end = max([pep_entry[0][1], pep_entry[1][1]]) | |
129 blockSize = [str(min([pep_entry[0][3], pep_entry[1][3]])),str(max([pep_entry[0][3], pep_entry[1][3]])-min([pep_entry[0][3], pep_entry[1][3]]))] | |
130 blockStarts = ["0", str(pep_end-pep_start-(max([pep_entry[0][3], pep_entry[1][3]])-min([pep_entry[0][3], pep_entry[1][3]])))] | |
131 bed_line = [chromosome, str(pep_start), str(pep_end), peptide, "255", strand, str(pep_start), str(pep_end), "0", "2", ",".join(blockSize), ",".join(blockStarts)] | |
132 outfh.write("\t".join(bed_line)+"\n") | |
133 else: | |
134 pep_entry = spliced_peptide | |
135 pep_start = min([pep_entry[0][0], pep_entry[1][0]]) | |
136 pep_end = max([pep_entry[0][1], pep_entry[1][1]]) | |
137 blockSize = [str(min([pep_entry[0][3], pep_entry[1][3]])),str(max([pep_entry[0][3], pep_entry[1][3]])-min([pep_entry[0][3], pep_entry[1][3]]))] | |
138 blockStarts = ["0", str(pep_end-pep_start-(max([pep_entry[0][3], pep_entry[1][3]])-min([pep_entry[0][3], pep_entry[1][3]])))] | |
139 bed_line = [chromosome, str(pep_start), str(pep_end), peptide, "255", strand, str(pep_start), str(pep_end), "0", "2", ",".join(blockSize), ",".join(blockStarts)] | |
140 outfh.write("\t".join(bed_line)+"\n") | |
141 c.execute("DROP table novel") | |
142 conn.commit() | |
143 conn.close() | |
144 conn1.close() | |
145 outfh.close() | |
146 pepfile.close() | |
147 | |
148 return None | |
149 if __name__ == "__main__": | |
150 main() |