peptidegenomiccoordinate: peptideGenomicCoordinate.py comparison

comparison peptideGenomicCoordinate.py @ 1:561b62936e2c draft

planemo upload

author	pravs
date	Mon, 09 Apr 2018 16:53:05 -0400
parents
children	1b61f7aafb61

comparison

equal deleted inserted replaced

-:58c4333afe1f
+:561b62936e2c
+#
+# Author: Praveen Kumar
+# University of Minnesota
+#
+# Get peptide's genomic coordinate from the protein's genomic mapping sqlite file (which is derived from the https://toolshed.g2.bx.psu.edu/view/galaxyp/translate_bed/038ecf54cbec)
+#
+# python peptideGenomicCoordinate.py <peptide_list> <mz_to_sqlite DB> <genomic mapping file DB> <output.bed>
+#
+#
+def main():
+import sys
+import sqlite3
+conn = sqlite3.connect(sys.argv[2])
+c = conn.cursor()
+c.execute("DROP table if exists novel")
+conn.commit()
+c.execute("CREATE TABLE novel(peptide text)")
+pepfile = open(sys.argv[1],"r")
+for seq in pepfile.readlines():
+seq = seq.strip()
+c.execute('INSERT INTO novel VALUES ("'+seq+'")')
+conn.commit()
+c.execute("SELECT distinct psm.sequence, ps.id, ps.sequence from db_sequence ps, psm_entries psm, novel n, proteins_by_peptide pbp where psm.sequence = n.peptide and pbp.peptide_ref = psm.id and pbp.id = ps.id")
+rows = c.fetchall()
+conn1 = sqlite3.connect(sys.argv[3])
+c1 = conn1.cursor()
+outfh = open(sys.argv[4], "w")
+master_dict = {}
+for each in rows:
+peptide = each[0]
+acc = each[1]
+acc_seq = each[2]
+c1.execute("SELECT chrom,start,end,name,strand,cds_start,cds_end FROM feature_cds_map map WHERE map.name = '"+acc+"'")
+coordinates = c1.fetchall()
+if len(coordinates) != 0:
+pep_start = 0
+pep_end = 0
+flag = 0
+splice_flag = 0
+spliced_peptide = []
+for each_entry in coordinates:
+chromosome = each_entry[0]
+start = int(each_entry[1])
+end = int(each_entry[2])
+strand = each_entry[4]
+cds_start = int(each_entry[5])
+cds_end = int(each_entry[6])
+pep_pos_start = (acc_seq.find(peptide)*3)
+pep_pos_end = pep_pos_start + (len(peptide)*3)
+if (pep_pos_start >= cds_start) and (pep_pos_end <= cds_end):
+if strand == "+":
+pep_start = start + pep_pos_start - cds_start
+pep_end = start + pep_pos_end - cds_start
+pep_thick_start = 0
+pep_thick_end = len(peptide)
+flag == 1
+else:
+pep_end = end - pep_pos_start + cds_start
+pep_start = end - pep_pos_end + cds_start
+pep_thick_start = 0
+pep_thick_end = len(peptide)
+flag == 1
+spliced_peptide = []
+splice_flag = 0
+else:
+if flag == 0:
+if strand == "+":
+if (pep_pos_start >= cds_start) and (pep_pos_start <= cds_end) and (pep_pos_end > cds_end):
+pep_start = start + pep_pos_start - cds_start
+pep_end = end
+pep_thick_start = 0
+pep_thick_end = (pep_end-pep_start)
+spliced_peptide.append([pep_start,pep_end,pep_thick_start,pep_thick_end])
+splice_flag = splice_flag + 1
+if splice_flag == 2:
+flag = 1
+elif (pep_pos_end >= cds_start) and (pep_pos_end <= cds_end) and (pep_pos_start < cds_start):
+pep_start = start
+pep_end = start + pep_pos_end - cds_start
+pep_thick_start = (len(peptide)*3)-(pep_end-pep_start)
+pep_thick_end = (len(peptide)*3)
+spliced_peptide.append([pep_start,pep_end,pep_thick_start,pep_thick_end])
+splice_flag = splice_flag + 1
+if splice_flag == 2:
+flag = 1
+else:
+pass
+else:
+if (pep_pos_start >= cds_start) and (pep_pos_start <= cds_end) and (pep_pos_end >= cds_end):
+pep_start = start
+pep_end = end - pep_pos_start - cds_start
+pep_thick_start = 0
+pep_thick_end = (pep_end-pep_start)
+spliced_peptide.append([pep_start,pep_end,pep_thick_start,pep_thick_end])
+splice_flag = splice_flag + 1
+if splice_flag == 2:
+flag = 1
+elif (pep_pos_end >= cds_start) and (pep_pos_end <= cds_end) and (pep_pos_start <= cds_start):
+pep_start = end - pep_pos_end + cds_start
+pep_end = end
+pep_thick_start = (len(peptide)*3)-(pep_end-pep_start)
+pep_thick_end = (len(peptide)*3)
+spliced_peptide.append([pep_start,pep_end,pep_thick_start,pep_thick_end])
+splice_flag = splice_flag + 1
+if splice_flag == 2:
+flag = 1
+else:
+pass
+if len(spliced_peptide) == 0:
+if strand == "+":
+bed_line = [chromosome, str(pep_start), str(pep_end), peptide, "255", strand, str(pep_start), str(pep_end), "0", "1", str(pep_end-pep_start), "0"]
+else:
+bed_line = [chromosome, str(pep_start), str(pep_end), peptide, "255", strand, str(pep_start), str(pep_end), "0", "1", str(pep_end-pep_start), "0"]
+outfh.write("\t".join(bed_line)+"\n")
+else:
+if strand == "+":
+pep_entry = spliced_peptide
+pep_start = min([pep_entry[0][0], pep_entry[1][0]])
+pep_end = max([pep_entry[0][1], pep_entry[1][1]])
+blockSize = [str(min([pep_entry[0][3], pep_entry[1][3]])),str(max([pep_entry[0][3], pep_entry[1][3]])-min([pep_entry[0][3], pep_entry[1][3]]))]
+blockStarts = ["0", str(pep_end-pep_start-(max([pep_entry[0][3], pep_entry[1][3]])-min([pep_entry[0][3], pep_entry[1][3]])))]
+bed_line = [chromosome, str(pep_start), str(pep_end), peptide, "255", strand, str(pep_start), str(pep_end), "0", "2", ",".join(blockSize), ",".join(blockStarts)]
+outfh.write("\t".join(bed_line)+"\n")
+else:
+pep_entry = spliced_peptide
+pep_start = min([pep_entry[0][0], pep_entry[1][0]])
+pep_end = max([pep_entry[0][1], pep_entry[1][1]])
+blockSize = [str(min([pep_entry[0][3], pep_entry[1][3]])),str(max([pep_entry[0][3], pep_entry[1][3]])-min([pep_entry[0][3], pep_entry[1][3]]))]
+blockStarts = ["0", str(pep_end-pep_start-(max([pep_entry[0][3], pep_entry[1][3]])-min([pep_entry[0][3], pep_entry[1][3]])))]
+bed_line = [chromosome, str(pep_start), str(pep_end), peptide, "255", strand, str(pep_start), str(pep_end), "0", "2", ",".join(blockSize), ",".join(blockStarts)]
+outfh.write("\t".join(bed_line)+"\n")
+c.execute("DROP table novel")
+conn.commit()
+conn.close()
+conn1.close()
+outfh.close()
+pepfile.close()
+return None
+if __name__ == "__main__":
+main()

Mercurial > repos > pravs > peptidegenomiccoordinate

comparison peptideGenomicCoordinate.py @ 1:561b62936e2c draft