comparison shm_csr.py @ 63:8728284105ee draft

Uploaded
author davidvanzessen
date Wed, 06 Dec 2017 08:04:52 -0500
parents aa8d37bd1930
children 43a1aa648537
comparison
equal deleted inserted replaced
62:aa8d37bd1930 63:8728284105ee
102 102
103 mutationdic[ID + "_FR3"] = [] 103 mutationdic[ID + "_FR3"] = []
104 if len(linesplt[fr3Index]) > 5: 104 if len(linesplt[fr3Index]) > 5:
105 mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x] 105 mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x]
106 106
107 try:
108 pass
109 except Exception as e:
110 print "Something went wrong while processing this line:"
111 print "line:", linecount
112 print "fr1 len:", len(linesplt[fr1Index]), "value:", linesplt[fr1Index]
113 print "cdr1 len:", len(linesplt[cdr1Index]), "value:", linesplt[cdr1Index]
114 print "fr2 len:", len(linesplt[fr2Index]), "value:", linesplt[fr2Index]
115 print "cdr2 len:", len(linesplt[cdr2Index]), "value:", linesplt[cdr2Index]
116 print "fr3 len:", len(linesplt[fr3Index]), "value:", linesplt[fr3Index]
117 print ID + "_FR1 in mutationdic", ID + "_FR1" in mutationdic
118 print ID + "_CDR1 in mutationdic", ID + "_CDR1" in mutationdic
119 print ID + "_FR2 in mutationdic", ID + "_FR2" in mutationdic
120 print ID + "_CDR2 in mutationdic", ID + "_CDR2" in mutationdic
121 print ID + "_FR3 in mutationdic", ID + "_FR3" in mutationdic
122 print linesplt
123 print e
124 mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] 107 mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"]
125 mutationListByID[ID] = mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] 108 mutationListByID[ID] = mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"]
126 109
127 cdr1Length = len(linesplt[cdr1LengthIndex]) 110 cdr1Length = len(linesplt[cdr1LengthIndex])
128 cdr2Length = len(linesplt[cdr2LengthIndex]) 111 cdr2Length = len(linesplt[cdr2LengthIndex])
391 if in_how_many_motifs > 0: 374 if in_how_many_motifs > 0:
392 RGYWCount[ID] += (1.0 * int(mutation_in_RGYW)) / in_how_many_motifs 375 RGYWCount[ID] += (1.0 * int(mutation_in_RGYW)) / in_how_many_motifs
393 WRCYCount[ID] += (1.0 * int(mutation_in_WRCY)) / in_how_many_motifs 376 WRCYCount[ID] += (1.0 * int(mutation_in_WRCY)) / in_how_many_motifs
394 WACount[ID] += (1.0 * int(mutation_in_WA)) / in_how_many_motifs 377 WACount[ID] += (1.0 * int(mutation_in_WA)) / in_how_many_motifs
395 TWCount[ID] += (1.0 * int(mutation_in_TW)) / in_how_many_motifs 378 TWCount[ID] += (1.0 * int(mutation_in_TW)) / in_how_many_motifs
379
380 mutations_in_motifs_file = os.path.join(os.path.dirname(os.path.abspath(infile)), "mutation_in_motifs.txt")
381 if not os.path.exists(mutation_by_id_file):
382 with open(mutations_in_motifs_file, 'w') as out_handle:
383 out_handle.write("{0}\n".format("\t".join([
384 "Sequence.ID",
385 "mutation_position",
386 "region",
387 "from_nt",
388 "to_nt",
389 "mutation_position_AA",
390 "from_AA",
391 "to_AA",
392 "motif",
393 "motif_start_nt",
394 "motif_end_nt",
395 "rest"
396 ])))
397
398 with open(mutations_in_motifs_file, 'a') as out_handle:
399 motif_dic = {"RGYW": RGYW, "WRCY": WRCY, "WA": WA, "TW": TW}
400 for mutation in mutationList:
401 frm, where, to, AAfrm, AAwhere, AAto, junk = mutation
402 for motif in motif_dic.keys():
403
404 for start, end, region in motif_dic[motif]:
405 if start <= int(where) <= end:
406 out_handle.write("{0}\n".format(
407 "\t".join([
408 ID,
409 where,
410 region,
411 frm,
412 to,
413 str(AAwhere),
414 str(AAfrm),
415 str(AAto),
416 motif,
417 str(start),
418 str(end),
419 str(junk)
420 ])
421 ))
422
396 423
397 424
398 def mean(lst): 425 def mean(lst):
399 return (float(sum(lst)) / len(lst)) if len(lst) > 0 else 0.0 426 return (float(sum(lst)) / len(lst)) if len(lst) > 0 else 0.0
400 427