Mercurial > repos > davidvanzessen > shm_csr
comparison shm_csr.py @ 63:8728284105ee draft
Uploaded
author | davidvanzessen |
---|---|
date | Wed, 06 Dec 2017 08:04:52 -0500 |
parents | aa8d37bd1930 |
children | 43a1aa648537 |
comparison (legend: equal | deleted | inserted | replaced)
62:aa8d37bd1930 | 63:8728284105ee |
---|---|
102 | 102 |
103 mutationdic[ID + "_FR3"] = [] | 103 mutationdic[ID + "_FR3"] = [] |
104 if len(linesplt[fr3Index]) > 5: | 104 if len(linesplt[fr3Index]) > 5: |
105 mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x] | 105 mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x] |
106 | 106 |
107 try: | |
108 pass | |
109 except Exception as e: | |
110 print "Something went wrong while processing this line:" | |
111 print "line:", linecount | |
112 print "fr1 len:", len(linesplt[fr1Index]), "value:", linesplt[fr1Index] | |
113 print "cdr1 len:", len(linesplt[cdr1Index]), "value:", linesplt[cdr1Index] | |
114 print "fr2 len:", len(linesplt[fr2Index]), "value:", linesplt[fr2Index] | |
115 print "cdr2 len:", len(linesplt[cdr2Index]), "value:", linesplt[cdr2Index] | |
116 print "fr3 len:", len(linesplt[fr3Index]), "value:", linesplt[fr3Index] | |
117 print ID + "_FR1 in mutationdic", ID + "_FR1" in mutationdic | |
118 print ID + "_CDR1 in mutationdic", ID + "_CDR1" in mutationdic | |
119 print ID + "_FR2 in mutationdic", ID + "_FR2" in mutationdic | |
120 print ID + "_CDR2 in mutationdic", ID + "_CDR2" in mutationdic | |
121 print ID + "_FR3 in mutationdic", ID + "_FR3" in mutationdic | |
122 print linesplt | |
123 print e | |
124 mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] | 107 mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] |
125 mutationListByID[ID] = mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] | 108 mutationListByID[ID] = mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] |
126 | 109 |
127 cdr1Length = len(linesplt[cdr1LengthIndex]) | 110 cdr1Length = len(linesplt[cdr1LengthIndex]) |
128 cdr2Length = len(linesplt[cdr2LengthIndex]) | 111 cdr2Length = len(linesplt[cdr2LengthIndex]) |
391 if in_how_many_motifs > 0: | 374 if in_how_many_motifs > 0: |
392 RGYWCount[ID] += (1.0 * int(mutation_in_RGYW)) / in_how_many_motifs | 375 RGYWCount[ID] += (1.0 * int(mutation_in_RGYW)) / in_how_many_motifs |
393 WRCYCount[ID] += (1.0 * int(mutation_in_WRCY)) / in_how_many_motifs | 376 WRCYCount[ID] += (1.0 * int(mutation_in_WRCY)) / in_how_many_motifs |
394 WACount[ID] += (1.0 * int(mutation_in_WA)) / in_how_many_motifs | 377 WACount[ID] += (1.0 * int(mutation_in_WA)) / in_how_many_motifs |
395 TWCount[ID] += (1.0 * int(mutation_in_TW)) / in_how_many_motifs | 378 TWCount[ID] += (1.0 * int(mutation_in_TW)) / in_how_many_motifs |
379 | |
380 mutations_in_motifs_file = os.path.join(os.path.dirname(os.path.abspath(infile)), "mutation_in_motifs.txt") | |
381 if not os.path.exists(mutation_by_id_file): | |
382 with open(mutations_in_motifs_file, 'w') as out_handle: | |
383 out_handle.write("{0}\n".format("\t".join([ | |
384 "Sequence.ID", | |
385 "mutation_position", | |
386 "region", | |
387 "from_nt", | |
388 "to_nt", | |
389 "mutation_position_AA", | |
390 "from_AA", | |
391 "to_AA", | |
392 "motif", | |
393 "motif_start_nt", | |
394 "motif_end_nt", | |
395 "rest" | |
396 ]))) | |
397 | |
398 with open(mutations_in_motifs_file, 'a') as out_handle: | |
399 motif_dic = {"RGYW": RGYW, "WRCY": WRCY, "WA": WA, "TW": TW} | |
400 for mutation in mutationList: | |
401 frm, where, to, AAfrm, AAwhere, AAto, junk = mutation | |
402 for motif in motif_dic.keys(): | |
403 | |
404 for start, end, region in motif_dic[motif]: | |
405 if start <= int(where) <= end: | |
406 out_handle.write("{0}\n".format( | |
407 "\t".join([ | |
408 ID, | |
409 where, | |
410 region, | |
411 frm, | |
412 to, | |
413 str(AAwhere), | |
414 str(AAfrm), | |
415 str(AAto), | |
416 motif, | |
417 str(start), | |
418 str(end), | |
419 str(junk) | |
420 ]) | |
421 )) | |
422 | |
396 | 423 |
397 | 424 |
398 def mean(lst): | 425 def mean(lst): |
399 return (float(sum(lst)) / len(lst)) if len(lst) > 0 else 0.0 | 426 return (float(sum(lst)) / len(lst)) if len(lst) > 0 else 0.0 |
400 | 427 |