diff shm_csr.py @ 48:c5295dd10dfc draft

Uploaded
author davidvanzessen
date Mon, 08 May 2017 09:27:27 -0400
parents 64711f461c8e
children aa8d37bd1930
line wrap: on
line diff
--- a/shm_csr.py	Thu May 04 07:43:09 2017 -0400
+++ b/shm_csr.py	Mon May 08 09:27:27 2017 -0400
@@ -114,7 +114,18 @@
 	#tandem mutation stuff
 	tandem_frequency = defaultdict(int)
 	mutation_frequency = defaultdict(int)
-
+	
+	mutations_by_id_dic = {}
+	first = True
+	mutation_by_id_file = os.path.join(os.path.dirname(outfile), "mutation_by_id.txt")
+	with open(mutation_by_id_file, 'r') as mutation_by_id:
+		for l in mutation_by_id:
+			if first:
+				first = False
+				continue
+			splt = l.split("\t")
+			mutations_by_id_dic[splt[0]] = int(splt[1])
+    
 	tandem_file = os.path.join(os.path.dirname(outfile), "tandems_by_id.txt")
 	with open(tandem_file, 'w') as o:
 		highest_tandem_length = 0
@@ -159,7 +170,7 @@
 
 			region_length = fr1LengthDict[ID] + cdr1LengthDic[ID] + fr2LengthDict[ID] + cdr2LengthDic[ID] + fr3LengthDict[ID]
 			longest_tandem = max(tandem_muts, key=lambda x: x[1]) if len(tandem_muts) else (0, 0)
-			num_mutations = len(mutations)
+			num_mutations = mutations_by_id_dic[ID] # len(mutations)
 			f_num_mutations = float(num_mutations)
 			num_tandem_muts = len(tandem_muts)
 			expected_tandem_muts = f_num_mutations * (f_num_mutations - 1.0) / float(region_length)
@@ -197,9 +208,6 @@
 			o.write("{0}\t{1}\n".format(frq, tandem_frequency[str(frq)]))
 
 	tandem_row = []
-	print genes
-	print tandem_sum_by_class
-	print expected_tandem_sum_by_class
 	genes_extra = list(genes)
 	genes_extra.append("all")
 	for x, y, in zip([tandem_sum_by_class[x] for x in genes_extra], [expected_tandem_sum_by_class[x] for x in genes_extra]):
@@ -207,22 +215,6 @@
 			tandem_row += [x, round(y, 2), round(x / y, 2)]
 		else:
 			tandem_row += [x, round(y, 2), 0]
-	
-	"""
-	print tandem_row
-	tandem_row += tandem_row[-3:]
-	print tandem_row
-	all_expected_tandem = expected_tandem_sum_by_class["all"]
-	all_tandem = tandem_sum_by_class["all"]
-	if all_expected_tandem == 0:
-		tandem_row[-6:-3] = [all_tandem, round(all_expected_tandem, 2), 0]
-	else:
-		tandem_row[-6:-3] = [all_tandem, round(all_expected_tandem, 2), round(all_tandem / all_expected_tandem, 2)]
-	print tandem_row
-	"""
-	for i in range(len(genes_extra)):
-		gene = genes_extra[i]
-		print gene, tandem_row[i*3:i*3+3]
 
 	tandem_freq_file = os.path.join(os.path.dirname(outfile), "shm_overview_tandem_row.txt")
 	with open(tandem_freq_file, 'w') as o: