Repository 'shm_csr'
hg clone https://radegast.galaxyproject.org/repos/davidvanzessen/shm_csr

Changeset 48:c5295dd10dfc (2017-05-08)
Previous changeset 47:64711f461c8e (2017-05-04) Next changeset 49:f5fe63533c58 (2017-05-11)
Commit message:
Uploaded
modified:
shm_csr.py
b
diff -r 64711f461c8e -r c5295dd10dfc shm_csr.py
--- a/shm_csr.py Thu May 04 07:43:09 2017 -0400
+++ b/shm_csr.py Mon May 08 09:27:27 2017 -0400
[
@@ -114,7 +114,18 @@
  #tandem mutation stuff
  tandem_frequency = defaultdict(int)
  mutation_frequency = defaultdict(int)
-
+
+ mutations_by_id_dic = {}
+ first = True
+ mutation_by_id_file = os.path.join(os.path.dirname(outfile), "mutation_by_id.txt")
+ with open(mutation_by_id_file, 'r') as mutation_by_id:
+ for l in mutation_by_id:
+ if first:
+ first = False
+ continue
+ splt = l.split("\t")
+ mutations_by_id_dic[splt[0]] = int(splt[1])
+    
  tandem_file = os.path.join(os.path.dirname(outfile), "tandems_by_id.txt")
  with open(tandem_file, 'w') as o:
  highest_tandem_length = 0
@@ -159,7 +170,7 @@
 
  region_length = fr1LengthDict[ID] + cdr1LengthDic[ID] + fr2LengthDict[ID] + cdr2LengthDic[ID] + fr3LengthDict[ID]
  longest_tandem = max(tandem_muts, key=lambda x: x[1]) if len(tandem_muts) else (0, 0)
- num_mutations = len(mutations)
+ num_mutations = mutations_by_id_dic[ID] # len(mutations)
  f_num_mutations = float(num_mutations)
  num_tandem_muts = len(tandem_muts)
  expected_tandem_muts = f_num_mutations * (f_num_mutations - 1.0) / float(region_length)
@@ -197,9 +208,6 @@
  o.write("{0}\t{1}\n".format(frq, tandem_frequency[str(frq)]))
 
  tandem_row = []
- print genes
- print tandem_sum_by_class
- print expected_tandem_sum_by_class
  genes_extra = list(genes)
  genes_extra.append("all")
  for x, y, in zip([tandem_sum_by_class[x] for x in genes_extra], [expected_tandem_sum_by_class[x] for x in genes_extra]):
@@ -207,22 +215,6 @@
  tandem_row += [x, round(y, 2), round(x / y, 2)]
  else:
  tandem_row += [x, round(y, 2), 0]
-
- """
- print tandem_row
- tandem_row += tandem_row[-3:]
- print tandem_row
- all_expected_tandem = expected_tandem_sum_by_class["all"]
- all_tandem = tandem_sum_by_class["all"]
- if all_expected_tandem == 0:
- tandem_row[-6:-3] = [all_tandem, round(all_expected_tandem, 2), 0]
- else:
- tandem_row[-6:-3] = [all_tandem, round(all_expected_tandem, 2), round(all_tandem / all_expected_tandem, 2)]
- print tandem_row
- """
- for i in range(len(genes_extra)):
- gene = genes_extra[i]
- print gene, tandem_row[i*3:i*3+3]
 
  tandem_freq_file = os.path.join(os.path.dirname(outfile), "shm_overview_tandem_row.txt")
  with open(tandem_freq_file, 'w') as o: