Mercurial > repos > davidvanzessen > shm_csr
diff shm_csr.py @ 1:faae21ba5c63 draft
Uploaded
author | davidvanzessen |
---|---|
date | Tue, 25 Oct 2016 07:28:43 -0400 |
parents | c33d93683a09 |
children | 275ab5175fd6 |
line wrap: on
line diff
--- a/shm_csr.py Thu Oct 13 10:52:24 2016 -0400 +++ b/shm_csr.py Tue Oct 25 07:28:43 2016 -0400 @@ -7,15 +7,14 @@ parser.add_argument("--input", help="The '7_V-REGION-mutation-and-AA-change-table' and '10_V-REGION-mutation-hotspots' merged together, with an added 'best_match' annotation") parser.add_argument("--genes", help="The genes available in the 'best_match' column") -parser.add_argument("--includefr1", help="Should the mutation/nucleotides in the FR1 region be included?") +parser.add_argument("--empty_region_filter", help="Where does the sequence start?", choices=['leader', 'FR1', 'CDR1', 'FR2']) parser.add_argument("--output", help="Output file") args = parser.parse_args() infile = args.input genes = str(args.genes).split(",") -print "includefr1 =", args.includefr1 -include_fr1 = True if args.includefr1 == "yes" else False +empty_region_filter = args.empty_region_filter outfile = args.output genedic = dict() @@ -59,16 +58,14 @@ ID = linesplt[IDIndex] genedic[ID] = linesplt[best_matchIndex] try: - if linesplt[fr1Index] != "NA": - mutationdic[ID + "_FR1"] = [mutationMatcher.match(x).groups() for x in linesplt[fr1Index].split("|") if x] if include_fr1 else [] - else: - mutationdic[ID + "_FR1"] = [] - mutationdic[ID + "_CDR1"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr1Index].split("|") if x] if linesplt[cdr1Index] != "NA" else [] - mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x] if linesplt[fr2Index] != "NA" else [] - mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x] if linesplt[cdr2Index] != "NA" else [] + mutationdic[ID + "_FR1"] = [mutationMatcher.match(x).groups() for x in linesplt[fr1Index].split("|") if x] if (linesplt[fr1Index] != "NA" and empty_region_filter == "leader") else [] + mutationdic[ID + "_CDR1"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr1Index].split("|") if x] if (linesplt[cdr1Index] != "NA" and empty_region_filter in ["leader", "FR1"]) else [] + mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x] if (linesplt[fr2Index] != "NA" and empty_region_filter in ["leader", "FR1", "CDR1"]) else [] + mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x] if (linesplt[cdr2Index] != "NA") else [] mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x] if linesplt[fr3Index] != "NA" else [] - except e: + except Exception as e: + print "Something went wrong while processing this line:" print linesplt print linecount print e @@ -189,8 +186,6 @@ linesplt = line.split("\t") gene = linesplt[best_matchIndex] ID = linesplt[IDIndex] - if ID == "ca2": - print linesplt RGYW = [(int(x), int(y), z) for (x, y, z) in [hotspotMatcher.match(x).groups() for x in linesplt[aggctatIndex].split("|") if x]] WRCY = [(int(x), int(y), z) for (x, y, z) in @@ -201,8 +196,7 @@ [hotspotMatcher.match(x).groups() for x in linesplt[tatIndex].split("|") if x]] RGYWCount[ID], WRCYCount[ID], WACount[ID], TWCount[ID] = 0, 0, 0, 0 - mutationList = (mutationdic[ID + "_FR1"] if include_fr1 else []) + mutationdic[ID + "_CDR1"] + mutationdic[ - ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] + mutationList = mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] for mutation in mutationList: frm, where, to, AAfrm, AAwhere, AAto, junk = mutation mutation_in_RGYW = any([(start <= int(where) <= end) for (start, end, region) in RGYW]) @@ -271,7 +265,7 @@ o.write(typ + " (%)") curr = dic[typ] for gene in genes: - geneMatcher = geneMatchers[gene] #re.compile("^" + gene + ".*") #recompile every loop.... + geneMatcher = geneMatchers[gene] if valuedic[gene + "_" + fname] is 0: o.write(",0,0,0") else: