comparison baseline/script_imgt.py @ 83:729738462297 draft

"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
author rhpvorderman
date Wed, 15 Sep 2021 12:24:06 +0000
parents b6f9a640e098
children
Comparison legend: equal, deleted, inserted, replaced
Left column: revision 82:a103134ee6e0; right column: revision 83:729738462297
8 parser.add_argument("--output", help="Output file") 8 parser.add_argument("--output", help="Output file")
9 parser.add_argument("--id", help="ID to be used at the '>>>' line in the output") 9 parser.add_argument("--id", help="ID to be used at the '>>>' line in the output")
10 10
11 args = parser.parse_args() 11 args = parser.parse_args()
12 12
13 print "script_imgt.py" 13 print("script_imgt.py")
14 print "input:", args.input 14 print("input:", args.input)
15 print "ref:", args.ref 15 print("ref:", args.ref)
16 print "output:", args.output 16 print("output:", args.output)
17 print "id:", args.id 17 print("id:", args.id)
18 18
19 refdic = dict() 19 refdic = dict()
20 with open(args.ref, 'rU') as ref: 20 with open(args.ref, 'rU') as ref:
21 currentSeq = "" 21 currentSeq = ""
22 currentId = "" 22 currentId = ""
28 currentSeq = "" 28 currentSeq = ""
29 else: 29 else:
30 currentSeq += line.rstrip() 30 currentSeq += line.rstrip()
31 refdic[currentId[1:]] = currentSeq 31 refdic[currentId[1:]] = currentSeq
32 32
33 print "Have", str(len(refdic)), "reference sequences" 33 print("Have", str(len(refdic)), "reference sequences")
34 34
35 vPattern = [r"(IGHV[0-9]-[0-9ab]+-?[0-9]?D?\*\d{1,2})"]#, 35 vPattern = [r"(IGHV[0-9]-[0-9ab]+-?[0-9]?D?\*\d{1,2})"]#,
36 # r"(TRBV[0-9]{1,2}-?[0-9]?-?[123]?)", 36 # r"(TRBV[0-9]{1,2}-?[0-9]?-?[123]?)",
37 # r"(IGKV[0-3]D?-[0-9]{1,2})", 37 # r"(IGKV[0-3]D?-[0-9]{1,2})",
38 # r"(IGLV[0-9]-[0-9]{1,2})", 38 # r"(IGLV[0-9]-[0-9]{1,2})",
72 outputdic[ref] += [(linesplt[0].replace(">", ""), linesplt[2].replace(">", "").rstrip())] 72 outputdic[ref] += [(linesplt[0].replace(">", ""), linesplt[2].replace(">", "").rstrip())]
73 else: 73 else:
74 outputdic[ref] = [(linesplt[0].replace(">", ""), linesplt[2].replace(">", "").rstrip())] 74 outputdic[ref] = [(linesplt[0].replace(">", ""), linesplt[2].replace(">", "").rstrip())]
75 #print outputdic 75 #print outputdic
76 76
77 for k in outputdic.keys(): 77 for k in list(outputdic.keys()):
78 if k in refdic: 78 if k in refdic:
79 o.write(">>" + k + "\n") 79 o.write(">>" + k + "\n")
80 o.write(refdic[k] + "\n") 80 o.write(refdic[k] + "\n")
81 for seq in outputdic[k]: 81 for seq in outputdic[k]:
82 #print seq 82 #print seq
83 o.write(">" + seq[0] + "\n") 83 o.write(">" + seq[0] + "\n")
84 o.write(seq[1] + "\n") 84 o.write(seq[1] + "\n")
85 else: 85 else:
86 print k + " not in reference, skipping " + k 86 print(k + " not in reference, skipping " + k)