Mercurial > repos > davidvanzessen > shm_csr
comparison baseline/script_imgt.py @ 83:729738462297 draft
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
author | rhpvorderman |
---|---|
date | Wed, 15 Sep 2021 12:24:06 +0000 |
parents | b6f9a640e098 |
children |
comparison
legend: equal | deleted | inserted | replaced
82:a103134ee6e0 | 83:729738462297 |
---|---|
8 parser.add_argument("--output", help="Output file") | 8 parser.add_argument("--output", help="Output file") |
9 parser.add_argument("--id", help="ID to be used at the '>>>' line in the output") | 9 parser.add_argument("--id", help="ID to be used at the '>>>' line in the output") |
10 | 10 |
11 args = parser.parse_args() | 11 args = parser.parse_args() |
12 | 12 |
13 print "script_imgt.py" | 13 print("script_imgt.py") |
14 print "input:", args.input | 14 print("input:", args.input) |
15 print "ref:", args.ref | 15 print("ref:", args.ref) |
16 print "output:", args.output | 16 print("output:", args.output) |
17 print "id:", args.id | 17 print("id:", args.id) |
18 | 18 |
19 refdic = dict() | 19 refdic = dict() |
20 with open(args.ref, 'rU') as ref: | 20 with open(args.ref, 'rU') as ref: |
21 currentSeq = "" | 21 currentSeq = "" |
22 currentId = "" | 22 currentId = "" |
28 currentSeq = "" | 28 currentSeq = "" |
29 else: | 29 else: |
30 currentSeq += line.rstrip() | 30 currentSeq += line.rstrip() |
31 refdic[currentId[1:]] = currentSeq | 31 refdic[currentId[1:]] = currentSeq |
32 | 32 |
33 print "Have", str(len(refdic)), "reference sequences" | 33 print("Have", str(len(refdic)), "reference sequences") |
34 | 34 |
35 vPattern = [r"(IGHV[0-9]-[0-9ab]+-?[0-9]?D?\*\d{1,2})"]#, | 35 vPattern = [r"(IGHV[0-9]-[0-9ab]+-?[0-9]?D?\*\d{1,2})"]#, |
36 # r"(TRBV[0-9]{1,2}-?[0-9]?-?[123]?)", | 36 # r"(TRBV[0-9]{1,2}-?[0-9]?-?[123]?)", |
37 # r"(IGKV[0-3]D?-[0-9]{1,2})", | 37 # r"(IGKV[0-3]D?-[0-9]{1,2})", |
38 # r"(IGLV[0-9]-[0-9]{1,2})", | 38 # r"(IGLV[0-9]-[0-9]{1,2})", |
72 outputdic[ref] += [(linesplt[0].replace(">", ""), linesplt[2].replace(">", "").rstrip())] | 72 outputdic[ref] += [(linesplt[0].replace(">", ""), linesplt[2].replace(">", "").rstrip())] |
73 else: | 73 else: |
74 outputdic[ref] = [(linesplt[0].replace(">", ""), linesplt[2].replace(">", "").rstrip())] | 74 outputdic[ref] = [(linesplt[0].replace(">", ""), linesplt[2].replace(">", "").rstrip())] |
75 #print outputdic | 75 #print outputdic |
76 | 76 |
77 for k in outputdic.keys(): | 77 for k in list(outputdic.keys()): |
78 if k in refdic: | 78 if k in refdic: |
79 o.write(">>" + k + "\n") | 79 o.write(">>" + k + "\n") |
80 o.write(refdic[k] + "\n") | 80 o.write(refdic[k] + "\n") |
81 for seq in outputdic[k]: | 81 for seq in outputdic[k]: |
82 #print seq | 82 #print seq |
83 o.write(">" + seq[0] + "\n") | 83 o.write(">" + seq[0] + "\n") |
84 o.write(seq[1] + "\n") | 84 o.write(seq[1] + "\n") |
85 else: | 85 else: |
86 print k + " not in reference, skipping " + k | 86 print(k + " not in reference, skipping " + k) |