Mercurial > repos > davidvanzessen > shm_csr
comparison baseline/script_imgt.py @ 83:729738462297 draft
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
| author | rhpvorderman |
|---|---|
| date | Wed, 15 Sep 2021 12:24:06 +0000 |
| parents | b6f9a640e098 |
| children | |
comparison
equal
deleted
inserted
replaced
| 82:a103134ee6e0 | 83:729738462297 |
|---|---|
| 8 parser.add_argument("--output", help="Output file") | 8 parser.add_argument("--output", help="Output file") |
| 9 parser.add_argument("--id", help="ID to be used at the '>>>' line in the output") | 9 parser.add_argument("--id", help="ID to be used at the '>>>' line in the output") |
| 10 | 10 |
| 11 args = parser.parse_args() | 11 args = parser.parse_args() |
| 12 | 12 |
| 13 print "script_imgt.py" | 13 print("script_imgt.py") |
| 14 print "input:", args.input | 14 print("input:", args.input) |
| 15 print "ref:", args.ref | 15 print("ref:", args.ref) |
| 16 print "output:", args.output | 16 print("output:", args.output) |
| 17 print "id:", args.id | 17 print("id:", args.id) |
| 18 | 18 |
| 19 refdic = dict() | 19 refdic = dict() |
| 20 with open(args.ref, 'rU') as ref: | 20 with open(args.ref, 'rU') as ref: |
| 21 currentSeq = "" | 21 currentSeq = "" |
| 22 currentId = "" | 22 currentId = "" |
| 28 currentSeq = "" | 28 currentSeq = "" |
| 29 else: | 29 else: |
| 30 currentSeq += line.rstrip() | 30 currentSeq += line.rstrip() |
| 31 refdic[currentId[1:]] = currentSeq | 31 refdic[currentId[1:]] = currentSeq |
| 32 | 32 |
| 33 print "Have", str(len(refdic)), "reference sequences" | 33 print("Have", str(len(refdic)), "reference sequences") |
| 34 | 34 |
| 35 vPattern = [r"(IGHV[0-9]-[0-9ab]+-?[0-9]?D?\*\d{1,2})"]#, | 35 vPattern = [r"(IGHV[0-9]-[0-9ab]+-?[0-9]?D?\*\d{1,2})"]#, |
| 36 # r"(TRBV[0-9]{1,2}-?[0-9]?-?[123]?)", | 36 # r"(TRBV[0-9]{1,2}-?[0-9]?-?[123]?)", |
| 37 # r"(IGKV[0-3]D?-[0-9]{1,2})", | 37 # r"(IGKV[0-3]D?-[0-9]{1,2})", |
| 38 # r"(IGLV[0-9]-[0-9]{1,2})", | 38 # r"(IGLV[0-9]-[0-9]{1,2})", |
| 72 outputdic[ref] += [(linesplt[0].replace(">", ""), linesplt[2].replace(">", "").rstrip())] | 72 outputdic[ref] += [(linesplt[0].replace(">", ""), linesplt[2].replace(">", "").rstrip())] |
| 73 else: | 73 else: |
| 74 outputdic[ref] = [(linesplt[0].replace(">", ""), linesplt[2].replace(">", "").rstrip())] | 74 outputdic[ref] = [(linesplt[0].replace(">", ""), linesplt[2].replace(">", "").rstrip())] |
| 75 #print outputdic | 75 #print outputdic |
| 76 | 76 |
| 77 for k in outputdic.keys(): | 77 for k in list(outputdic.keys()): |
| 78 if k in refdic: | 78 if k in refdic: |
| 79 o.write(">>" + k + "\n") | 79 o.write(">>" + k + "\n") |
| 80 o.write(refdic[k] + "\n") | 80 o.write(refdic[k] + "\n") |
| 81 for seq in outputdic[k]: | 81 for seq in outputdic[k]: |
| 82 #print seq | 82 #print seq |
| 83 o.write(">" + seq[0] + "\n") | 83 o.write(">" + seq[0] + "\n") |
| 84 o.write(seq[1] + "\n") | 84 o.write(seq[1] + "\n") |
| 85 else: | 85 else: |
| 86 print k + " not in reference, skipping " + k | 86 print(k + " not in reference, skipping " + k) |
