comparison baseline/script_imgt.py @ 63:8728284105ee draft

Uploaded
author davidvanzessen
date Wed, 06 Dec 2017 08:04:52 -0500
parents 4c5ba6b5d10d
children ba33b94637ca
comparison
equal deleted inserted replaced
62:aa8d37bd1930 63:8728284105ee
8 parser.add_argument("--output", help="Output file") 8 parser.add_argument("--output", help="Output file")
9 parser.add_argument("--id", help="ID to be used at the '>>>' line in the output") 9 parser.add_argument("--id", help="ID to be used at the '>>>' line in the output")
10 10
11 args = parser.parse_args() 11 args = parser.parse_args()
12 12
13 print "script_imgt.py"
14 print "input:", args.input
15 print "ref:", args.ref
16 print "output:", args.output
17 print "id:", args.id
18
13 refdic = dict() 19 refdic = dict()
14 with open(args.ref, 'rU') as ref: 20 with open(args.ref, 'rU') as ref:
15 currentSeq = "" 21 currentSeq = ""
16 currentId = "" 22 currentId = ""
17 for line in ref: 23 for line in ref:
18 if line[0] is ">": 24 if line.startswith(">"):
19 if currentSeq is not "" and currentId is not "": 25 if currentSeq is not "" and currentId is not "":
20 refdic[currentId[1:]] = currentSeq 26 refdic[currentId[1:]] = currentSeq
21 currentId = line.rstrip() 27 currentId = line.rstrip()
22 currentSeq = "" 28 currentSeq = ""
23 else: 29 else:
24 currentSeq += line.rstrip() 30 currentSeq += line.rstrip()
25 refdic[currentId[1:]] = currentSeq 31 refdic[currentId[1:]] = currentSeq
26 32
33 print "Have", str(len(refdic)), "reference sequences"
27 34
28 vPattern = [r"(IGHV[0-9]-[0-9ab]+-?[0-9]?D?\*\d{1,2})"]#, 35 vPattern = [r"(IGHV[0-9]-[0-9ab]+-?[0-9]?D?\*\d{1,2})"]#,
29 # r"(TRBV[0-9]{1,2}-?[0-9]?-?[123]?)", 36 # r"(TRBV[0-9]{1,2}-?[0-9]?-?[123]?)",
30 # r"(IGKV[0-3]D?-[0-9]{1,2})", 37 # r"(IGKV[0-3]D?-[0-9]{1,2})",
31 # r"(IGLV[0-9]-[0-9]{1,2})", 38 # r"(IGLV[0-9]-[0-9]{1,2})",
35 42
36 #vPattern = re.compile(r"|".join(vPattern)) 43 #vPattern = re.compile(r"|".join(vPattern))
37 vPattern = re.compile("|".join(vPattern)) 44 vPattern = re.compile("|".join(vPattern))
38 45
39 def filterGene(s, pattern): 46 def filterGene(s, pattern):
40 s1 = s[s.find(" ") + 1:]
41 return s1[:s1.find(" ")]
42 """
43 if type(s) is not str: 47 if type(s) is not str:
44 return None 48 return None
45 res = pattern.search(s) 49 res = pattern.search(s)
46 if res: 50 if res:
47 return res.group(0) 51 return res.group(0)
48 return None 52 return None
49 """ 53
50 54
51 55
52 currentSeq = "" 56 currentSeq = ""
53 currentId = "" 57 currentId = ""
54 first=True 58 first=True