Mercurial > repos > erasmus-medical-center > hla_dq
comparison hla_dq.py @ 0:10a407fb5072 draft
planemo upload for repository https://github.com/ErasmusMC-Bioinformatics/galaxytools-emc/tree/master/tools/hla_dq commit d6273a8247a1cbb7df2b26b9e97cd1bd3faa4f61
author | erasmus-medical-center |
---|---|
date | Wed, 30 May 2018 07:50:21 -0400 |
parents | |
children | 4fc47a3ff9e8 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:10a407fb5072 |
---|---|
1 #!/usr/bin/env python | |
2 ''' | |
3 Given HLA-A and HLA-B genes annotated with BLAST IMGT/HLA database | |
4 genotypes, determine associated serotypes | |
5 | |
6 | |
7 DQA1 | DQB1 | type | |
8 -------------------- | |
9 02:01| 02:02| DQ2.2 | |
10 03:03| 02:02| DQ2.3 | |
11 05:01| 02:01| DQ2.3 | |
12 03:01| 03:02| DQ8 | |
13 03:02| 03:02| DQ8 | |
14 03:03| 03:02| DQ8 | |
15 | |
16 Annotations are of form DQ[A|B]1*xx:yy[:zz[:vv]] where xx:yy is of interest. | |
17 | |
18 Example: "HLA:HLA11066 DQA1*01:05:02 768 bp" | |
19 ''' | |
20 | |
21 import argparse | |
22 import itertools | |
23 | |
24 | |
25 def to_matrix(l, n): | |
26 return [l[i:i+n] for i in range(0, len(l), n)] | |
27 | |
28 | |
29 def get_list_of_associated_types(filename, column): | |
30 with open(filename) as f: | |
31 contents = f.readlines() | |
32 | |
33 return [':'.join(line.split('\t')[column-1].split(' ')[1] | |
34 .split('*')[1].split(':')[0:2]) | |
35 for line in contents[1:]] | |
36 | |
37 | |
38 def get_associations(typesA, typesB): | |
39 ''' Given list of genotype annotations (e.g. DQA1*02:01..) | |
40 from A and B, determine possible associated serotypes | |
41 ''' | |
42 | |
43 ''' each combination of DQA1,DQB1,type ''' | |
44 associated_combinations = [ | |
45 ['02:01', '02:02', 'DQ2.2'], | |
46 ['03:03', '02:02', 'DQ2.3'], | |
47 ['05:01', '02:01', 'DQ2.5'], | |
48 ['03:01', '03:02', 'DQ8'], | |
49 ['03:02', '03:02', 'DQ8'], | |
50 ['03:03', '03:02', 'DQ8']] | |
51 | |
52 return [a[2] for a in associated_combinations | |
53 if a[0] in typesA and a[1] in typesB] | |
54 | |
55 | |
56 if __name__ == "__main__": | |
57 parser = argparse.ArgumentParser() | |
58 parser.add_argument('-A', required=True, action='append', | |
59 help='BLAST hits A gene') | |
60 parser.add_argument('-B', required=True, action='append', | |
61 help='BLAST hits B gene') | |
62 parser.add_argument('-c', '--column', default=5, type=int, | |
63 help='Column number containing the BLAST annotation') | |
64 args = parser.parse_args() | |
65 | |
66 # TODO: QC check that file A contains DQA1 annotations and | |
67 # B file contains DQB1 annotations? | |
68 | |
69 # find possible associated types, for all combinations of alleles | |
70 typesA = [get_list_of_associated_types(A, args.column) for A in args.A] | |
71 typesB = [get_list_of_associated_types(B, args.column) for B in args.B] | |
72 associations = [get_associations(c[0], c[1]) | |
73 for c in itertools.product(typesA, typesB)] | |
74 associations = to_matrix(associations, len(args.B)) | |
75 | |
76 # write output table | |
77 header = '\t'+'\t'.join(['B'+str(i+1) for i in range(0, len(args.B))])+'\n' | |
78 bcount = 0 | |
79 with open('results.tsv', 'w') as outfile: | |
80 outfile.write(header) | |
81 for line in associations: | |
82 bcount += 1 | |
83 outfile.write( | |
84 'A' + str(bcount) + '\t' + | |
85 '\t'.join([';'.join(sorted(set(l))) | |
86 if l else '-' for l in line])+'\n') |