Mercurial > repos > guerler > springsuite
annotate spring_package/Utilities.py @ 39:172398348efd draft
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
| author | guerler | 
|---|---|
| date | Fri, 22 Jan 2021 15:50:27 +0000 | 
| parents | |
| children | 
| rev | line source | 
|---|---|
| 
39
 
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
 
guerler 
parents:  
diff
changeset
 | 
from os.path import isfile
| 
 
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
 
guerler 
parents:  
diff
changeset
 | 
2 | 
| 
 
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
 
guerler 
parents:  
diff
changeset
 | 
3 | 
| 
 
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
 
guerler 
parents:  
diff
changeset
 | 
def validateIdentifier(identifier):
    """Check that an identifier follows the `PDB_CHAIN` layout.

    An identifier is accepted when it is at least six characters long and
    has an underscore at index 4 (for example ``1ABC_A``).

    Args:
        identifier: The string to check.

    Raises:
        ValueError: If the identifier is shorter than six characters or the
            character at index 4 is not an underscore.
    """
    if len(identifier) < 6 or identifier[4:5] != "_":
        # ValueError is a subclass of Exception, so callers that catch
        # Exception keep working.
        raise ValueError("Invalid list entry (`PDB_CHAIN`): %s." % identifier)
| 
 
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
 
guerler 
parents:  
diff
changeset
 | 
7 | 
| 
 
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
 
guerler 
parents:  
diff
changeset
 | 
8 | 
| 
 
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
 
guerler 
parents:  
diff
changeset
 | 
def getId(identifier):
    """Return the normalized six-character form of a `PDB_CHAIN` identifier.

    Surrounding whitespace is removed and the identifier is validated. The
    first four characters are upper-cased; the characters at index 4 and 5
    (underscore and chain) are kept as given. Anything after index 5 is
    dropped.

    Args:
        identifier: The raw identifier string.

    Returns:
        The normalized identifier, e.g. ``"1ABC_a"`` for ``" 1abc_a "``.

    Raises:
        Whatever ``validateIdentifier`` raises for a malformed identifier.
    """
    identifier = identifier.strip()
    validateIdentifier(identifier)
    return identifier[:4].upper() + identifier[4:6]
| 
 
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
 
guerler 
parents:  
diff
changeset
 | 
13 | 
| 
 
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
 
guerler 
parents:  
diff
changeset
 | 
14 | 
| 
 
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
 
guerler 
parents:  
diff
changeset
 | 
def getChain(identifier):
    """Return the chain character of a `PDB_CHAIN` identifier.

    Surrounding whitespace is removed before validation, matching getId, so
    that an entry with leading whitespace is not rejected or misread.

    Args:
        identifier: The raw identifier string.

    Returns:
        The single character at index 5 of the stripped identifier.

    Raises:
        Whatever ``validateIdentifier`` raises for a malformed identifier.
    """
    identifier = identifier.strip()
    validateIdentifier(identifier)
    pdbChain = identifier[5:6]
    return pdbChain
| 
 
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
 
guerler 
parents:  
diff
changeset
 | 
19 | 
| 
 
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
 
guerler 
parents:  
diff
changeset
 | 
20 | 
| 
 
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
 
guerler 
parents:  
diff
changeset
 | 
def getName(identifier):
    """Return the lower-cased first four characters of an identifier.

    Surrounding whitespace is removed first so that a padded identifier does
    not produce a padded or shifted result. The identifier is not validated;
    shorter inputs are returned lower-cased as they are.

    Args:
        identifier: The raw identifier string.

    Returns:
        The first four characters of the stripped identifier, lower-cased.
    """
    pdb = identifier.strip()[:4].lower()
    return pdb
| 
 
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
 
guerler 
parents:  
diff
changeset
 | 
24 | 
| 
 
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
 
guerler 
parents:  
diff
changeset
 | 
25 | 
| 
 
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
 
guerler 
parents:  
diff
changeset
 | 
def getCrossReference(crossReferenceFile, allPartners=False):
    """Load a whitespace-separated cross reference file into a dictionary.

    Each non-blank line must have at least two columns: a core entry and a
    partner entry. When a line has four or more columns, columns three and
    four are stored as the template pair; otherwise the core/partner pair
    itself is stored. Blank lines are skipped.

    Args:
        crossReferenceFile: Path of the cross reference file to read.
        allPartners: When false (default), a partner is recorded only the
            first time it appears for a given core. When true, every line is
            recorded, including repeats.

    Returns:
        A dict mapping each core to ``dict(partners=[...], templates=[...])``,
        where the two lists run in parallel and each template is a
        two-element list.

    Raises:
        ValueError: If a non-blank line has fewer than two columns.
    """
    crossReference = dict()
    # Per-core set of partners already recorded, so the duplicate check does
    # not rescan the partners list for every line.
    seenPartners = dict()
    crossCount = 0
    with open(crossReferenceFile) as file:
        for line in file:
            columns = line.split()
            if not columns:
                # Blank (or whitespace-only) lines carry no entry.
                continue
            if len(columns) < 2:
                raise ValueError("Invalid Cross Reference Entry %s." % line.strip())
            core = columns[0]
            partner = columns[1]
            if len(columns) < 4:
                templates = [core, partner]
            else:
                templates = [columns[2], columns[3]]
            if core not in crossReference:
                crossReference[core] = dict(partners=list(), templates=list())
                seenPartners[core] = set()
            if allPartners or partner not in seenPartners[core]:
                seenPartners[core].add(partner)
                crossReference[core]["partners"].append(partner)
                crossReference[core]["templates"].append(templates)
                crossCount = crossCount + 1
    print("Identified %s reference interactions." % crossCount)
    return crossReference
| 
 
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
 
guerler 
parents:  
diff
changeset
 | 
48 | 
| 
 
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
 
guerler 
parents:  
diff
changeset
 | 
49 | 
| 
 
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
 
guerler 
parents:  
diff
changeset
 | 
def getTemplates(hhrFile, minScore=10):
    """Collect template identifiers and scores from the hit list of a file.

    The first nine lines of the file are ignored. Following lines are read
    until the first blank line. On each of them the template identifier is
    taken from columns 4-9 and the score from columns 57-62 (fixed-width
    slices). Only templates scoring strictly above ``minScore`` are kept.

    Args:
        hhrFile: Path of the file to read. If it is not an existing regular
            file, nothing is read.
        minScore: Exclusive lower bound on the score of kept templates.

    Returns:
        A tuple ``(topTemplate, result)``: ``topTemplate`` is the identifier
        of the first kept template in file order (``None`` if there is
        none), and ``result`` maps each kept identifier to its score.

    Raises:
        ValueError: If the score slice of a hit line is not a valid float.
    """
    result = dict()
    topTemplate = None
    if not isfile(hhrFile):
        return topTemplate, result
    with open(hhrFile) as file:
        for index, line in enumerate(file):
            if index <= 8:
                # Header portion of the file; hits start on the tenth line.
                continue
            if not line.strip():
                # The first blank line ends the hit list.
                break
            templateId = line[4:10]
            templateScore = float(line[57:63])
            if templateScore > minScore:
                if topTemplate is None:
                    topTemplate = templateId
                result[templateId] = templateScore
    return topTemplate, result
