Mercurial > repos > guerler > springsuite
comparison spring_minz.py @ 29:41353488926c draft
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
author | guerler |
---|---|
date | Sun, 22 Nov 2020 14:15:24 +0000 |
parents | e34da554d415 |
children | 172398348efd |
comparison
equal
deleted
inserted
replaced
28:75d1aedc9b3f | 29:41353488926c |
---|---|
1 #! /usr/bin/env python3 | 1 #! /usr/bin/env python3 |
2 import argparse | 2 import argparse |
3 import os | 3 import os |
| | 4 |
4 | 5 |
5 def main(args): | 6 def main(args): |
6 logFile = open(args.log, 'a+') | 7 logFile = open(args.log, 'a+') |
7 targets = list() | 8 targets = list() |
8 targetPath = args.targetpath.rstrip("/") | 9 targetPath = args.targetpath.rstrip("/") |
9 with open(args.targetlist) as file: | 10 with open(args.targetlist) as file: |
10 for index, line in enumerate(file): | 11 for line in file: |
11 name = line.strip() | 12 name = line.strip() |
12 targets.append(name) | 13 targets.append(name) |
13 print ("Loaded %s target names from `%s`." % (len(targets), args.targetlist)) | 14 print("Loaded %s target names from `%s`." % (len(targets), |
| | 15 args.targetlist)) |
14 if args.inputlist: | 16 if args.inputlist: |
15 inputs = list() | 17 inputs = list() |
16 inputPath = args.inputpath.rstrip("/") | 18 inputPath = args.inputpath.rstrip("/") |
17 with open(args.inputlist) as file: | 19 with open(args.inputlist) as file: |
18 for index, line in enumerate(file): | 20 for line in file: |
19 name = line.strip() | 21 name = line.strip() |
20 inputs.append(name) | 22 inputs.append(name) |
21 print ("Loaded %s input names from `%s`." % (len(inputs), args.inputlist)) | 23 print("Loaded %s input names from `%s`." % (len(inputs), |
| | 24 args.inputlist)) |
22 else: | 25 else: |
23 inputs = targets | 26 inputs = targets |
24 inputPath = targetPath | 27 inputPath = targetPath |
25 crossReference = dict() | 28 crossReference = dict() |
26 with open(args.crossreference) as file: | 29 with open(args.crossreference) as file: |
27 for index, line in enumerate(file): | 30 for line in file: |
28 columns = line.split() | 31 columns = line.split() |
29 core = columns[0] | 32 core = columns[0] |
30 partner = columns[-1] | 33 partner = columns[-1] |
31 if core not in crossReference: | 34 if core not in crossReference: |
32 crossReference[core] = [] | 35 crossReference[core] = [] |
33 crossReference[core].append(partner) | 36 crossReference[core].append(partner) |
34 print ("Loaded cross reference from `%s`." % args.crossreference) | 37 print("Loaded cross reference from `%s`." % args.crossreference) |
35 interactions = dict() | 38 interactions = dict() |
36 for targetName in targets: | 39 for targetName in targets: |
37 targetFile = "%s/%s" % (targetPath, targetName) | 40 targetFile = "%s/%s" % (targetPath, targetName) |
38 matchScores(targetFile=targetFile, | 41 matchScores(targetFile=targetFile, |
39 targetName=targetName, | 42 targetName=targetName, |
55 crossReference=crossReference, | 58 crossReference=crossReference, |
56 minScore=args.minscore, | 59 minScore=args.minscore, |
57 idLength=args.idlength, | 60 idLength=args.idlength, |
58 logFile=logFile, | 61 logFile=logFile, |
59 interactions=interactions) | 62 interactions=interactions) |
60 interactions = sorted(interactions.values(), key=lambda item: item["minZ"], reverse=True) | 63 interactions = sorted(interactions.values(), key=lambda item: item["minZ"], |
| | 64 reverse=True) |
61 with open(args.output, 'w') as output_file: | 65 with open(args.output, 'w') as output_file: |
62 for entry in interactions: | 66 for entry in interactions: |
63 output_file.write("%s\t%s\t%s\t%s\n" % (entry["targetName"], entry["inputName"], entry["minZ"], entry["minInfo"])) | 67 output_file.write("%s\t%s\t%s\t%s\n" % (entry["targetName"], |
| | 68 entry["inputName"], entry["minZ"], |
| | 69 entry["minInfo"])) |
64 logFile.close() | 70 logFile.close() |
65 | 71 |
66 def matchScores(targetFile, targetName, inputs, inputPath, crossReference, minScore, idLength, logFile, interactions): | 72 |
| | 73 def matchScores(targetFile, targetName, inputs, inputPath, crossReference, |
| | 74 minScore, idLength, logFile, interactions): |
67 targetTop, targetHits = getTemplateScores(targetFile, minScore, idLength) | 75 targetTop, targetHits = getTemplateScores(targetFile, minScore, idLength) |
68 if not targetHits: | 76 if not targetHits: |
69 print("No targets found `%s`" % targetFile) | 77 print("No targets found `%s`" % targetFile) |
70 else: | 78 else: |
71 print ("Loaded target scores from `%s`." % targetFile) | 79 print("Loaded target scores from `%s`." % targetFile) |
72 for inputName in inputs: | 80 for inputName in inputs: |
73 inputFile = "%s/%s" % (inputPath, inputName) | 81 inputFile = "%s/%s" % (inputPath, inputName) |
74 inputTop, inputHits = getTemplateScores(inputFile, minScore, idLength) | 82 inputTop, inputHits = getTemplateScores(inputFile, |
| | 83 minScore, idLength) |
75 minZ = 0 | 84 minZ = 0 |
76 minInfo = "" | 85 minInfo = "" |
77 for t in targetHits: | 86 for t in targetHits: |
78 if t in crossReference: | 87 if t in crossReference: |
79 partners = crossReference[t] | 88 partners = crossReference[t] |
80 for p in partners: | 89 for p in partners: |
81 if p in inputHits: | 90 if p in inputHits: |
82 score = min(targetHits[t], inputHits[p]) | 91 score = min(targetHits[t], inputHits[p]) |
83 if score > minZ: | 92 if score > minZ: |
84 minZ = score | 93 minZ = score |
85 minInfo = "%s\t%s\t%s\t%s" % (targetTop, inputTop, t, p) | 94 minInfo = "%s\t%s\t%s\t%s" % (targetTop, |
| | 95 inputTop, t, p) |
86 if minZ > minScore: | 96 if minZ > minScore: |
87 if targetName > inputName: | 97 if targetName > inputName: |
88 interactionKey = "%s_%s" % (targetName, inputName) | 98 interactionKey = "%s_%s" % (targetName, inputName) |
89 else: | 99 else: |
90 interactionKey = "%s_%s" % (inputName, targetName) | 100 interactionKey = "%s_%s" % (inputName, targetName) |
91 if interactionKey in interactions: | 101 if interactionKey in interactions: |
92 if interactions[interactionKey]["minZ"] >= minZ: | 102 if interactions[interactionKey]["minZ"] >= minZ: |
93 continue | 103 continue |
94 interactions[interactionKey] = dict(targetName=targetName, inputName=inputName, minZ=minZ, minInfo=minInfo) | 104 interactions[interactionKey] = dict(targetName=targetName, |
95 logFile.write("Interaction between %s and %s [min-Z: %s].\n" % (targetName, inputName, minZ)) | 105 inputName=inputName, |
| | 106 minZ=minZ, minInfo=minInfo) |
| | 107 logFile.write("Interaction between %s and %s [min-Z: %s].\n" % |
| | 108 (targetName, inputName, minZ)) |
| | 109 |
96 | 110 |
97 def getTemplateScores(hhrFile, minScore, idLength): | 111 def getTemplateScores(hhrFile, minScore, idLength): |
98 result = dict() | 112 result = dict() |
99 topTemplate = None | 113 topTemplate = None |
100 idLength = idLength + 4 | 114 idLength = idLength + 4 |
110 if topTemplate is None: | 124 if topTemplate is None: |
111 topTemplate = templateId | 125 topTemplate = templateId |
112 result[templateId] = templateScore | 126 result[templateId] = templateScore |
113 return topTemplate, result | 127 return topTemplate, result |
114 | 128 |
| | 129 |
115 if __name__ == "__main__": | 130 if __name__ == "__main__": |
116 parser = argparse.ArgumentParser(description='This script identifies interactions by detecting matching HH-search results.') | 131 parser = argparse.ArgumentParser(description='This script identifies interactions by detecting matching HH-search results.') |
117 parser.add_argument('-tl', '--targetlist', help='Text file containing identifiers.', required=True) | 132 parser.add_argument('-tl', '--targetlist', help='Text file containing identifiers.', required=True) |
118 parser.add_argument('-tp', '--targetpath', help='Directory containing `hhr` files', required=True) | 133 parser.add_argument('-tp', '--targetpath', help='Directory containing `hhr` files', required=True) |
119 parser.add_argument('-il', '--inputlist', help='Text file containing identifiers.', required=False) | 134 parser.add_argument('-il', '--inputlist', help='Text file containing identifiers.', required=False) |