Mercurial > repos > guerler > springsuite
view spring_minz.py @ 40:06337927c198 draft
"planemo upload commit 68723d88e81923739538c34722bc9be164dd4646"
author | guerler |
---|---|
date | Sat, 23 Jan 2021 14:42:46 +0000 |
parents | 172398348efd |
children |
line wrap: on
line source
#! /usr/bin/env python3
"""Identify putative interactions by matching HH-search results.

For every target/input pair the script looks for template hits that are
listed as partners in a cross reference and reports the best "min-Z" score,
i.e. the highest value of ``min(target hit score, input hit score)`` over
all partnered template pairs.
"""
import argparse

from spring_package.Utilities import getCrossReference, getTemplates


def _readNames(listPath):
    """Return the whitespace-stripped, non-empty lines of `listPath`.

    Blank lines (e.g. a trailing newline at the end of the list) are skipped
    so that they are not later turned into a path pointing at the bare
    directory.
    """
    with open(listPath) as handle:
        return [name for name in (line.strip() for line in handle) if name]


def main(args):
    """Load all hhr results, score every target/input pair and write the result.

    Args:
        args: Parsed command line namespace providing `targetlist`,
            `targetpath`, `inputlist`, `inputpath`, `cross`, `output`,
            `log` and `minscore`.

    The output file receives one tab-separated line per interaction
    (target name, input name, min-Z score, details), sorted by descending
    min-Z score.
    """
    minScore = args.minscore
    targetPath = args.targetpath.rstrip("/")

    # The log is opened before any loading (as before) and is now closed on
    # every exit path, including exceptions raised while loading or matching.
    with open(args.log, 'w') as logFile:
        targets = _readNames(args.targetlist)
        print("Loaded %s target names from `%s`." % (len(targets), args.targetlist))

        hhrResults = dict()
        for targetName in targets:
            targetFile = "%s/%s" % (targetPath, targetName)
            hhrResults[targetName] = getTemplates(targetFile, minScore)

        if args.inputlist:
            inputPath = args.inputpath.rstrip("/")
            inputs = _readNames(args.inputlist)
            print("Loaded %s input names from `%s`." % (len(inputs), args.inputlist))
            for inputName in inputs:
                # Names already loaded as targets are reused, not re-read.
                if inputName not in hhrResults:
                    inputFile = "%s/%s" % (inputPath, inputName)
                    hhrResults[inputName] = getTemplates(inputFile, minScore)
        else:
            # Without a separate input list the targets are matched
            # against each other.
            inputs = targets
        print("Loaded hhr results for %s entries." % len(hhrResults))

        crossReference = getCrossReference(args.cross)
        print("Loaded cross reference from `%s`." % args.cross)

        interactions = dict()
        for targetName in targets:
            matchScores(hhrResults=hhrResults,
                        targetName=targetName,
                        inputs=inputs,
                        crossReference=crossReference,
                        minScore=minScore,
                        logFile=logFile,
                        interactions=interactions)

    ranked = sorted(interactions.values(), key=lambda item: item["minZ"], reverse=True)
    with open(args.output, 'w') as output_file:
        for entry in ranked:
            output_file.write("%s\t%s\t%s\t%s\n" % (entry["targetName"], entry["inputName"],
                                                    entry["minZ"], entry["minInfo"]))


def matchScores(hhrResults, targetName, inputs, crossReference, minScore, logFile, interactions):
    """Score `targetName` against every input and record the best interactions.

    Args:
        hhrResults: Mapping of entry name to a `(top, hits)` pair, where
            `hits` maps a template identifier to its score.
        targetName: Name of the target entry to evaluate.
        inputs: Iterable of input entry names to pair with the target.
        crossReference: Mapping of template identifier to a record whose
            "partners" item lists the partner template identifiers.
        minScore: A pair is only recorded if its min-Z score exceeds this.
        logFile: Writable text stream receiving a progress line.
        interactions: Dictionary updated in place. Each pair is stored once
            under an order-independent key, keeping the higher min-Z score.
    """
    if targetName not in hhrResults:
        print("Target not found `%s`" % targetName)
        return

    targetTop, targetHits = hhrResults[targetName]
    print("Evaluating %s." % targetName)
    logFile.write("Evaluating %s.\n" % targetName)
    # Flushed immediately so progress is visible while the run is ongoing.
    logFile.flush()

    for inputName in inputs:
        if inputName not in hhrResults:
            continue
        inputTop, inputHits = hhrResults[inputName]

        # Best (highest) min-Z over all partnered template pairs.
        minZ = 0
        minInfo = ""
        for t in targetHits:
            if t not in crossReference:
                continue
            for p in crossReference[t]["partners"]:
                if p not in inputHits:
                    continue
                score = min(targetHits[t], inputHits[p])
                if score > minZ:
                    minZ = score
                    minInfo = "%s\t%s\t%s\t%s" % (targetTop, inputTop, t, p)

        if minZ <= minScore:
            continue

        # Larger name first, so (a, b) and (b, a) share a single entry.
        # NOTE(review): names that contain "_" could in principle produce
        # the same key for different pairs - confirm identifiers are safe.
        if targetName > inputName:
            interactionKey = "%s_%s" % (targetName, inputName)
        else:
            interactionKey = "%s_%s" % (inputName, targetName)

        previous = interactions.get(interactionKey)
        if previous is not None and previous["minZ"] >= minZ:
            continue
        interactions[interactionKey] = dict(targetName=targetName, inputName=inputName,
                                            minZ=minZ, minInfo=minInfo)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='This script identifies interactions by detecting matching HH-search results.')
    parser.add_argument('-tl', '--targetlist', help='Text file containing identifiers.', required=True)
    parser.add_argument('-tp', '--targetpath', help='Directory containing `hhr` files', required=True)
    parser.add_argument('-il', '--inputlist', help='Text file containing identifiers.', required=False)
    parser.add_argument('-ip', '--inputpath', help='Directory containing `hhr` files', required=False)
    parser.add_argument('-c', '--cross', help='PDB Cross Reference', required=True)
    parser.add_argument('-o', '--output', help='Output file containing min-Z scores', required=True)
    parser.add_argument('-l', '--log', help='Log file', required=True)
    parser.add_argument('-m', '--minscore', help='min-Z score threshold', type=int, default=25)
    args = parser.parse_args()
    # An input list is useless without the directory holding its files;
    # report it as a usage error instead of failing later on `None.rstrip`.
    if args.inputlist and args.inputpath is None:
        parser.error("--inputpath is required when --inputlist is given")
    main(args)