Mercurial > repos > guerler > springsuite
changeset 41:f316caf098a6 draft default tip
"planemo upload commit 685e1236afde7cf6bb0c9236de06998d2c211dd3"
author | guerler |
---|---|
date | Mon, 01 Mar 2021 15:02:36 +0000 (2021-03-01) |
parents | 06337927c198 |
children | |
files | spring_mcc.py spring_mcc.xml spring_model_all.py spring_package/Modeller.py spring_package/__pycache__/Modeller.cpython-38.pyc spring_package/__pycache__/Modeller.cpython-39.pyc test-data/mcc/human_hv1h2.png test-data/model/log.txt |
diffstat | 8 files changed, 80 insertions(+), 74 deletions(-) [+] |
line wrap: on
line diff
--- a/spring_mcc.py Sat Jan 23 14:42:46 2021 +0000 +++ b/spring_mcc.py Mon Mar 01 15:02:36 2021 +0000 @@ -1,9 +1,20 @@ #! /usr/bin/env python import argparse import math +import pandas as pd from os.path import isfile import re -from matplotlib import pyplot as plt + +METHODS = ["Biochemical Activity", + "Co-fractionation", + "Co-localization", + "Far Western", + "FRET", + "PCA", + "Co-crystal Structure", + "Co-purification", + "Two-hybrid", + "Affinity Capture-MS"] def getIds(rawIds): @@ -213,8 +224,8 @@ filterAList = sorted(locations[regionA]) filterBList = sorted(locations[regionB]) else: - filterAList = list(filterA) - filterBList = list(filterB) + filterAList = sorted(filterA) + filterBList = sorted(filterB) for i, j in randomPairs(len(filterAList), len(filterBList), jSize): nameA = filterAList[i] nameB = filterBList[j] @@ -251,56 +262,57 @@ filterB = filterA # identify biogrid filter options - filterValues = list() - filterValues.append([11, args.method]) - - # process biogrid database - print("Loading positive set from BioGRID file...") - positive, positiveCount = getReference(args.biogrid, aCol=23, bCol=26, - separator="\t", filterA=filterA, - filterB=filterB, skipFirstLine=True, - filterValues=filterValues) + performance = dict() + for methodReference in METHODS: - # estimate negative set - negative = getNegativeSet(args, filterA, filterB, positiveCount) + # process biogrid database + print("Loading positive set from BioGRID file (%s)..." 
% methodReference) + filterValues = [[11, methodReference]] + positive, positiveCount = getReference(args.biogrid, aCol=23, bCol=26, + separator="\t", filterA=filterA, + filterB=filterB, skipFirstLine=True, + filterValues=filterValues) - # get prediction results - print("Loading prediction file...") - prediction, _ = getReference(args.input, scoreCol=2, minScore=0.8) - mcc = getMCC(prediction, positive, positiveCount, negative) - yValues = [mcc] - yTicks = ["SPRING"] + # estimate negative set + negative = getNegativeSet(args, filterA, filterB, positiveCount) - # identify biogrid filter options - for method in ["Affinity Capture-MS", - "Biochemical Activity", - "Co-crystal Structure", - "Co-fractionation", - "Co-localization", - "Co-purification", - "Far Western", - "FRET", - "PCA", - "Reconstituted Complex", - "Two-hybrid"]: - if args.method != method: - print("Method: %s" % method) - filterValues = [[11, method]] - prediction, _ = getReference(args.biogrid, aCol=23, bCol=26, - separator="\t", filterA=filterA, - filterB=filterB, skipFirstLine=True, - filterValues=filterValues) - mcc = getMCC(prediction, positive, positiveCount, negative) - yValues.append(mcc) - yTicks.append(method) + # evaluate other methods + yValues = list() + for method in METHODS: + if methodReference != method: + print("Method: %s" % method) + filterValues = [[11, method]] + prediction, _ = getReference(args.biogrid, aCol=23, bCol=26, + separator="\t", filterA=filterA, + filterB=filterB, skipFirstLine=True, + filterValues=filterValues) + mcc = getMCC(prediction, positive, positiveCount, negative) + yValues.append(mcc) + else: + yValues.append(0.0) + + # add results to performance dictionary + performance[methodReference] = yValues + + # get and append prediction results + print("Loading prediction file...") + prediction, _ = getReference(args.input, scoreCol=2, minScore=0.0) + mcc = getMCC(prediction, positive, positiveCount, negative) + performance[methodReference].append(mcc) + + # build 
yTicks + yTicks = METHODS[:] + yTicks.append("SPRING") # create plot print("Producing plot data...") print("Total count in prediction file: %d." % len(prediction)) print("Total count in positive file: %d." % len(positive)) - plt.xlabel("Matthews-Correlation Coefficient (MCC)") - plt.title("Positive set: %s" % args.method) - plt.barh(yTicks, yValues) + df = pd.DataFrame(performance, index=yTicks) + ax = df.plot.barh() + ax.set_title(args.experiment) + ax.set_xlabel("Matthews-Correlation Coefficient (MCC)") + plt = ax.get_figure() plt.tight_layout() plt.savefig(args.output, format="png") @@ -314,7 +326,7 @@ parser.add_argument('-rb', '--region_b', help='Second subcellular location', required=False) parser.add_argument('-n', '--negative', help='Negative set (2-columns)', required=False) parser.add_argument('-t', '--throughput', help='Throughput (low/high)', required=False) - parser.add_argument('-m', '--method', help='Method e.g. Two-hybrid', required=False) + parser.add_argument('-e', '--experiment', help='Experiment Title', required=False, default="Results") parser.add_argument('-o', '--output', help='Output (png)', required=True) args = parser.parse_args() main(args)
--- a/spring_mcc.xml Sat Jan 23 14:42:46 2021 +0000 +++ b/spring_mcc.xml Mon Mar 01 15:02:36 2021 +0000 @@ -2,11 +2,13 @@ <description>plot generator</description> <requirements> <requirement type="package" version="3.3.2">matplotlib</requirement> + <requirement type="package" version="1.2.0">pandas</requirement> </requirements> <command detect_errors="exit_code"><![CDATA[ - python3 '$__tool_directory__/spring_mcc.py' -i '$input' -b '$database' -m '$method' -l '$sampling.locations' -ra '$sampling.regiona' -rb '$sampling.regionb' -n '$sampling.negative' -o '$output' + python3 '$__tool_directory__/spring_mcc.py' -i '$input' -b '$database' -e '$experiment' -l '$sampling.locations' -ra '$sampling.regiona' -rb '$sampling.regionb' -n '$sampling.negative' -o '$output' ]]></command> <inputs> + <param name="experiment" type="text" label="Title" help="Experiment title" value="Results"/> <param name="input" type="data" format="tabular" label="Interacting Pairs" help="Prediction input containing UniProt Accession codes (2-columns)."/> <conditional name="sampling"> <param name="type" type="select" label="Non-interacting Pairs" display="radio" help="Specify how to determine non-interacting pairs."> @@ -50,19 +52,6 @@ </when> </conditional> <param name="database" type="data" format="tabular" label="BioGRID Database" help="BioGRID Database in TAB 3.0 format."/> - <param name="method" type="select" label="Experimental Method" help="Choose a specific experimental method name."> - <option value="Affinity Capture-MS">Affinity Capture-MS</option> - <option value="Biochemical Activity">Biochemical Activity</option> - <option value="Co-crystal Structure">Co-crystal Structure</option> - <option value="Co-fractionation">Co-fractionation</option> - <option value="Co-localization">Co-localization</option> - <option value="Co-purification">Co-purification</option> - <option value="Far Western">Far Western</option> - <option value="FRET">FRET</option> - <option value="PCA">PCA</option> - 
<option value="Reconstituted Complex">Reconstituted Complex</option> - <option value="Two-hybrid">Two-hybrid</option> - </param> </inputs> <outputs> <data format="png" name="output" label="SPRING MCC Image" /> @@ -71,7 +60,6 @@ <test> <param name="input" value="mcc/human_hv1h2.txt" /> <param name="database" value="mcc/biogrid_fret.txt" /> - <param name="method" value="FRET" /> <output name="output" file="mcc/human_hv1h2.png" /> </test> </tests>
--- a/spring_model_all.py Sat Jan 23 14:42:46 2021 +0000 +++ b/spring_model_all.py Mon Mar 01 15:02:36 2021 +0000 @@ -34,7 +34,7 @@ mkdir("temp") dbkit = DBKit(args.hhr_index, args.hhr_database) logFile = open(args.log, "w") - logFile.write("#namea\t nameb\t springscore\t tmscore\t energy\t clashes\n") + logFile.write("#namea\t nameb\t springscore\t tmscore\t energy\t clashes\t zscore\t templatea\t templateb\n") with open(args.pairs, "r") as file: for line in file: param = line.split() @@ -52,12 +52,17 @@ modelArgs.set(a_hhr=aFile, b_hhr=bFile, output=output) modelData = createModel(modelArgs) if modelData: - infoStr = "%s\t %s\t %5.2f\t %5.2f\t %5.2f\t %5.2f\n" % (aIdentifier, bIdentifier, - modelData["springscore"], - modelData["tmscore"], - modelData["energy"], - modelData["clashes"]) + infoStr = "%s\t %s\t %5.2f\t %5.2f\t %5.2f\t %5.2f\t %5.2f\t %s\t %s\n" + infoStr = infoStr % (aIdentifier, bIdentifier, + modelData["springscore"], + modelData["tmscore"], + modelData["energy"], + modelData["clashes"], + modelData["zscore"], + modelData["aTemplate"], + modelData["bTemplate"]) logFile.write(infoStr) + logFile.flush() if isfile(aFile): remove(aFile) if isfile(bFile):
--- a/spring_package/Modeller.py Sat Jan 23 14:42:46 2021 +0000 +++ b/spring_package/Modeller.py Mon Mar 01 15:02:36 2021 +0000 @@ -85,7 +85,7 @@ templateIndex = 0 for i in range(len(alignment)): t = templateAlign[i] - if alignment[i] == ":": + if alignment[i] in [":", "."]: templateResidue = templateResidues[templateIndex] templateResidue["alignedResidue"] = modelAlign[i] aligned.append(templateResidue) @@ -112,7 +112,7 @@ if templateHit["score"] < minScore or maxTries == 0: break maxTries = maxTries - 1 - yield templateHit["templatePair"] + yield templateHit["templatePair"], templateHit["score"] def createModel(args): @@ -139,7 +139,7 @@ maxInfo = None minScore = float(args.minscore) maxTries = int(args.maxtries) - for [aTemplate, bTemplate] in getFrameworks(aTemplates, bTemplates, crossReference, minScore=minScore, maxTries=maxTries): + for [aTemplate, bTemplate], zscore in getFrameworks(aTemplates, bTemplates, crossReference, minScore=minScore, maxTries=maxTries): print("Evaluating Complex Template: %s." 
% aTemplate) templateFile = "temp/template.pdb" createPDB(aTemplate, pdbDatabase, templateFile) @@ -168,6 +168,7 @@ print(str(e)) continue biomolFound = True + print(" zscore:\t%5.2f" % zscore) tmscore = min(coreScore, partnerScore) print(" tmscore:\t%5.2f" % tmscore) energy = -interfaceEnergy.get(coreAligned, partnerAligned) @@ -178,7 +179,7 @@ print(" springscore:\t%5.2f" % springscore) if springscore > maxScore and clashes < args.maxclashes: maxScore = springscore - maxInfo = dict(springscore=springscore, tmscore=tmscore, energy=energy, clashes=clashes) + maxInfo = dict(aTemplate=aTemplate, bTemplate=bTemplate, springscore=springscore, tmscore=tmscore, energy=energy, clashes=clashes, zscore=zscore) coreMolecule.save(outputName, chainName="0") partnerMolecule.save(outputName, chainName="1", append=True) if args.showtemplate == "true": @@ -188,7 +189,7 @@ if maxInfo is not None: print("Final Model:") for key in maxInfo: - print(" %s:\t%5.2f" % (key, maxInfo[key])) + print(" %s:\t%s" % (key, maxInfo[key])) print("Completed.") else: print("Warning: Failed to determine model.")
--- a/test-data/model/log.txt Sat Jan 23 14:42:46 2021 +0000 +++ b/test-data/model/log.txt Mon Mar 01 15:02:36 2021 +0000 @@ -1,2 +1,2 @@ -#namea nameb springscore tmscore energy clashes -sp|Q9BYF1|ACE2_HUMAN sp|P0DTC2|SPIKE_SARS2 0.91 0.82 -8.92 0.00 +#namea nameb springscore tmscore energy clashes zscore templatea templateb +sp|Q9BYF1|ACE2_HUMAN sp|P0DTC2|SPIKE_SARS2 0.91 0.82 -8.92 0.00 406.00 6LZG_A 6LZG_B