changeset 41:f316caf098a6 draft default tip

"planemo upload commit 685e1236afde7cf6bb0c9236de06998d2c211dd3"
author guerler
date Mon, 01 Mar 2021 15:02:36 +0000 (2021-03-01)
parents 06337927c198
children
files spring_mcc.py spring_mcc.xml spring_model_all.py spring_package/Modeller.py spring_package/__pycache__/Modeller.cpython-38.pyc spring_package/__pycache__/Modeller.cpython-39.pyc test-data/mcc/human_hv1h2.png test-data/model/log.txt
diffstat 8 files changed, 80 insertions(+), 74 deletions(-) [+]
line wrap: on
line diff
--- a/spring_mcc.py	Sat Jan 23 14:42:46 2021 +0000
+++ b/spring_mcc.py	Mon Mar 01 15:02:36 2021 +0000
@@ -1,9 +1,20 @@
 #! /usr/bin/env python
 import argparse
 import math
+import pandas as pd
 from os.path import isfile
 import re
-from matplotlib import pyplot as plt
+
+METHODS = ["Biochemical Activity",
+           "Co-fractionation",
+           "Co-localization",
+           "Far Western",
+           "FRET",
+           "PCA",
+           "Co-crystal Structure",
+           "Co-purification",
+           "Two-hybrid",
+           "Affinity Capture-MS"]
 
 
 def getIds(rawIds):
@@ -213,8 +224,8 @@
             filterAList = sorted(locations[regionA])
             filterBList = sorted(locations[regionB])
         else:
-            filterAList = list(filterA)
-            filterBList = list(filterB)
+            filterAList = sorted(filterA)
+            filterBList = sorted(filterB)
         for i, j in randomPairs(len(filterAList), len(filterBList), jSize):
             nameA = filterAList[i]
             nameB = filterBList[j]
@@ -251,56 +262,57 @@
         filterB = filterA
 
     # identify biogrid filter options
-    filterValues = list()
-    filterValues.append([11, args.method])
-
-    # process biogrid database
-    print("Loading positive set from BioGRID file...")
-    positive, positiveCount = getReference(args.biogrid, aCol=23, bCol=26,
-                                           separator="\t", filterA=filterA,
-                                           filterB=filterB, skipFirstLine=True,
-                                           filterValues=filterValues)
+    performance = dict()
+    for methodReference in METHODS:
 
-    # estimate negative set
-    negative = getNegativeSet(args, filterA, filterB, positiveCount)
+        # process biogrid database
+        print("Loading positive set from BioGRID file (%s)..." % methodReference)
+        filterValues = [[11, methodReference]]
+        positive, positiveCount = getReference(args.biogrid, aCol=23, bCol=26,
+                                               separator="\t", filterA=filterA,
+                                               filterB=filterB, skipFirstLine=True,
+                                               filterValues=filterValues)
 
-    # get prediction results
-    print("Loading prediction file...")
-    prediction, _ = getReference(args.input, scoreCol=2, minScore=0.8)
-    mcc = getMCC(prediction, positive, positiveCount, negative)
-    yValues = [mcc]
-    yTicks = ["SPRING"]
+        # estimate negative set
+        negative = getNegativeSet(args, filterA, filterB, positiveCount)
 
-    # identify biogrid filter options
-    for method in ["Affinity Capture-MS",
-                   "Biochemical Activity",
-                   "Co-crystal Structure",
-                   "Co-fractionation",
-                   "Co-localization",
-                   "Co-purification",
-                   "Far Western",
-                   "FRET",
-                   "PCA",
-                   "Reconstituted Complex",
-                   "Two-hybrid"]:
-        if args.method != method:
-            print("Method: %s" % method)
-            filterValues = [[11, method]]
-            prediction, _ = getReference(args.biogrid, aCol=23, bCol=26,
-                                         separator="\t", filterA=filterA,
-                                         filterB=filterB, skipFirstLine=True,
-                                         filterValues=filterValues)
-            mcc = getMCC(prediction, positive, positiveCount, negative)
-            yValues.append(mcc)
-            yTicks.append(method)
+        # evaluate other methods
+        yValues = list()
+        for method in METHODS:
+            if methodReference != method:
+                print("Method: %s" % method)
+                filterValues = [[11, method]]
+                prediction, _ = getReference(args.biogrid, aCol=23, bCol=26,
+                                             separator="\t", filterA=filterA,
+                                             filterB=filterB, skipFirstLine=True,
+                                             filterValues=filterValues)
+                mcc = getMCC(prediction, positive, positiveCount, negative)
+                yValues.append(mcc)
+            else:
+                yValues.append(0.0)
+
+        # add results to performance dictionary
+        performance[methodReference] = yValues
+
+        # get and append prediction results
+        print("Loading prediction file...")
+        prediction, _ = getReference(args.input, scoreCol=2, minScore=0.0)
+        mcc = getMCC(prediction, positive, positiveCount, negative)
+        performance[methodReference].append(mcc)
+
+    # build yTicks
+    yTicks = METHODS[:]
+    yTicks.append("SPRING")
 
     # create plot
     print("Producing plot data...")
     print("Total count in prediction file: %d." % len(prediction))
     print("Total count in positive file: %d." % len(positive))
-    plt.xlabel("Matthews-Correlation Coefficient (MCC)")
-    plt.title("Positive set: %s" % args.method)
-    plt.barh(yTicks, yValues)
+    df = pd.DataFrame(performance, index=yTicks)
+    ax = df.plot.barh()
+    ax.set_title(args.experiment)
+    ax.set_xlabel("Matthews-Correlation Coefficient (MCC)")
+    plt = ax.get_figure()
     plt.tight_layout()
     plt.savefig(args.output, format="png")
 
@@ -314,7 +326,7 @@
     parser.add_argument('-rb', '--region_b', help='Second subcellular location', required=False)
     parser.add_argument('-n', '--negative', help='Negative set (2-columns)', required=False)
     parser.add_argument('-t', '--throughput', help='Throughput (low/high)', required=False)
-    parser.add_argument('-m', '--method', help='Method e.g. Two-hybrid', required=False)
+    parser.add_argument('-e', '--experiment', help='Experiment Title', required=False, default="Results")
     parser.add_argument('-o', '--output', help='Output (png)', required=True)
     args = parser.parse_args()
     main(args)
--- a/spring_mcc.xml	Sat Jan 23 14:42:46 2021 +0000
+++ b/spring_mcc.xml	Mon Mar 01 15:02:36 2021 +0000
@@ -2,11 +2,13 @@
     <description>plot generator</description>
     <requirements>
         <requirement type="package" version="3.3.2">matplotlib</requirement>
+        <requirement type="package" version="1.2.0">pandas</requirement>
     </requirements>
     <command detect_errors="exit_code"><![CDATA[
-        python3 '$__tool_directory__/spring_mcc.py' -i '$input' -b '$database' -m '$method' -l '$sampling.locations' -ra '$sampling.regiona' -rb '$sampling.regionb' -n '$sampling.negative' -o '$output'
+        python3 '$__tool_directory__/spring_mcc.py' -i '$input' -b '$database' -e '$experiment' -l '$sampling.locations' -ra '$sampling.regiona' -rb '$sampling.regionb' -n '$sampling.negative' -o '$output'
     ]]></command>
     <inputs>
+        <param name="experiment" type="text" label="Title" help="Experiment title" value="Results"/>
         <param name="input" type="data" format="tabular" label="Interacting Pairs" help="Prediction input containing UniProt Accession codes (2-columns)."/>
         <conditional name="sampling">
             <param name="type" type="select" label="Non-interacting Pairs" display="radio" help="Specify how to determine non-interacting pairs.">
@@ -50,19 +52,6 @@
             </when>
         </conditional>
         <param name="database" type="data" format="tabular" label="BioGRID Database" help="BioGRID Database in TAB 3.0 format."/>
-        <param name="method" type="select" label="Experimental Method" help="Choose a specific experimental method name.">
-            <option value="Affinity Capture-MS">Affinity Capture-MS</option>
-            <option value="Biochemical Activity">Biochemical Activity</option>
-            <option value="Co-crystal Structure">Co-crystal Structure</option>
-            <option value="Co-fractionation">Co-fractionation</option>
-            <option value="Co-localization">Co-localization</option>
-            <option value="Co-purification">Co-purification</option>
-            <option value="Far Western">Far Western</option>
-            <option value="FRET">FRET</option>
-            <option value="PCA">PCA</option>
-            <option value="Reconstituted Complex">Reconstituted Complex</option>
-            <option value="Two-hybrid">Two-hybrid</option>
-        </param>
     </inputs>
     <outputs>
         <data format="png" name="output" label="SPRING MCC Image" />
@@ -71,7 +60,6 @@
         <test>
             <param name="input" value="mcc/human_hv1h2.txt" />
             <param name="database" value="mcc/biogrid_fret.txt" />
-            <param name="method" value="FRET" />
             <output name="output" file="mcc/human_hv1h2.png" />
         </test>
     </tests>
--- a/spring_model_all.py	Sat Jan 23 14:42:46 2021 +0000
+++ b/spring_model_all.py	Mon Mar 01 15:02:36 2021 +0000
@@ -34,7 +34,7 @@
         mkdir("temp")
     dbkit = DBKit(args.hhr_index, args.hhr_database)
     logFile = open(args.log, "w")
-    logFile.write("#namea\t nameb\t springscore\t tmscore\t energy\t clashes\n")
+    logFile.write("#namea\t nameb\t springscore\t tmscore\t energy\t clashes\t zscore\t templatea\t templateb\n")
     with open(args.pairs, "r") as file:
         for line in file:
             param = line.split()
@@ -52,12 +52,17 @@
             modelArgs.set(a_hhr=aFile, b_hhr=bFile, output=output)
             modelData = createModel(modelArgs)
             if modelData:
-                infoStr = "%s\t %s\t %5.2f\t %5.2f\t %5.2f\t %5.2f\n" % (aIdentifier, bIdentifier,
-                                                                         modelData["springscore"],
-                                                                         modelData["tmscore"],
-                                                                         modelData["energy"],
-                                                                         modelData["clashes"])
+                infoStr = "%s\t %s\t %5.2f\t %5.2f\t %5.2f\t %5.2f\t %5.2f\t %s\t %s\n"
+                infoStr = infoStr % (aIdentifier, bIdentifier,
+                                     modelData["springscore"],
+                                     modelData["tmscore"],
+                                     modelData["energy"],
+                                     modelData["clashes"],
+                                     modelData["zscore"],
+                                     modelData["aTemplate"],
+                                     modelData["bTemplate"])
                 logFile.write(infoStr)
+                logFile.flush()
             if isfile(aFile):
                 remove(aFile)
             if isfile(bFile):
--- a/spring_package/Modeller.py	Sat Jan 23 14:42:46 2021 +0000
+++ b/spring_package/Modeller.py	Mon Mar 01 15:02:36 2021 +0000
@@ -85,7 +85,7 @@
     templateIndex = 0
     for i in range(len(alignment)):
         t = templateAlign[i]
-        if alignment[i] == ":":
+        if alignment[i] in [":", "."]:
             templateResidue = templateResidues[templateIndex]
             templateResidue["alignedResidue"] = modelAlign[i]
             aligned.append(templateResidue)
@@ -112,7 +112,7 @@
         if templateHit["score"] < minScore or maxTries == 0:
             break
         maxTries = maxTries - 1
-        yield templateHit["templatePair"]
+        yield templateHit["templatePair"], templateHit["score"]
 
 
 def createModel(args):
@@ -139,7 +139,7 @@
     maxInfo = None
     minScore = float(args.minscore)
     maxTries = int(args.maxtries)
-    for [aTemplate, bTemplate] in getFrameworks(aTemplates, bTemplates, crossReference, minScore=minScore, maxTries=maxTries):
+    for [aTemplate, bTemplate], zscore in getFrameworks(aTemplates, bTemplates, crossReference, minScore=minScore, maxTries=maxTries):
         print("Evaluating Complex Template: %s." % aTemplate)
         templateFile = "temp/template.pdb"
         createPDB(aTemplate, pdbDatabase, templateFile)
@@ -168,6 +168,7 @@
                     print(str(e))
                     continue
                 biomolFound = True
+                print("  zscore:\t%5.2f" % zscore)
                 tmscore = min(coreScore, partnerScore)
                 print("  tmscore:\t%5.2f" % tmscore)
                 energy = -interfaceEnergy.get(coreAligned, partnerAligned)
@@ -178,7 +179,7 @@
                 print("  springscore:\t%5.2f" % springscore)
                 if springscore > maxScore and clashes < args.maxclashes:
                     maxScore = springscore
-                    maxInfo = dict(springscore=springscore, tmscore=tmscore, energy=energy, clashes=clashes)
+                    maxInfo = dict(aTemplate=aTemplate, bTemplate=bTemplate, springscore=springscore, tmscore=tmscore, energy=energy, clashes=clashes, zscore=zscore)
                     coreMolecule.save(outputName, chainName="0")
                     partnerMolecule.save(outputName, chainName="1", append=True)
                     if args.showtemplate == "true":
@@ -188,7 +189,7 @@
     if maxInfo is not None:
         print("Final Model:")
         for key in maxInfo:
-            print("  %s:\t%5.2f" % (key, maxInfo[key]))
+            print("  %s:\t%s" % (key, maxInfo[key]))
         print("Completed.")
     else:
         print("Warning: Failed to determine model.")
Binary file spring_package/__pycache__/Modeller.cpython-38.pyc has changed
Binary file spring_package/__pycache__/Modeller.cpython-39.pyc has changed
Binary file test-data/mcc/human_hv1h2.png has changed
--- a/test-data/model/log.txt	Sat Jan 23 14:42:46 2021 +0000
+++ b/test-data/model/log.txt	Mon Mar 01 15:02:36 2021 +0000
@@ -1,2 +1,2 @@
-#namea	 nameb	 springscore	 tmscore	 energy	 clashes
-sp|Q9BYF1|ACE2_HUMAN	 sp|P0DTC2|SPIKE_SARS2	  0.91	  0.82	 -8.92	  0.00
+#namea	 nameb	 springscore	 tmscore	 energy	 clashes	 zscore	 templatea	 templateb
+sp|Q9BYF1|ACE2_HUMAN	 sp|P0DTC2|SPIKE_SARS2	  0.91	  0.82	 -8.92	  0.00	 406.00	 6LZG_A	 6LZG_B