springsuite: spring_roc.py comparison

comparison spring_roc.py @ 34:b300ddbbf9d0 draft

"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"

author	guerler
date	Tue, 24 Nov 2020 17:55:07 +0000
parents	3071750405c9
children	0bcc0a269916

comparison

equal deleted inserted replaced

-:f115fbf3ac63
+:b300ddbbf9d0
 #! /usr/bin/env python
 import argparse
 import math
 import random
 from os.path import isfile
-from datetime import datetime
 from matplotlib import pyplot as plt
 def getIds(rawIds):
 # process prediction file
 print("Loading prediction file...")
 prediction, _ = getReference(args.input, scoreCol=2)
-# get subcellular locations from UniProt export
+# determine negative set
-locations = dict()
+print("Identifying non-interacting pairs...")
-if isfile(args.locations):
-regions = list()
-if args.regions:
-regions = args.regions.split(",")
-with open(args.locations) as locFile:
-for line in locFile:
-searchKey = "SUBCELLULAR LOCATION"
-searchPos = line.find(searchKey)
-if searchPos != -1:
-uniId = line.split()[0]
-locStart = searchPos + len(searchKey) + 1
-locId = line[locStart:].split()[0]
-if regions:
-if locId not in regions:
-continue
-if uniId in filterA or uniId in filterB:
-locations[uniId] = locId
-print("Found %d subcellular locations." % (len(list(locations.keys()))))
-# estimate background noise
-print("Estimating background noise...")
 negative = set()
-filterAList = sorted(list(filterA))
+if isfile(args.negative):
-filterBList = sorted(list(filterB))
+# load from explicit file
-negativeRequired = positiveCount
+with open(args.negative) as file:
-random.seed(0)
+for line in file:
-totalAttempts = int(len(filterAList) * len(filterBList) / 2)
+cols = line.split()
-while totalAttempts > 0:
+nameA = cols[0]
-totalAttempts = totalAttempts - 1
+nameB = cols[1]
-nameA = random.choice(filterAList)
+key = getKey(nameA, nameB)
-nameB = random.choice(filterBList)
+if key not in putative and key not in negative:
-if locations:
+negative.add(key)
-if nameA not in locations or nameB not in locations:
+else:
-continue
+# get subcellular locations from UniProt export
-if locations[nameA] == locations[nameB]:
+locations = dict()
-continue
+if isfile(args.locations):
-key = getKey(nameA, nameB)
+regions = list()
-if key not in putative and key not in negative:
+if args.regions:
-negative.add(key)
+regions = args.regions.split(",")
-negativeRequired = negativeRequired - 1
+with open(args.locations) as locFile:
-if negativeRequired == 0:
+for line in locFile:
-break
+searchKey = "SUBCELLULAR LOCATION"
+searchPos = line.find(searchKey)
+if searchPos != -1:
+uniId = line.split()[0]
+locStart = searchPos + len(searchKey) + 1
+locId = line[locStart:].split()[0]
+if regions:
+if locId not in regions:
+continue
+if uniId in filterA or uniId in filterB:
+locations[uniId] = locId
+print("Found %d subcellular locations." % (len(list(locations.keys()))))
+# randomly sample non-interacting pairs
+filterAList = sorted(list(filterA))
+filterBList = sorted(list(filterB))
+negativeRequired = positiveCount
+random.seed(0)
+totalAttempts = int(len(filterAList) * len(filterBList) / 2)
+while totalAttempts > 0:
+totalAttempts = totalAttempts - 1
+nameA = random.choice(filterAList)
+nameB = random.choice(filterBList)
+if locations:
+if nameA not in locations or nameB not in locations:
+continue
+if locations[nameA] == locations[nameB]:
+continue
+key = getKey(nameA, nameB)
+if key not in putative and key not in negative:
+negative.add(key)
+negativeRequired = negativeRequired - 1
+if negativeRequired == 0:
+break
 # create plot
 print("Producing plot data...")
 print("Total count in prediction file: %d." % len(prediction))
 print("Total count in positive file: %d." % len(positive))
 plt.savefig(args.output, format="png")
 if __name__ == "__main__":
 parser = argparse.ArgumentParser(description='Create ROC plot.')
-parser.add_argument('-i', '--input', help='Input prediction file.', required=True)
+parser.add_argument('-i', '--input', help='Input prediction file (2-columns).', required=True)
 parser.add_argument('-b', '--biogrid', help='BioGRID interaction database file', required=True)
-parser.add_argument('-l', '--locations', help='UniProt export table with subcellular locations', required=False)
+parser.add_argument('-l', '--locations', help='UniProt export table with subcellular locations', default="", required=False)
 parser.add_argument('-r', '--regions', help='Comma-separated regions', required=False)
+parser.add_argument('-n', '--negative', help='Negative set (2-columns)', default="", required=False)
 parser.add_argument('-e', '--experiment', help='Type (physical/genetic)', default="", required=False)
 parser.add_argument('-t', '--throughput', help='Throughput (low/high)', default="", required=False)
 parser.add_argument('-m', '--method', help='Method e.g. Two-hybrid', default="", required=False)
 parser.add_argument('-o', '--output', help='Output (png)', required=True)
 args = parser.parse_args()

Mercurial > repos > guerler > springsuite

comparison spring_roc.py @ 34:b300ddbbf9d0 draft