Mercurial > repos > guerler > springsuite
diff spring_roc.py @ 34:b300ddbbf9d0 draft
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
author | guerler |
---|---|
date | Tue, 24 Nov 2020 17:55:07 +0000 |
parents | 3071750405c9 |
children | 0bcc0a269916 |
line wrap: on
line diff
```diff
--- a/spring_roc.py Tue Nov 24 17:27:22 2020 +0000
+++ b/spring_roc.py Tue Nov 24 17:55:07 2020 +0000
@@ -3,7 +3,6 @@
 import math
 import random
 from os.path import isfile
-from datetime import datetime
 
 from matplotlib import pyplot as plt
 
@@ -207,50 +206,62 @@
     print("Loading prediction file...")
     prediction, _ = getReference(args.input, scoreCol=2)
 
-    # get subcellular locations from UniProt export
-    locations = dict()
-    if isfile(args.locations):
-        regions = list()
-        if args.regions:
-            regions = args.regions.split(",")
-        with open(args.locations) as locFile:
-            for line in locFile:
-                searchKey = "SUBCELLULAR LOCATION"
-                searchPos = line.find(searchKey)
-                if searchPos != -1:
-                    uniId = line.split()[0]
-                    locStart = searchPos + len(searchKey) + 1
-                    locId = line[locStart:].split()[0]
-                    if regions:
-                        if locId not in regions:
-                            continue
-                    if uniId in filterA or uniId in filterB:
-                        locations[uniId] = locId
-        print("Found %d subcellular locations." % (len(list(locations.keys()))))
+    # determine negative set
+    print("Identifying non-interacting pairs...")
+    negative = set()
+    if isfile(args.negative):
+        # load from explicit file
+        with open(args.negative) as file:
+            for line in file:
+                cols = line.split()
+                nameA = cols[0]
+                nameB = cols[1]
+                key = getKey(nameA, nameB)
+                if key not in putative and key not in negative:
+                    negative.add(key)
+    else:
+        # get subcellular locations from UniProt export
+        locations = dict()
+        if isfile(args.locations):
+            regions = list()
+            if args.regions:
+                regions = args.regions.split(",")
+            with open(args.locations) as locFile:
+                for line in locFile:
+                    searchKey = "SUBCELLULAR LOCATION"
+                    searchPos = line.find(searchKey)
+                    if searchPos != -1:
+                        uniId = line.split()[0]
+                        locStart = searchPos + len(searchKey) + 1
+                        locId = line[locStart:].split()[0]
+                        if regions:
+                            if locId not in regions:
+                                continue
+                        if uniId in filterA or uniId in filterB:
+                            locations[uniId] = locId
+            print("Found %d subcellular locations." % (len(list(locations.keys()))))
 
-    # estimate background noise
-    print("Estimating background noise...")
-    negative = set()
-    filterAList = sorted(list(filterA))
-    filterBList = sorted(list(filterB))
-    negativeRequired = positiveCount
-    random.seed(0)
-    totalAttempts = int(len(filterAList) * len(filterBList) / 2)
-    while totalAttempts > 0:
-        totalAttempts = totalAttempts - 1
-        nameA = random.choice(filterAList)
-        nameB = random.choice(filterBList)
-        if locations:
-            if nameA not in locations or nameB not in locations:
-                continue
-            if locations[nameA] == locations[nameB]:
-                continue
-        key = getKey(nameA, nameB)
-        if key not in putative and key not in negative:
-            negative.add(key)
-            negativeRequired = negativeRequired - 1
-            if negativeRequired == 0:
-                break
+        # randomly sample non-interacting pairs
+        filterAList = sorted(list(filterA))
+        filterBList = sorted(list(filterB))
+        negativeRequired = positiveCount
+        random.seed(0)
+        totalAttempts = int(len(filterAList) * len(filterBList) / 2)
+        while totalAttempts > 0:
+            totalAttempts = totalAttempts - 1
+            nameA = random.choice(filterAList)
+            nameB = random.choice(filterBList)
+            if locations:
+                if nameA not in locations or nameB not in locations:
+                    continue
+                if locations[nameA] == locations[nameB]:
+                    continue
+            key = getKey(nameA, nameB)
+            if key not in putative and key not in negative:
+                negative.add(key)
+                negativeRequired = negativeRequired - 1
+                if negativeRequired == 0:
+                    break
 
     # create plot
     print("Producing plot data...")
@@ -271,10 +282,11 @@
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description='Create ROC plot.')
-    parser.add_argument('-i', '--input', help='Input prediction file.', required=True)
+    parser.add_argument('-i', '--input', help='Input prediction file (2-columns).', required=True)
     parser.add_argument('-b', '--biogrid', help='BioGRID interaction database file', required=True)
-    parser.add_argument('-l', '--locations', help='UniProt export table with subcellular locations', required=False)
+    parser.add_argument('-l', '--locations', help='UniProt export table with subcellular locations', default="", required=False)
     parser.add_argument('-r', '--regions', help='Comma-separated regions', required=False)
+    parser.add_argument('-n', '--negative', help='Negative set (2-columns)', default="", required=False)
     parser.add_argument('-e', '--experiment', help='Type (physical/genetic)', default="", required=False)
     parser.add_argument('-t', '--throughput', help='Throughput (low/high)', default="", required=False)
     parser.add_argument('-m', '--method', help='Method e.g. Two-hybrid', default="", required=False)
```