Mercurial > repos > guerler > springsuite
changeset 34:b300ddbbf9d0 draft
"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
author | guerler |
---|---|
date | Tue, 24 Nov 2020 17:55:07 +0000 (2020-11-24) |
parents | f115fbf3ac63 |
children | 0bcc0a269916 |
files | spring_roc.py spring_roc.xml |
diffstat | 2 files changed, 68 insertions(+), 48 deletions(-) [+] |
line wrap: on
line diff
--- a/spring_roc.py Tue Nov 24 17:27:22 2020 +0000 +++ b/spring_roc.py Tue Nov 24 17:55:07 2020 +0000 @@ -3,7 +3,6 @@ import math import random from os.path import isfile -from datetime import datetime from matplotlib import pyplot as plt @@ -207,50 +206,62 @@ print("Loading prediction file...") prediction, _ = getReference(args.input, scoreCol=2) - # get subcellular locations from UniProt export - locations = dict() - if isfile(args.locations): - regions = list() - if args.regions: - regions = args.regions.split(",") - with open(args.locations) as locFile: - for line in locFile: - searchKey = "SUBCELLULAR LOCATION" - searchPos = line.find(searchKey) - if searchPos != -1: - uniId = line.split()[0] - locStart = searchPos + len(searchKey) + 1 - locId = line[locStart:].split()[0] - if regions: - if locId not in regions: - continue - if uniId in filterA or uniId in filterB: - locations[uniId] = locId - print("Found %d subcellular locations." % (len(list(locations.keys())))) + # determine negative set + print("Identifying non-interacting pairs...") + negative = set() + if isfile(args.negative): + # load from explicit file + with open(args.negative) as file: + for line in file: + cols = line.split() + nameA = cols[0] + nameB = cols[1] + key = getKey(nameA, nameB) + if key not in putative and key not in negative: + negative.add(key) + else: + # get subcellular locations from UniProt export + locations = dict() + if isfile(args.locations): + regions = list() + if args.regions: + regions = args.regions.split(",") + with open(args.locations) as locFile: + for line in locFile: + searchKey = "SUBCELLULAR LOCATION" + searchPos = line.find(searchKey) + if searchPos != -1: + uniId = line.split()[0] + locStart = searchPos + len(searchKey) + 1 + locId = line[locStart:].split()[0] + if regions: + if locId not in regions: + continue + if uniId in filterA or uniId in filterB: + locations[uniId] = locId + print("Found %d subcellular locations." % (len(list(locations.keys())))) - # estimate background noise - print("Estimating background noise...") - negative = set() - filterAList = sorted(list(filterA)) - filterBList = sorted(list(filterB)) - negativeRequired = positiveCount - random.seed(0) - totalAttempts = int(len(filterAList) * len(filterBList) / 2) - while totalAttempts > 0: - totalAttempts = totalAttempts - 1 - nameA = random.choice(filterAList) - nameB = random.choice(filterBList) - if locations: - if nameA not in locations or nameB not in locations: - continue - if locations[nameA] == locations[nameB]: - continue - key = getKey(nameA, nameB) - if key not in putative and key not in negative: - negative.add(key) - negativeRequired = negativeRequired - 1 - if negativeRequired == 0: - break + # randomly sample non-interacting pairs + filterAList = sorted(list(filterA)) + filterBList = sorted(list(filterB)) + negativeRequired = positiveCount + random.seed(0) + totalAttempts = int(len(filterAList) * len(filterBList) / 2) + while totalAttempts > 0: + totalAttempts = totalAttempts - 1 + nameA = random.choice(filterAList) + nameB = random.choice(filterBList) + if locations: + if nameA not in locations or nameB not in locations: + continue + if locations[nameA] == locations[nameB]: + continue + key = getKey(nameA, nameB) + if key not in putative and key not in negative: + negative.add(key) + negativeRequired = negativeRequired - 1 + if negativeRequired == 0: + break # create plot print("Producing plot data...") @@ -271,10 +282,11 @@ if __name__ == "__main__": parser = argparse.ArgumentParser(description='Create ROC plot.') - parser.add_argument('-i', '--input', help='Input prediction file.', required=True) + parser.add_argument('-i', '--input', help='Input prediction file (2-columns).', required=True) parser.add_argument('-b', '--biogrid', help='BioGRID interaction database file', required=True) - parser.add_argument('-l', '--locations', help='UniProt export table with subcellular locations', required=False) + parser.add_argument('-l', '--locations', help='UniProt export table with subcellular locations', default="", required=False) parser.add_argument('-r', '--regions', help='Comma-separated regions', required=False) + parser.add_argument('-n', '--negative', help='Negative set (2-columns)', default="", required=False) parser.add_argument('-e', '--experiment', help='Type (physical/genetic)', default="", required=False) parser.add_argument('-t', '--throughput', help='Throughput (low/high)', default="", required=False) parser.add_argument('-m', '--method', help='Method e.g. Two-hybrid', default="", required=False)
--- a/spring_roc.xml Tue Nov 24 17:27:22 2020 +0000 +++ b/spring_roc.xml Tue Nov 24 17:55:07 2020 +0000 @@ -4,7 +4,7 @@ <requirement type="package" version="3.3.3">matplotlib</requirement> </requirements> <command detect_errors="exit_code"><![CDATA[ - python3 '$__tool_directory__/spring_roc.py' -i '$input' -b '$database' -e '$experiment.type' -m '$experiment.method' -t '$throughput' -l '$sampling.locations' -r '$sampling.regions' -o '$rocplot' + python3 '$__tool_directory__/spring_roc.py' -i '$input' -b '$database' -e '$experiment.type' -m '$experiment.method' -t '$throughput' -l '$sampling.locations' -r '$sampling.regions' -n '$sampling.negative' -o '$rocplot' ]]></command> <inputs> <param name="input" type="data" format="tabular" label="Interactions" help="Prediction Input Table with 2-columns containing UniProt Accession codes."/> @@ -13,9 +13,11 @@ <param name="type" type="select" label="Non-interacting Pairs" display="radio" help="Decide how to retrieve non-interacting pairs."> <option value="random">Random Sampling</option> <option value="uniprot">Sampling with UniProt Localization</option> + <option value="negative">Specify non-interacting pairs</option> </param> <when value="uniprot"> - <param name="locations" type="data" format="tabular" label="UniProt Localization Database" help="UniProt tabular export with localization column to sample non-interacting pairs." optional="True" /> + <param name="locations" type="data" format="tabular" label="UniProt Localization Database" help="UniProt tabular export with localization column to sample non-interacting pairs." /> + <param name="negative" type="hidden" value="" /> <param name="regions" type="select" multiple="True" label="Choose Subcellular Locations"> <option value="Membrane" selected="True">Membrane</option> <option value="Mitochondrion" selected="True">Mitochondrion</option> @@ -24,9 +26,15 @@ </param> </when> <when value="random"> + <param name="negative" type="hidden" value="" /> <param name="locations" type="hidden" value="" /> <param name="regions" type="hidden" value="" /> </when> + <when value="negative"> + <param name="locations" type="hidden" value="" /> + <param name="negative" type="data" format="tabular" label="Non-interacting Pairs" help="Tabular file containing non-interacting pairs." /> + <param name="regions" type="hidden" value="" /> + </when> </conditional> <conditional name="experiment"> <param name="type" type="select" label="Experimental Type" display="radio" help="Choose a specific experimental system type.">