changeset 34:b300ddbbf9d0 draft

"planemo upload commit 0410e2fadc4e9fc1df6010de7b3678154cbdfe62-dirty"
author guerler
date Tue, 24 Nov 2020 17:55:07 +0000 (2020-11-24)
parents f115fbf3ac63
children 0bcc0a269916
files spring_roc.py spring_roc.xml
diffstat 2 files changed, 68 insertions(+), 48 deletions(-) [+]
line wrap: on
line diff
--- a/spring_roc.py	Tue Nov 24 17:27:22 2020 +0000
+++ b/spring_roc.py	Tue Nov 24 17:55:07 2020 +0000
@@ -3,7 +3,6 @@
 import math
 import random
 from os.path import isfile
-from datetime import datetime
 
 from matplotlib import pyplot as plt
 
@@ -207,50 +206,62 @@
     print("Loading prediction file...")
     prediction, _ = getReference(args.input, scoreCol=2)
 
-    # get subcellular locations from UniProt export
-    locations = dict()
-    if isfile(args.locations):
-        regions = list()
-        if args.regions:
-            regions = args.regions.split(",")
-        with open(args.locations) as locFile:
-            for line in locFile:
-                searchKey = "SUBCELLULAR LOCATION"
-                searchPos = line.find(searchKey)
-                if searchPos != -1:
-                    uniId = line.split()[0]
-                    locStart = searchPos + len(searchKey) + 1
-                    locId = line[locStart:].split()[0]
-                    if regions:
-                        if locId not in regions:
-                            continue
-                    if uniId in filterA or uniId in filterB:
-                        locations[uniId] = locId
-        print("Found %d subcellular locations." % (len(list(locations.keys()))))
+    # determine negative set
+    print("Identifying non-interacting pairs...")
+    negative = set()
+    if isfile(args.negative):
+        # load from explicit file
+        with open(args.negative) as file:
+            for line in file:
+                cols = line.split()
+                nameA = cols[0]
+                nameB = cols[1]
+                key = getKey(nameA, nameB)
+                if key not in putative and key not in negative:
+                    negative.add(key)
+    else:
+        # get subcellular locations from UniProt export
+        locations = dict()
+        if isfile(args.locations):
+            regions = list()
+            if args.regions:
+                regions = args.regions.split(",")
+            with open(args.locations) as locFile:
+                for line in locFile:
+                    searchKey = "SUBCELLULAR LOCATION"
+                    searchPos = line.find(searchKey)
+                    if searchPos != -1:
+                        uniId = line.split()[0]
+                        locStart = searchPos + len(searchKey) + 1
+                        locId = line[locStart:].split()[0]
+                        if regions:
+                            if locId not in regions:
+                                continue
+                        if uniId in filterA or uniId in filterB:
+                            locations[uniId] = locId
+            print("Found %d subcellular locations." % (len(list(locations.keys()))))
 
-    # estimate background noise
-    print("Estimating background noise...")
-    negative = set()
-    filterAList = sorted(list(filterA))
-    filterBList = sorted(list(filterB))
-    negativeRequired = positiveCount
-    random.seed(0)
-    totalAttempts = int(len(filterAList) * len(filterBList) / 2)
-    while totalAttempts > 0:
-        totalAttempts = totalAttempts - 1
-        nameA = random.choice(filterAList)
-        nameB = random.choice(filterBList)
-        if locations:
-            if nameA not in locations or nameB not in locations:
-                continue
-            if locations[nameA] == locations[nameB]:
-                continue
-        key = getKey(nameA, nameB)
-        if key not in putative and key not in negative:
-            negative.add(key)
-            negativeRequired = negativeRequired - 1
-            if negativeRequired == 0:
-                break
+        # randomly sample non-interacting pairs
+        filterAList = sorted(list(filterA))
+        filterBList = sorted(list(filterB))
+        negativeRequired = positiveCount
+        random.seed(0)
+        totalAttempts = int(len(filterAList) * len(filterBList) / 2)
+        while totalAttempts > 0:
+            totalAttempts = totalAttempts - 1
+            nameA = random.choice(filterAList)
+            nameB = random.choice(filterBList)
+            if locations:
+                if nameA not in locations or nameB not in locations:
+                    continue
+                if locations[nameA] == locations[nameB]:
+                    continue
+            key = getKey(nameA, nameB)
+            if key not in putative and key not in negative:
+                negative.add(key)
+                negativeRequired = negativeRequired - 1
+                if negativeRequired == 0:
+                    break
 
     # create plot
     print("Producing plot data...")
@@ -271,10 +282,11 @@
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description='Create ROC plot.')
-    parser.add_argument('-i', '--input', help='Input prediction file.', required=True)
+    parser.add_argument('-i', '--input', help='Input prediction file (2-columns).', required=True)
     parser.add_argument('-b', '--biogrid', help='BioGRID interaction database file', required=True)
-    parser.add_argument('-l', '--locations', help='UniProt export table with subcellular locations', required=False)
+    parser.add_argument('-l', '--locations', help='UniProt export table with subcellular locations', default="", required=False)
     parser.add_argument('-r', '--regions', help='Comma-separated regions', required=False)
+    parser.add_argument('-n', '--negative', help='Negative set (2-columns)', default="", required=False)
     parser.add_argument('-e', '--experiment', help='Type (physical/genetic)', default="", required=False)
     parser.add_argument('-t', '--throughput', help='Throughput (low/high)', default="", required=False)
     parser.add_argument('-m', '--method', help='Method e.g. Two-hybrid', default="", required=False)
--- a/spring_roc.xml	Tue Nov 24 17:27:22 2020 +0000
+++ b/spring_roc.xml	Tue Nov 24 17:55:07 2020 +0000
@@ -4,7 +4,7 @@
         <requirement type="package" version="3.3.3">matplotlib</requirement>
     </requirements>
     <command detect_errors="exit_code"><![CDATA[
-        python3 '$__tool_directory__/spring_roc.py' -i '$input' -b '$database' -e '$experiment.type' -m '$experiment.method' -t '$throughput' -l '$sampling.locations' -r '$sampling.regions' -o '$rocplot'
+        python3 '$__tool_directory__/spring_roc.py' -i '$input' -b '$database' -e '$experiment.type' -m '$experiment.method' -t '$throughput' -l '$sampling.locations' -r '$sampling.regions' -n '$sampling.negative' -o '$rocplot'
     ]]></command>
     <inputs>
         <param name="input" type="data" format="tabular" label="Interactions" help="Prediction Input Table with 2-columns containing UniProt Accession codes."/>
@@ -13,9 +13,11 @@
             <param name="type" type="select" label="Non-interacting Pairs" display="radio" help="Decide how to retrieve non-interacting pairs.">
                 <option value="random">Random Sampling</option>
                 <option value="uniprot">Sampling with UniProt Localization</option>
+                <option value="negative">Specify non-interacting pairs</option>
             </param>
             <when value="uniprot">
-                <param name="locations" type="data" format="tabular" label="UniProt Localization Database" help="UniProt tabular export with localization column to sample non-interacting pairs." optional="True" />
+                <param name="locations" type="data" format="tabular" label="UniProt Localization Database" help="UniProt tabular export with localization column to sample non-interacting pairs." />
+                <param name="negative" type="hidden" value="" />
                 <param name="regions" type="select" multiple="True" label="Choose Subcellular Locations">
                     <option value="Membrane" selected="True">Membrane</option>
                     <option value="Mitochondrion" selected="True">Mitochondrion</option>
@@ -24,9 +26,15 @@
                  </param>
             </when>
             <when value="random">
+                <param name="negative" type="hidden" value="" />
                 <param name="locations" type="hidden" value="" />
                 <param name="regions" type="hidden" value="" />
             </when>
+            <when value="negative">
+                <param name="locations" type="hidden" value="" />
+                <param name="negative" type="data" format="tabular" label="Non-interacting Pairs" help="Tabular file containing non-interacting pairs." />
+                <param name="regions" type="hidden" value="" />
+            </when>
         </conditional>
         <conditional name="experiment">
             <param name="type" type="select" label="Experimental Type" display="radio" help="Choose a specific experimental system type.">