annotate mqppep_mrgfltr.py @ 0:8dfd5d2b5903 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
author galaxyp
date Mon, 11 Jul 2022 19:22:54 +0000
parents
children b76c75521d91
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1 #!/usr/bin/env python
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
2
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
3 # Import the packages needed
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
4 import argparse
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
5 import operator # for operator.itemgetter
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
6 import os.path
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
7 import re
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
8 import shutil # for shutil.copyfile(src, dest)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
9 import sqlite3 as sql
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
10 import sys # import the sys module for exc_info
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
11 import time
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
12 import traceback # for formatting stack-trace
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
13 from codecs import getreader as cx_getreader
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
14
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
15 import numpy as np
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
16 import pandas
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
17
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
18 # global constants
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
19 N_A = "N/A"
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
20
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
21
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
22 # ref: https://stackoverflow.com/a/8915613/15509512
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
23 # answers: "How to handle exceptions in a list comprehensions"
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
24 # usage:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
25 # from math import log
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
26 # eggs = [1,3,0,3,2]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
27 # print([x for x in [catch(log, egg) for egg in eggs] if x is not None])
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
28 # producing:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
29 # for <built-in function log>
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
30 # with args (0,)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
31 # exception: math domain error
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
32 # [0.0, 1.0986122886681098, 1.0986122886681098, 0.6931471805599453]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
def catch(func, *args, handle=lambda e: e, **kwargs):
    """Call ``func(*args, **kwargs)`` and abort the program if it raises.

    On success the result of ``func`` is returned unchanged.  If ``func``
    raises any ``Exception``, the callable, its positional arguments, the
    exception text and the formatted stack trace are printed to stdout and
    the process is terminated with exit status -1.

    Args:
        func: The callable to invoke.
        *args: Positional arguments forwarded to ``func``.
        handle: Accepted for signature compatibility; not used by this
            function.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        Whatever ``func(*args, **kwargs)`` returns.

    Raises:
        SystemExit: With code -1 when ``func`` raises an ``Exception``.
    """
    try:
        return func(*args, **kwargs)
    except Exception as e:
        print("For %s" % str(func))
        print(" with args %s" % str(args))
        print(" caught exception: %s" % str(e))
        (ty, va, tb) = sys.exc_info()
        print(" stack trace: " + str(traceback.format_exception(ty, va, tb)))
        # sys.exit is always available; the bare ``exit`` builtin is only
        # injected by the ``site`` module and may be missing (e.g. python -S).
        sys.exit(-1)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
45
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
46
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
def whine(func, *args, handle=lambda e: e, **kwargs):
    """Call ``func(*args, **kwargs)``; warn and return ``None`` if it raises.

    On success the result of ``func`` is returned unchanged.  If ``func``
    raises any ``Exception``, a warning naming the callable, its positional
    arguments, the exception text and the formatted stack trace is printed
    to stdout, and ``None`` is returned so the caller can continue.

    Args:
        func: The callable to invoke.
        *args: Positional arguments forwarded to ``func``.
        handle: Accepted for signature compatibility; not used by this
            function.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        The result of ``func(*args, **kwargs)``, or ``None`` when ``func``
        raised an ``Exception``.
    """
    try:
        return func(*args, **kwargs)
    except Exception as e:
        print("Warning: For %s" % str(func))
        print(" with args %s" % str(args))
        print(" caught exception: %s" % str(e))
        # The bound exception carries the same type/value/traceback triple
        # that sys.exc_info() would report inside this handler.
        trace = traceback.format_exception(type(e), e, e.__traceback__)
        print(" stack trace: " + str(trace))
        return None
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
58
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
59
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
def ppep_join(x):
    """Join the entries of ``x`` with " | ", ignoring ``N_A`` placeholders.

    Args:
        x: An iterable of strings; entries equal to the module constant
            ``N_A`` are dropped before joining.

    Returns:
        The remaining entries joined by " | ", or ``N_A`` when the joined
        text is empty (nothing left, or only empty strings were left).
    """
    kept = [i for i in x if N_A != i]
    result = " | ".join(kept)
    return result if result != "" else N_A
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
67
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
68
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
def melt_join(x):
    """Join the entries of ``x`` with " | ", merging case-insensitive duplicates.

    Entries that differ only by letter case are collapsed into one.  The
    merged entry keeps the position of the first occurrence and the spelling
    of the last occurrence.

    Args:
        x: An iterable of strings.

    Returns:
        The de-duplicated entries joined by " | " (an empty string when
        ``x`` is empty).
    """
    # Keyed by lowercased text: a repeated key keeps its original slot in
    # the dict but has its value overwritten by the later spelling.
    by_lowercase = {key.lower(): key for key in x}
    return " | ".join(by_lowercase.values())
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
73
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
74
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
75 def __main__():
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
76 # Parse Command Line
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
77 parser = argparse.ArgumentParser(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
78 description="Phopsphoproteomic Enrichment Pipeline Merge and Filter."
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
79 )
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
80
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
81 # inputs:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
82 # Phosphopeptide data for experimental results, including the intensities
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
83 # and the mapping to kinase domains, in tabular format.
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
84 parser.add_argument(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
85 "--phosphopeptides",
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
86 "-p",
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
87 nargs=1,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
88 required=True,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
89 dest="phosphopeptides",
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
90 help="Phosphopeptide data for experimental results, including the intensities and the mapping to kinase domains, in tabular format",
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
91 )
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
92 # UniProtKB/SwissProt DB input, SQLite
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
93 parser.add_argument(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
94 "--ppep_mapping_db",
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
95 "-d",
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
96 nargs=1,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
97 required=True,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
98 dest="ppep_mapping_db",
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
99 help="UniProtKB/SwissProt SQLite Database",
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
100 )
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
101 # species to limit records chosen from PhosphoSitePlus
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
102 parser.add_argument(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
103 "--species",
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
104 "-x",
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
105 nargs=1,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
106 required=False,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
107 default=[],
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
108 dest="species",
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
109 help="limit PhosphoSitePlus records to indicated species (field may be empty)",
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
110 )
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
111
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
112 # outputs:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
113 # tabular output
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
114 parser.add_argument(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
115 "--mrgfltr_tab",
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
116 "-o",
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
117 nargs=1,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
118 required=True,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
119 dest="mrgfltr_tab",
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
120 help="Tabular output file for results",
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
121 )
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
122 # CSV output
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
123 parser.add_argument(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
124 "--mrgfltr_csv",
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
125 "-c",
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
126 nargs=1,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
127 required=True,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
128 dest="mrgfltr_csv",
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
129 help="CSV output file for results",
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
130 )
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
131 # SQLite output
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
132 parser.add_argument(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
133 "--mrgfltr_sqlite",
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
134 "-S",
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
135 nargs=1,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
136 required=True,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
137 dest="mrgfltr_sqlite",
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
138 help="SQLite output file for results",
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
139 )
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
140
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
# "Make it so!" -- run the argument parser over the command line, then
# echo the parsed result to stdout.
options = parser.parse_args()
print("options: ", str(options), sep="")
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
144
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
# Determine phosphopeptide ("upstream map") input tabular file access:
# resolve the supplied path to an absolute one and verify that the file
# can be opened for reading.  Any failure terminates the run with a
# message naming the offending argument.
if options.phosphopeptides is None:
    exit('Argument "phosphopeptides" is required but not supplied')
try:
    # The value is indexed with [0]; presumably the option is declared
    # with nargs=1 so argparse delivers a one-element list -- confirm
    # against the parser setup.
    upstream_map_filename_tab = os.path.abspath(options.phosphopeptides[0])
    # Open-and-close is only an access probe.  The context manager
    # guarantees that the handle is released on every path.
    with open(upstream_map_filename_tab, "r") as input_file:
        pass
except Exception as e:
    exit("Error parsing phosphopeptides argument: %s" % str(e))
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
154
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
# Determine input SQLite access: resolve the supplied database path to an
# absolute one and verify that the file can be opened for binary reading.
# Any failure terminates the run with a message naming the argument.
if options.ppep_mapping_db is None:
    exit('Argument "ppep_mapping_db" is required but not supplied')
try:
    # The value is indexed with [0]; presumably the option is declared
    # with nargs=1 so argparse delivers a one-element list -- confirm
    # against the parser setup.
    uniprot_sqlite = os.path.abspath(options.ppep_mapping_db[0])
    # Open-and-close is only an access probe.  The context manager
    # guarantees that the handle is released on every path.
    with open(uniprot_sqlite, "rb") as input_file:
        pass
except Exception as e:
    exit("Error parsing ppep_mapping_db argument: %s" % str(e))
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
164
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
# Copy the input SQLite dataset to the output SQLite dataset.  The output
# path is resolved to an absolute path first; a failure in either step
# ends the run with an explanatory message.
if options.mrgfltr_sqlite is None:
    exit('Argument "mrgfltr_sqlite" is required but not supplied')
try:
    output_sqlite = os.path.abspath(options.mrgfltr_sqlite[0])
    shutil.copyfile(src=uniprot_sqlite, dst=output_sqlite)
except Exception as copy_error:
    exit(
        "Error copying ppep_mapping_db to mrgfltr_sqlite: %s"
        % str(copy_error)
    )
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
173
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
# Determine species to limit records from PSP_Regulatory_Sites.  The
# argument must be present; when it is present but holds no elements the
# species falls back to the empty string.
if options.species is None:
    exit(
        'Argument "species" is required (and may be empty) but not supplied'
    )
try:
    species = options.species[0] if len(options.species) > 0 else ""
except Exception as species_error:
    exit("Error parsing species argument: %s" % str(species_error))
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
186
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
# Determine tabular output destination: resolve the supplied path to an
# absolute one and verify that it can be opened for writing.  Opening in
# "w" mode creates the file (or truncates an existing one).  Any failure
# terminates the run with a message naming the argument.
if options.mrgfltr_tab is None:
    exit('Argument "mrgfltr_tab" is required but not supplied')
try:
    # The value is indexed with [0]; presumably the option is declared
    # with nargs=1 so argparse delivers a one-element list -- confirm
    # against the parser setup.
    output_filename_tab = os.path.abspath(options.mrgfltr_tab[0])
    # Open-and-close is only a writability probe.  The context manager
    # guarantees that the handle is released on every path.
    with open(output_filename_tab, "w") as output_file:
        pass
except Exception as e:
    exit("Error parsing mrgfltr_tab argument: %s" % str(e))
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
196
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
# Determine CSV output destination: resolve the supplied path to an
# absolute one and verify that it can be opened for writing.  Opening in
# "w" mode creates the file (or truncates an existing one).  Any failure
# terminates the run with a message naming the argument.
if options.mrgfltr_csv is None:
    exit('Argument "mrgfltr_csv" is required but not supplied')
try:
    # The value is indexed with [0]; presumably the option is declared
    # with nargs=1 so argparse delivers a one-element list -- confirm
    # against the parser setup.
    output_filename_csv = os.path.abspath(options.mrgfltr_csv[0])
    # Open-and-close is only a writability probe.  The context manager
    # guarantees that the handle is released on every path.
    with open(output_filename_csv, "w") as output_file:
        pass
except Exception as e:
    exit("Error parsing mrgfltr_csv argument: %s" % str(e))
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
206
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
207 def mqpep_getswissprot():
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
208
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
209 #
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
210 # copied from Excel Output Script.ipynb BEGIN #
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
211 #
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
212
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
213 # String Constants #################
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
214 DEPHOSPHOPEP = "DephosphoPep"
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
215 DESCRIPTION = "Description"
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
216 FUNCTION_PHOSPHORESIDUE = (
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
217 "Function Phosphoresidue(PSP=PhosphoSitePlus.org)"
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
218 )
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
219 GENE_NAME = "Gene_Name" # Gene Name from UniProtKB
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
220 ON_FUNCTION = (
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
221 "ON_FUNCTION" # ON_FUNCTION column from PSP_Regulatory_Sites
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
222 )
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
223 ON_NOTES = "NOTES" # NOTES column from PSP_Regulatory_Sites
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
224 ON_OTHER_INTERACT = "ON_OTHER_INTERACT" # ON_OTHER_INTERACT column from PSP_Regulatory_Sites
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
225 ON_PROCESS = (
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
226 "ON_PROCESS" # ON_PROCESS column from PSP_Regulatory_Sites
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
227 )
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
228 ON_PROT_INTERACT = "ON_PROT_INTERACT" # ON_PROT_INTERACT column from PSP_Regulatory_Sites
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
229 PHOSPHOPEPTIDE = "Phosphopeptide"
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
230 PHOSPHOPEPTIDE_MATCH = "Phosphopeptide_match"
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
231 PHOSPHORESIDUE = "Phosphoresidue"
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
232 PUTATIVE_UPSTREAM_DOMAINS = "Putative Upstream Kinases(PSP=PhosphoSitePlus.org)/Phosphatases/Binding Domains"
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
233 SEQUENCE = "Sequence"
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
234 SEQUENCE10 = "Sequence10"
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
235 SEQUENCE7 = "Sequence7"
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
236 SITE_PLUSMINUS_7AA_SQL = "SITE_PLUSMINUS_7AA"
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
237 UNIPROT_ID = "UniProt_ID"
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
238 UNIPROT_SEQ_AND_META_SQL = """
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
239 select Uniprot_ID, Description, Gene_Name, Sequence,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
240 Organism_Name, Organism_ID, PE, SV
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
241 from UniProtKB
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
242 order by Sequence, UniProt_ID
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
243 """
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
244 UNIPROT_UNIQUE_SEQ_SQL = """
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
245 select distinct Sequence
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
246 from UniProtKB
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
247 group by Sequence
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
248 """
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
249 PPEP_PEP_UNIPROTSEQ_SQL = """
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
250 select distinct phosphopeptide, peptide, sequence
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
251 from uniprotkb_pep_ppep_view
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
252 order by sequence
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
253 """
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
254 PPEP_MELT_SQL = """
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
255 SELECT DISTINCT
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
256 phospho_peptide AS 'p_peptide',
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
257 kinase_map AS 'characterization',
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
258 'X' AS 'X'
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
259 FROM ppep_gene_site_view
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
260 """
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
261 # CREATE TABLE PSP_Regulatory_site (
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
262 # site_plusminus_7AA TEXT PRIMARY KEY ON CONFLICT IGNORE,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
263 # domain TEXT,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
264 # ON_FUNCTION TEXT,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
265 # ON_PROCESS TEXT,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
266 # ON_PROT_INTERACT TEXT,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
267 # ON_OTHER_INTERACT TEXT,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
268 # notes TEXT,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
269 # organism TEXT
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
270 # );
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
# Query returning the distinct combinations of the regulatory-site
# annotation columns stored in the PSP_Regulatory_site table.
PSP_REGSITE_SQL = """
    SELECT DISTINCT
      SITE_PLUSMINUS_7AA ,
      DOMAIN ,
      ON_FUNCTION ,
      ON_PROCESS ,
      ON_PROT_INTERACT ,
      ON_OTHER_INTERACT ,
      NOTES ,
      ORGANISM
    FROM PSP_Regulatory_site
    """
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
# Query returning every row of the ppep table, exposing its id and seq
# columns under the result-column names ppep_id and ppep_seq.
PPEP_ID_SQL = """
    SELECT
      id AS 'ppep_id',
      seq AS 'ppep_seq'
    FROM ppep
    """
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
# DDL script that (re)creates the merge/filter metadata objects:
#   * mrgfltr_metadata      -- one row per ppep_id; because of the
#                              ON CONFLICT IGNORE clause, an insert that
#                              repeats an existing ppep_id is skipped
#                              rather than raising an error.
#   * mrgfltr_metadata_view -- the metadata joined to ppep on
#                              ppep.id = mrgfltr_metadata.ppep_id, with the
#                              peptide sequence exposed as phospho_peptide
#                              and rows ordered by that sequence.
# The script holds several statements, so it has to be run with an API that
# accepts multiple statements (for example sqlite3's executescript).
MRGFLTR_DDL = """
    DROP VIEW  IF EXISTS mrgfltr_metadata_view;
    DROP TABLE IF EXISTS mrgfltr_metadata;
    CREATE TABLE mrgfltr_metadata
      ( ppep_id                 INTEGER REFERENCES ppep(id)
      , Sequence10              TEXT
      , Sequence7               TEXT
      , GeneName                TEXT
      , Phosphoresidue          TEXT
      , UniProtID               TEXT
      , Description             TEXT
      , FunctionPhosphoresidue  TEXT
      , PutativeUpstreamDomains TEXT
      , PRIMARY KEY (ppep_id) ON CONFLICT IGNORE
      )
      ;
    CREATE VIEW mrgfltr_metadata_view AS
      SELECT DISTINCT
          ppep.seq AS phospho_peptide
        , Sequence10
        , Sequence7
        , GeneName
        , Phosphoresidue
        , UniProtID
        , Description
        , FunctionPhosphoresidue
        , PutativeUpstreamDomains
      FROM
        ppep, mrgfltr_metadata
      WHERE
          mrgfltr_metadata.ppep_id = ppep.id
      ORDER BY
        ppep.seq
        ;
    """
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
324
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
# Parameterized insert for the Citation table; the two placeholders are
# bound, in order, to ObjectName and CitationData.
CITATION_INSERT_STMT = """
    INSERT INTO Citation (
      ObjectName,
      CitationData
    ) VALUES (?,?)
    """
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
# Licence/acknowledgement text for PhosphoSitePlus data.
CITATION_INSERT_PSP = 'PhosphoSitePlus(R) (PSP) was created by Cell Signaling Technology Inc. It is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License. When using PSP data or analyses in printed publications or in online resources, the following acknowledgements must be included: (a) the words "PhosphoSitePlus(R), www.phosphosite.org" must be included at appropriate places in the text or webpage, and (b) the following citation must be included in the bibliography: "Hornbeck PV, Zhang B, Murray B, Kornhauser JM, Latham V, Skrzypek E PhosphoSitePlus, 2014: mutations, PTMs and recalibrations. Nucleic Acids Res. 2015 43:D512-20. PMID: 25514926."'
# Short bibliographic reference for the same paper.  The links point at the
# PubMed record and DOI of "PhosphoSitePlus, 2014: mutations, PTMs and
# recalibrations" (PMID 25514926), i.e. the paper named in the title and
# required by the licence text above; the previous links (PMID 22135298,
# doi gkr1122) belonged to the earlier 2012 PhosphoSitePlus paper.
CITATION_INSERT_PSP_REF = 'Hornbeck, 2014, "PhosphoSitePlus, 2014: mutations, PTMs and recalibrations.", https://pubmed.ncbi.nlm.nih.gov/25514926, https://doi.org/10.1093/nar/gku1267'
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
333
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
# Column names of the mrgfltr_metadata table, in the order in which the
# table's CREATE TABLE statement declares them.
MRGFLTR_METADATA_COLUMNS = [
    "ppep_id",
    "Sequence10",
    "Sequence7",
    "GeneName",
    "Phosphoresidue",
    "UniProtID",
    "Description",
    "FunctionPhosphoresidue",
    "PutativeUpstreamDomains",
]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
345
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
346 # String Constants (end) ############
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
347
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
class Error(Exception):
    """Base class for exceptions in this module."""

    pass


class PreconditionError(Error):
    """Exception raised for errors in the input.

    Attributes:
        expression -- input expression in which the error occurred
        message -- explanation of the error
    """

    def __init__(self, expression, message):
        # Initialise the base exception explicitly; ``args`` stays
        # ``(expression, message)`` exactly as before.
        super().__init__(expression, message)
        self.expression = expression
        self.message = message
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
364
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
# Start the timer for the pre-read phase (time.process_time measures CPU
# time of the current process).
start_time = time.process_time()  # timer

# get keys from upstream tabular file using readline()
# ref: https://stackoverflow.com/a/16713581/15509512
#      answer to "Use codecs to read file with correct encoding"
count = 0
upstream_map_p_peptide_list = []
# matches everything on a line up to (not including) the first tab
re_tab = re.compile("^[^\t]*")
# The ``with`` block guarantees that the underlying file is closed even
# when decoding or matching raises part-way through the file.
with open(upstream_map_filename_tab, "rb") as file1_encoded:
    file1 = cx_getreader("latin-1")(file1_encoded)
    while True:
        count += 1
        # Get next line from file
        line = file1.readline()
        # if line is empty
        # end of file is reached
        if not line:
            break
        # the first line is skipped; every later line contributes the text
        # of its first tab-delimited field
        if count > 1:
            m = re_tab.match(line)
            upstream_map_p_peptide_list.append(m[0])

# Get the list of phosphopeptides with the p's that represent the phosphorylation sites removed
re_phos = re.compile("p")

end_time = time.process_time()  # timer
print(
    "%0.6f pre-read-SwissProt [0.1]" % (end_time - start_time,),
    file=sys.stderr,
)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
399
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
# ----------- Get SwissProt data from SQLite database (start) -----------
# build UniProt sequence LUT and list of unique SwissProt sequences

# Open SwissProt SQLite database
conn = sql.connect(uniprot_sqlite)
cur = conn.cursor()

# Set up structures to hold SwissProt data

uniprot_Sequence_List = []
UniProtSeqLUT = {}

# Execute query for unique seqs without fetching the results yet
uniprot_unique_seq_cur = cur.execute(UNIPROT_UNIQUE_SEQ_SQL)

# Iterate the cursor directly instead of fetching fixed-size batches; an
# empty result set simply yields no iterations.
for row in uniprot_unique_seq_cur:
    Sequence = row[0]
    # Seed one empty list per (sequence, field) key plus one for the bare
    # sequence.
    UniProtSeqLUT[(Sequence, DESCRIPTION)] = []
    UniProtSeqLUT[(Sequence, GENE_NAME)] = []
    UniProtSeqLUT[(Sequence, UNIPROT_ID)] = []
    UniProtSeqLUT[Sequence] = []
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
426
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
427 # Execute query for seqs and metadata without fetching the results yet
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
428 uniprot_seq_and_meta = cur.execute(UNIPROT_SEQ_AND_META_SQL)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
429
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
430 while 1:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
431 batch = uniprot_seq_and_meta.fetchmany(size=50)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
432 if not batch:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
433 # handle case where no records are returned
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
434 break
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
435 for (
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
436 UniProt_ID,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
437 Description,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
438 Gene_Name,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
439 Sequence,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
440 OS,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
441 OX,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
442 PE,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
443 SV,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
444 ) in batch:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
445 uniprot_Sequence_List.append(Sequence)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
446 UniProtSeqLUT[Sequence] = Sequence
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
447 UniProtSeqLUT[(Sequence, UNIPROT_ID)].append(UniProt_ID)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
448 UniProtSeqLUT[(Sequence, GENE_NAME)].append(Gene_Name)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
449 if OS != N_A:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
450 Description += " OS=" + OS
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
451 if OX != -1:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
452 Description += " OX=" + str(OX)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
453 if Gene_Name != N_A:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
454 Description += " GN=" + Gene_Name
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
455 if PE != N_A:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
456 Description += " PE=" + PE
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
457 if SV != N_A:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
458 Description += " SV=" + SV
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
459 UniProtSeqLUT[(Sequence, DESCRIPTION)].append(Description)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
460
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
461 # Close SwissProt SQLite database; clean up local variables
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
462 conn.close()
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
463 Sequence = ""
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
464 UniProt_ID = ""
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
465 Description = ""
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
466 Gene_Name = ""
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
467
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
468 # ----------- Get SwissProt data from SQLite database (finish) -----------
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
469
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
470 end_time = time.process_time() # timer
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
471 print(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
472 "%0.6f post-read-SwissProt [0.2]" % (end_time - start_time,),
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
473 file=sys.stderr,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
474 )
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
475
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
476 # ----------- Get SwissProt data from SQLite database (start) -----------
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
477 # Open SwissProt SQLite database
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
478 conn = sql.connect(uniprot_sqlite)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
479 cur = conn.cursor()
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
480
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
481 # Set up dictionary to aggregate results for phosphopeptides corresponding to each dephosphopeptide
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
482 DephosphoPep_UniProtSeq_LUT = {}
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
483
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
484 # Set up dictionary to accumulate results
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
485 PhosphoPep_UniProtSeq_LUT = {}
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
486
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
487 # Execute query for tuples without fetching the results yet
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
488 ppep_pep_uniprotseq_cur = cur.execute(PPEP_PEP_UNIPROTSEQ_SQL)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
489
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
490 while 1:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
491 batch = ppep_pep_uniprotseq_cur.fetchmany(size=50)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
492 if not batch:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
493 # handle case where no records are returned
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
494 break
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
495 for (phospho_pep, dephospho_pep, sequence) in batch:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
496 # Record the phosphopeptide and map it to its dephosphopeptide
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
497 PhosphoPep_UniProtSeq_LUT[phospho_pep] = phospho_pep
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
498 PhosphoPep_UniProtSeq_LUT[
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
499 (phospho_pep, DEPHOSPHOPEP)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
500 ] = dephospho_pep
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
501 if dephospho_pep not in DephosphoPep_UniProtSeq_LUT:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
502 DephosphoPep_UniProtSeq_LUT[dephospho_pep] = set()
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
503 DephosphoPep_UniProtSeq_LUT[
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
504 (dephospho_pep, DESCRIPTION)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
505 ] = []
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
506 DephosphoPep_UniProtSeq_LUT[
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
507 (dephospho_pep, GENE_NAME)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
508 ] = []
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
509 DephosphoPep_UniProtSeq_LUT[
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
510 (dephospho_pep, UNIPROT_ID)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
511 ] = []
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
512 DephosphoPep_UniProtSeq_LUT[(dephospho_pep, SEQUENCE)] = []
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
513 DephosphoPep_UniProtSeq_LUT[dephospho_pep].add(phospho_pep)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
514
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
515 if (
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
516 sequence
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
517 not in DephosphoPep_UniProtSeq_LUT[
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
518 (dephospho_pep, SEQUENCE)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
519 ]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
520 ):
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
521 DephosphoPep_UniProtSeq_LUT[
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
522 (dephospho_pep, SEQUENCE)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
523 ].append(sequence)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
524 for phospho_pep in DephosphoPep_UniProtSeq_LUT[dephospho_pep]:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
525 if phospho_pep != phospho_pep:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
526 print(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
527 "phospho_pep:'%s' phospho_pep:'%s'"
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
528 % (phospho_pep, phospho_pep)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
529 )
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
530 if phospho_pep not in PhosphoPep_UniProtSeq_LUT:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
531 PhosphoPep_UniProtSeq_LUT[phospho_pep] = phospho_pep
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
532 PhosphoPep_UniProtSeq_LUT[
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
533 (phospho_pep, DEPHOSPHOPEP)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
534 ] = dephospho_pep
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
535 r = list(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
536 zip(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
537 [s for s in UniProtSeqLUT[(sequence, UNIPROT_ID)]],
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
538 [s for s in UniProtSeqLUT[(sequence, GENE_NAME)]],
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
539 [
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
540 s
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
541 for s in UniProtSeqLUT[(sequence, DESCRIPTION)]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
542 ],
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
543 )
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
544 )
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
545 # Sort by `UniProt_ID`
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
546 # ref: https://stackoverflow.com/a/4174955/15509512
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
547 r = sorted(r, key=operator.itemgetter(0))
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
548 # Get one tuple for each `phospho_pep`
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
549 # in DephosphoPep_UniProtSeq_LUT[dephospho_pep]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
550 for (upid, gn, desc) in r:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
551 # Append pseudo-tuple per UniProt_ID but only when it is not present
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
552 if (
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
553 upid
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
554 not in DephosphoPep_UniProtSeq_LUT[
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
555 (dephospho_pep, UNIPROT_ID)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
556 ]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
557 ):
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
558 DephosphoPep_UniProtSeq_LUT[
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
559 (dephospho_pep, UNIPROT_ID)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
560 ].append(upid)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
561 DephosphoPep_UniProtSeq_LUT[
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
562 (dephospho_pep, DESCRIPTION)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
563 ].append(desc)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
564 DephosphoPep_UniProtSeq_LUT[
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
565 (dephospho_pep, GENE_NAME)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
566 ].append(gn)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
567
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
# The SwissProt lookups are finished: close the SQLite connection.
conn.close()
# Wipe the scratch variables that were used while querying.
sequence = 0
dephospho_pep = 0
phospho_pep = 0
r = ""
desc = ""
gn = ""
upid = ""

# ----------- Get SwissProt data from SQLite database (finish) -----------

# Timer: report the process time elapsed since start_time on stderr.
end_time = time.process_time()
lookup_timer_msg = "%0.6f finished reading and decoding '%s' [0.4]" % (
    end_time - start_time,
    upstream_map_filename_tab,
)
print(lookup_timer_msg, file=sys.stderr)

# Report (on stdout) the number of entries in the phosphopeptide list.
tested_count = str(len(upstream_map_p_peptide_list))
print("{:>10} unique upstream phosphopeptides tested".format(tested_count))
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
588
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
# Load the Upstream tabular file, using its first column as the index.
# Every column is read as text (dtype="str") because, per the original
# note, the intensity data are being discarded.
upstream_data = pandas.read_table(
    upstream_map_filename_tab,
    index_col=0,
    dtype="str",
)

# Timer: report elapsed process time on stderr.
end_time = time.process_time()
print(
    "%0.6f read Upstream Map from file [1g_1]" % (end_time - start_time),
    file=sys.stderr,
)

# Replace the index read from the file with the phosphopeptide list.
upstream_data.index = upstream_map_p_peptide_list

# Timer: report elapsed process time on stderr.
end_time = time.process_time()
print(
    "%0.6f added index to Upstream Map [1g_2]" % (end_time - start_time),
    file=sys.stderr,
)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
610
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
611 # ########################################################################
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
612 # # trim upstream_data to include only the upstream map columns
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
613 # old_cols = upstream_data.columns.tolist()
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
614 # i = 0
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
615 # first_intensity = -1
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
616 # last_intensity = -1
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
617 # intensity_re = re.compile("Intensity.*")
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
618 # for col_name in old_cols:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
619 # m = intensity_re.match(col_name)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
620 # if m:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
621 # last_intensity = i
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
622 # if first_intensity == -1:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
623 # first_intensity = i
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
624 # i += 1
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
625 # # print('last intensity = %d' % last_intensity)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
626 # col_PKCalpha = last_intensity + 2
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
627 #
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
628 # data_in_cols = [old_cols[0]] + old_cols[
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
629 # first_intensity: last_intensity + 1
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
630 # ]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
631 #
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
632 # if upstream_data.empty:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
633 # print("upstream_data is empty")
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
634 # exit(0)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
635 #
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
636 # data_in = upstream_data.copy(deep=True)[data_in_cols]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
########################################################################
# trim upstream_data to include only the upstream map columns
old_cols = upstream_data.columns.tolist()
first_intensity = -1
last_intensity = -1
intensity_re = re.compile("Intensity.*")
# Record the positions of the first and last columns whose names begin
# with "Intensity"; both remain -1 when no column name matches.
for col_idx, col_name in enumerate(old_cols):
    if intensity_re.match(col_name):
        last_intensity = col_idx
        if first_intensity == -1:
            first_intensity = col_idx
# print('last intensity = %d' % last_intensity)
col_PKCalpha = last_intensity + 2

# NOTE(review): the data are selected from one position to the left of the
# "Intensity" headers and are relabeled with those headers further down --
# presumably this compensates for a header/data offset in the input file;
# confirm against the upstream file format before changing either slice.
data_in_cols = [old_cols[0]] + old_cols[first_intensity - 1: last_intensity]
data_col_names = [old_cols[0]] + old_cols[first_intensity: last_intensity + 1]

if upstream_data.empty:
    print("upstream_data is empty")
    # Use sys.exit() rather than the exit() helper injected by the `site`
    # module, which is intended for interactive sessions and is not
    # guaranteed to exist (e.g. under `python -S`).
    sys.exit(0)

# Work on a deep copy so that upstream_data itself is left unmodified.
data_in = upstream_data.copy(deep=True)[data_in_cols]
data_in.columns = data_col_names
print("data_in")
print(data_in)
########################################################################
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
670
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
# Convert floating-point integers to int64 integers
# ref: https://stackoverflow.com/a/68497603/15509512
# Every column after the first is parsed as float64 and then passed
# through np.int64.
intensity_columns = list(data_in.columns[1:])
data_in[intensity_columns] = (
    data_in[intensity_columns].astype("float64").apply(np.int64)
)

# Add a second phosphopeptide column, taken from the index, that will be
# used to join later (an earlier version copied the first data column:
# data_in[PHOSPHOPEPTIDE_MATCH] = data_in[data_in.columns.tolist()[0]]);
# MAY need to change depending on Phosphopeptide column position.
data_in[PHOSPHOPEPTIDE_MATCH] = data_in.index

# Timer: report elapsed process time on stderr.
end_time = time.process_time()
print(
    "%0.6f set data_in[PHOSPHOPEPTIDE_MATCH] [A]" % (end_time - start_time),
    file=sys.stderr,
)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
690
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
691 # Produce a dictionary of metadata for a single phosphopeptide.
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
692 # This is a replacement of `UniProtInfo_subdict` in the original code.
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
693 def pseq_to_subdict(phospho_pep):
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
694 # Strip "p" from phosphopeptide sequence
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
695 dephospho_pep = re_phos.sub("", phospho_pep)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
696
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
697 # Determine number of phosphoresidues in phosphopeptide
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
698 numps = len(phospho_pep) - len(dephospho_pep)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
699
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
700 # Determine location(s) of phosphoresidue(s) in phosphopeptide
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
701 # (used later for Phosphoresidue, Sequence7, and Sequence10)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
702 ploc = [] # list of p locations
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
703 i = 0
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
704 p = phospho_pep
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
705 while i < numps:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
706 ploc.append(p.find("p"))
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
707 p = p[: p.find("p")] + p[p.find("p") + 1:]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
708 i += 1
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
709
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
710 # Establish nested dictionary
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
711 result = {}
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
712 result[SEQUENCE] = []
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
713 result[UNIPROT_ID] = []
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
714 result[DESCRIPTION] = []
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
715 result[GENE_NAME] = []
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
716 result[PHOSPHORESIDUE] = []
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
717 result[SEQUENCE7] = []
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
718 result[SEQUENCE10] = []
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
719
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
720 # Add stripped sequence to dictionary
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
721 result[SEQUENCE].append(dephospho_pep)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
722
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
723 # Locate phospho_pep in PhosphoPep_UniProtSeq_LUT
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
724 # Caller may elect to:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
725 # try:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
726 # ...
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
727 # except PreconditionError as pe:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
728 # print("'{expression}': {message}".format(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
729 # expression = pe.expression,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
730 # message = pe.message))
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
731 # )
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
732 # )
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
733 if phospho_pep not in PhosphoPep_UniProtSeq_LUT:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
734 raise PreconditionError(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
735 phospho_pep,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
736 "no matching phosphopeptide found in PhosphoPep_UniProtSeq_LUT",
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
737 )
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
738 if dephospho_pep not in DephosphoPep_UniProtSeq_LUT:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
739 raise PreconditionError(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
740 dephospho_pep,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
741 "dephosphorylated phosphopeptide not found in DephosphoPep_UniProtSeq_LUT",
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
742 )
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
743 if (
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
744 dephospho_pep != PhosphoPep_UniProtSeq_LUT[(phospho_pep, DEPHOSPHOPEP)]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
745 ):
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
746 my_err_msg = "dephosphorylated phosphopeptide does not match "
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
747 my_err_msg += "PhosphoPep_UniProtSeq_LUT[(phospho_pep,DEPHOSPHOPEP)] = "
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
748 my_err_msg += PhosphoPep_UniProtSeq_LUT[(phospho_pep, DEPHOSPHOPEP)]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
749 raise PreconditionError(dephospho_pep, my_err_msg)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
750
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
751 result[SEQUENCE] = [dephospho_pep]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
752 result[UNIPROT_ID] = DephosphoPep_UniProtSeq_LUT[
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
753 (dephospho_pep, UNIPROT_ID)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
754 ]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
755 result[DESCRIPTION] = DephosphoPep_UniProtSeq_LUT[
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
756 (dephospho_pep, DESCRIPTION)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
757 ]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
758 result[GENE_NAME] = DephosphoPep_UniProtSeq_LUT[
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
759 (dephospho_pep, GENE_NAME)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
760 ]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
761 if (dephospho_pep, SEQUENCE) not in DephosphoPep_UniProtSeq_LUT:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
762 raise PreconditionError(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
763 dephospho_pep,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
764 "no matching phosphopeptide found in DephosphoPep_UniProtSeq_LUT",
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
765 )
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
766 UniProtSeqList = DephosphoPep_UniProtSeq_LUT[
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
767 (dephospho_pep, SEQUENCE)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
768 ]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
769 if len(UniProtSeqList) < 1:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
770 print(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
771 "Skipping DephosphoPep_UniProtSeq_LUT[('%s',SEQUENCE)] because value has zero length"
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
772 % dephospho_pep
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
773 )
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
774 # raise PreconditionError(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
775 # "DephosphoPep_UniProtSeq_LUT[('" + dephospho_pep + ",SEQUENCE)",
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
776 # 'value has zero length'
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
777 # )
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
778 for UniProtSeq in UniProtSeqList:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
779 i = 0
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
780 phosphoresidues = []
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
781 seq7s_set = set()
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
782 seq7s = []
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
783 seq10s_set = set()
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
784 seq10s = []
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
785 while i < len(ploc):
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
786 start = UniProtSeq.find(dephospho_pep)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
787 # handle case where no sequence was found for dep-pep
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
788 if start < 0:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
789 i += 1
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
790 continue
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
791 psite = (
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
792 start + ploc[i]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
793 ) # location of phosphoresidue on protein sequence
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
794
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
795 # add Phosphoresidue
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
796 phosphosite = "p" + str(UniProtSeq)[psite] + str(psite + 1)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
797 phosphoresidues.append(phosphosite)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
798
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
799 # Add Sequence7
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
800 if psite < 7: # phospho_pep at N terminus
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
801 seq7 = str(UniProtSeq)[: psite + 8]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
802 if seq7[psite] == "S": # if phosphoresidue is serine
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
803 pres = "s"
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
804 elif (
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
805 seq7[psite] == "T"
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
806 ): # if phosphoresidue is threonine
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
807 pres = "t"
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
808 elif (
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
809 seq7[psite] == "Y"
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
810 ): # if phosphoresidue is tyrosine
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
811 pres = "y"
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
812 else: # if not pSTY
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
813 pres = "?"
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
814 seq7 = (
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
815 seq7[:psite] + pres + seq7[psite + 1: psite + 8]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
816 )
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
817 while (
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
818 len(seq7) < 15
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
819 ): # add appropriate number of "_" to the front
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
820 seq7 = "_" + seq7
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
821 elif (
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
822 len(UniProtSeq) - psite < 8
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
823 ): # phospho_pep at C terminus
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
824 seq7 = str(UniProtSeq)[psite - 7:]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
825 if seq7[7] == "S":
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
826 pres = "s"
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
827 elif seq7[7] == "T":
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
828 pres = "t"
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
829 elif seq7[7] == "Y":
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
830 pres = "y"
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
831 else:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
832 pres = "?"
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
833 seq7 = seq7[:7] + pres + seq7[8:]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
834 while (
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
835 len(seq7) < 15
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
836 ): # add appropriate number of "_" to the back
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
837 seq7 = seq7 + "_"
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
838 else:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
839 seq7 = str(UniProtSeq)[psite - 7: psite + 8]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
840 pres = "" # phosphoresidue
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
841 if seq7[7] == "S": # if phosphosresidue is serine
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
842 pres = "s"
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
843 elif seq7[7] == "T": # if phosphosresidue is threonine
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
844 pres = "t"
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
845 elif seq7[7] == "Y": # if phosphoresidue is tyrosine
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
846 pres = "y"
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
847 else: # if not pSTY
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
848 pres = "?"
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
849 seq7 = seq7[:7] + pres + seq7[8:]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
850 if seq7 not in seq7s_set:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
851 seq7s.append(seq7)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
852 seq7s_set.add(seq7)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
853
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
854 # add Sequence10
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
855 if psite < 10: # phospho_pep at N terminus
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
856 seq10 = (
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
857 str(UniProtSeq)[:psite] + "p" + str(UniProtSeq)[psite: psite + 11]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
858 )
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
859 elif (
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
860 len(UniProtSeq) - psite < 11
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
861 ): # phospho_pep at C terminus
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
862 seq10 = (
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
863 str(UniProtSeq)[psite - 10: psite] + "p" + str(UniProtSeq)[psite:]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
864 )
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
865 else:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
866 seq10 = str(UniProtSeq)[psite - 10: psite + 11]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
867 seq10 = seq10[:10] + "p" + seq10[10:]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
868 if seq10 not in seq10s_set:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
869 seq10s.append(seq10)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
870 seq10s_set.add(seq10)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
871
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
872 i += 1
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
873
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
874 result[PHOSPHORESIDUE].append(phosphoresidues)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
875 result[SEQUENCE7].append(seq7s)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
876 # result[SEQUENCE10] is a list of lists of strings
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
877 result[SEQUENCE10].append(seq10s)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
878
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
879 r = list(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
880 zip(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
881 result[UNIPROT_ID],
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
882 result[GENE_NAME],
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
883 result[DESCRIPTION],
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
884 result[PHOSPHORESIDUE],
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
885 )
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
886 )
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
887 # Sort by `UniProt_ID`
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
888 # ref: https://stackoverflow.com/a/4174955/15509512
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
889 s = sorted(r, key=operator.itemgetter(0))
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
890
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
891 result[UNIPROT_ID] = []
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
892 result[GENE_NAME] = []
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
893 result[DESCRIPTION] = []
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
894 result[PHOSPHORESIDUE] = []
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
895
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
896 for r in s:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
897 result[UNIPROT_ID].append(r[0])
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
898 result[GENE_NAME].append(r[1])
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
899 result[DESCRIPTION].append(r[2])
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
900 result[PHOSPHORESIDUE].append(r[3])
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
901
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
902 # convert lists to strings in the dictionary
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
903 for key, value in result.items():
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
904 if key not in [PHOSPHORESIDUE, SEQUENCE7, SEQUENCE10]:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
905 result[key] = "; ".join(map(str, value))
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
906 elif key in [SEQUENCE10]:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
907 # result[SEQUENCE10] is a list of lists of strings
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
908 joined_value = ""
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
909 joined_set = set()
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
910 sep = ""
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
911 for valL in value:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
912 # valL is a list of strings
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
913 for val in valL:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
914 # val is a string
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
915 if val not in joined_set:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
916 joined_set.add(val)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
917 joined_value += sep + val
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
918 sep = "; "
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
919 # joined_value is a string
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
920 result[key] = joined_value
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
921
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
922 newstring = "; ".join(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
923 [", ".join(prez) for prez in result[PHOSPHORESIDUE]]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
924 )
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
925 # #separate the isoforms in PHOSPHORESIDUE column with ";"
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
926 # oldstring = result[PHOSPHORESIDUE]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
927 # oldlist = list(oldstring)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
928 # newstring = ""
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
929 # i = 0
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
930 # for e in oldlist:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
931 # if e == ";":
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
932 # if numps > 1:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
933 # if i%numps:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
934 # newstring = newstring + ";"
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
935 # else:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
936 # newstring = newstring + ","
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
937 # else:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
938 # newstring = newstring + ";"
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
939 # i +=1
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
940 # else:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
941 # newstring = newstring + e
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
942 result[PHOSPHORESIDUE] = newstring
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
943
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
944 # separate sequence7's by |
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
945 oldstring = result[SEQUENCE7]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
946 oldlist = oldstring
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
947 newstring = ""
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
948 for ol in oldlist:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
949 for e in ol:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
950 if e == ";":
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
951 newstring = newstring + " |"
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
952 elif len(newstring) > 0 and 1 > newstring.count(e):
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
953 newstring = newstring + " | " + e
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
954 elif 1 > newstring.count(e):
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
955 newstring = newstring + e
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
956 result[SEQUENCE7] = newstring
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
957
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
958 return [phospho_pep, result]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
959
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
960 # Construct list of [string, dictionary] lists
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
961 # where the dictionary provides the SwissProt metadata
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
962 # for a phosphopeptide
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
963 result_list = [
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
964 whine(pseq_to_subdict, psequence)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
965 for psequence in data_in[PHOSPHOPEPTIDE_MATCH]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
966 ]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
967
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
968 end_time = time.process_time() # timer
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
969 print(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
970 "%0.6f added SwissProt annotations to phosphopeptides [B]"
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
971 % (end_time - start_time,),
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
972 file=sys.stderr,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
973 ) # timer
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
974
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
975 # Construct dictionary from list of lists
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
976 # ref: https://www.8bitavenue.com/how-to-convert-list-of-lists-to-dictionary-in-python/
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
977 UniProt_Info = {
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
978 result[0]: result[1]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
979 for result in result_list
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
980 if result is not None
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
981 }
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
982
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
983 end_time = time.process_time() # timer
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
984 print(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
985 "%0.6f create dictionary mapping phosphopeptide to metadata dictionary [C]"
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
986 % (end_time - start_time,),
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
987 file=sys.stderr,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
988 ) # timer
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
989
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
990 # cosmetic: add N_A to phosphopeptide rows with no hits
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
991 p_peptide_list = []
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
992 for key in UniProt_Info:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
993 p_peptide_list.append(key)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
994 for nestedKey in UniProt_Info[key]:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
995 if UniProt_Info[key][nestedKey] == "":
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
996 UniProt_Info[key][nestedKey] = N_A
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
997
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
998 end_time = time.process_time() # timer
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
999 print(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1000 "%0.6f performed cosmetic clean-up [D]" % (end_time - start_time,),
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1001 file=sys.stderr,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1002 ) # timer
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1003
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1004 # convert UniProt_Info dictionary to dataframe
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1005 uniprot_df = pandas.DataFrame.transpose(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1006 pandas.DataFrame.from_dict(UniProt_Info)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1007 )
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1008
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1009 # reorder columns to match expected output file
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1010 uniprot_df[
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1011 PHOSPHOPEPTIDE
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1012 ] = uniprot_df.index # make index a column too
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1013
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1014 cols = uniprot_df.columns.tolist()
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1015 # cols = [cols[-1]]+cols[4:6]+[cols[1]]+[cols[2]]+[cols[6]]+[cols[0]]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1016 # uniprot_df = uniprot_df[cols]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1017 uniprot_df = uniprot_df[
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1018 [
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1019 PHOSPHOPEPTIDE,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1020 SEQUENCE10,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1021 SEQUENCE7,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1022 GENE_NAME,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1023 PHOSPHORESIDUE,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1024 UNIPROT_ID,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1025 DESCRIPTION,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1026 ]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1027 ]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1028
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1029 end_time = time.process_time() # timer
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1030 print(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1031 "%0.6f reordered columns to match expected output file [1]"
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1032 % (end_time - start_time,),
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1033 file=sys.stderr,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1034 ) # timer
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1035
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1036 # concat to split then groupby to collapse
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1037 seq7_df = pandas.concat(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1038 [
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1039 pandas.Series(row[PHOSPHOPEPTIDE], row[SEQUENCE7].split(" | "))
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1040 for _, row in uniprot_df.iterrows()
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1041 ]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1042 ).reset_index()
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1043 seq7_df.columns = [SEQUENCE7, PHOSPHOPEPTIDE]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1044
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1045 # --- -------------- begin read PSP_Regulatory_sites ---------------------------------
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1046 # read in PhosphoSitePlus Regulatory Sites dataset
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1047 # ----------- Get PhosphoSitePlus Regulatory Sites data from SQLite database (start) -----------
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1048 conn = sql.connect(uniprot_sqlite)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1049 regsites_df = pandas.read_sql_query(PSP_REGSITE_SQL, conn)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1050 # Close SwissProt SQLite database
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1051 conn.close()
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1052 # ... -------------- end read PSP_Regulatory_sites ------------------------------------
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1053
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1054 # keep only the entries for the requested species in dataframe (skipped when no species is given)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1055 if len(species) > 0:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1056 print(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1057 'Limit PhosphoSitesPlus records to species "' + species + '"'
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1058 )
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1059 regsites_df = regsites_df[regsites_df.ORGANISM == species]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1060
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1061 # merge the seq7 df with the regsites df based off of the sequence7
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1062 merge_df = seq7_df.merge(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1063 regsites_df,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1064 left_on=SEQUENCE7,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1065 right_on=SITE_PLUSMINUS_7AA_SQL,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1066 how="left",
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1067 )
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1068
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1069 # after merging df, select only the columns of interest;
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1070 # note that PROTEIN is absent here
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1071 merge_df = merge_df[
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1072 [
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1073 PHOSPHOPEPTIDE,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1074 SEQUENCE7,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1075 ON_FUNCTION,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1076 ON_PROCESS,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1077 ON_PROT_INTERACT,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1078 ON_OTHER_INTERACT,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1079 ON_NOTES,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1080 ]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1081 ]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1082 # combine column values of interest
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1083 # into one FUNCTION_PHOSPHORESIDUE column
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1084 merge_df[FUNCTION_PHOSPHORESIDUE] = merge_df[ON_FUNCTION].str.cat(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1085 merge_df[ON_PROCESS], sep="; ", na_rep=""
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1086 )
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1087 merge_df[FUNCTION_PHOSPHORESIDUE] = merge_df[
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1088 FUNCTION_PHOSPHORESIDUE
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1089 ].str.cat(merge_df[ON_PROT_INTERACT], sep="; ", na_rep="")
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1090 merge_df[FUNCTION_PHOSPHORESIDUE] = merge_df[
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1091 FUNCTION_PHOSPHORESIDUE
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1092 ].str.cat(merge_df[ON_OTHER_INTERACT], sep="; ", na_rep="")
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1093 merge_df[FUNCTION_PHOSPHORESIDUE] = merge_df[
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1094 FUNCTION_PHOSPHORESIDUE
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1095 ].str.cat(merge_df[ON_NOTES], sep="; ", na_rep="")
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1096
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1097 # remove the columns that were combined
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1098 merge_df = merge_df[
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1099 [PHOSPHOPEPTIDE, SEQUENCE7, FUNCTION_PHOSPHORESIDUE]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1100 ]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1101
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1102 end_time = time.process_time() # timer
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1103 print(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1104 "%0.6f merge regsite metadata [1a]" % (end_time - start_time,),
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1105 file=sys.stderr,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1106 ) # timer
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1107
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1108 # cosmetic changes to Function Phosphoresidue column
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1109 fp_series = pandas.Series(merge_df[FUNCTION_PHOSPHORESIDUE])
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1110
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1111 end_time = time.process_time() # timer
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1112 print(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1113 "%0.6f more cosmetic changes [1b]" % (end_time - start_time,),
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1114 file=sys.stderr,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1115 ) # timer
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1116
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1117 i = 0
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1118 while i < len(fp_series):
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1119 # remove the extra ";" so that it looks more professional
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1120 if fp_series[i] == "; ; ; ; ": # remove ; from empty hits
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1121 fp_series[i] = ""
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1122 while fp_series[i].endswith("; "): # remove ; from the ends
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1123 fp_series[i] = fp_series[i][:-2]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1124 while fp_series[i].startswith("; "): # remove ; from the beginning
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1125 fp_series[i] = fp_series[i][2:]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1126 fp_series[i] = fp_series[i].replace("; ; ; ; ", "; ")
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1127 fp_series[i] = fp_series[i].replace("; ; ; ", "; ")
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1128 fp_series[i] = fp_series[i].replace("; ; ", "; ")
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1129
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1130 # turn blanks into N_A to signify the info was searched for but cannot be found
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1131 if fp_series[i] == "":
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1132 fp_series[i] = N_A
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1133
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1134 i += 1
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1135 merge_df[FUNCTION_PHOSPHORESIDUE] = fp_series
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1136
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1137 end_time = time.process_time() # timer
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1138 print(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1139 "%0.6f cleaned up semicolons [1c]" % (end_time - start_time,),
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1140 file=sys.stderr,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1141 ) # timer
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1142
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1143 # merge uniprot df with merge df
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1144 uniprot_regsites_merged_df = uniprot_df.merge(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1145 merge_df,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1146 left_on=PHOSPHOPEPTIDE,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1147 right_on=PHOSPHOPEPTIDE,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1148 how="left",
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1149 )
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1150
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1151 # collapse the merged df
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1152 uniprot_regsites_collapsed_df = pandas.DataFrame(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1153 uniprot_regsites_merged_df.groupby(PHOSPHOPEPTIDE)[
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1154 FUNCTION_PHOSPHORESIDUE
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1155 ].apply(lambda x: ppep_join(x))
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1156 )
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1157 # .apply(lambda x: "%s" % ' | '.join(x)))
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1158
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1159 end_time = time.process_time() # timer
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1160 print(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1161 "%0.6f collapsed pandas dataframe [1d]" % (end_time - start_time,),
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1162 file=sys.stderr,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1163 ) # timer
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1164
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1165 uniprot_regsites_collapsed_df[
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1166 PHOSPHOPEPTIDE
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1167 ] = (
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1168 uniprot_regsites_collapsed_df.index
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1169 ) # add df index as its own column
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1170
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1171 # rename columns
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1172 uniprot_regsites_collapsed_df.columns = [
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1173 FUNCTION_PHOSPHORESIDUE,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1174 "ppp",
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1175 ]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1176
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1177 end_time = time.process_time() # timer
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1178 print(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1179 "%0.6f selected columns to be merged to uniprot_df [1e]"
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1180 % (end_time - start_time,),
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1181 file=sys.stderr,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1182 ) # timer
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1183
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1184 # add columns based on Sequence7 matching site_+/-7_AA
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1185 uniprot_regsite_df = pandas.merge(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1186 left=uniprot_df,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1187 right=uniprot_regsites_collapsed_df,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1188 how="left",
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1189 left_on=PHOSPHOPEPTIDE,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1190 right_on="ppp",
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1191 )
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1192
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1193 end_time = time.process_time() # timer
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1194 print(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1195 "%0.6f added columns based on Sequence7 matching site_+/-7_AA [1f]"
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1196 % (end_time - start_time,),
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1197 file=sys.stderr,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1198 ) # timer
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1199
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1200 data_in.rename(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1201 {"Protein description": PHOSPHOPEPTIDE},
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1202 axis="columns",
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1203 inplace=True,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1204 )
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1205
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1206 # data_in.sort_values(PHOSPHOPEPTIDE_MATCH, inplace=True, kind='mergesort')
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1207 res2 = sorted(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1208 data_in[PHOSPHOPEPTIDE_MATCH].tolist(), key=lambda s: s.casefold()
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1209 )
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1210 data_in = data_in.loc[res2]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1211
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1212 end_time = time.process_time() # timer
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1213 print(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1214 "%0.6f sorting time [1f]" % (end_time - start_time,),
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1215 file=sys.stderr,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1216 ) # timer
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1217
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1218 print("old_cols[:col_PKCalpha]")
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1219 print(old_cols[:col_PKCalpha])
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1220 cols = [old_cols[0]] + old_cols[col_PKCalpha - 1:]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1221 upstream_data = upstream_data[cols]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1222 print("upstream_data.columns")
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1223 print(upstream_data.columns)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1224
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1225 end_time = time.process_time() # timer
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1226 print(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1227 "%0.6f refactored columns for Upstream Map [1g]"
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1228 % (end_time - start_time,),
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1229 file=sys.stderr,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1230 ) # timer
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1231
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1232 # #rename upstream columns in new list
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1233 # new_cols = []
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1234 # for name in cols:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1235 # if "_NetworKIN" in name:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1236 # name = name.split("_")[0]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1237 # if " motif" in name:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1238 # name = name.split(" motif")[0]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1239 # if " sequence " in name:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1240 # name = name.split(" sequence")[0]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1241 # if "_Phosida" in name:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1242 # name = name.split("_")[0]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1243 # if "_PhosphoSite" in name:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1244 # name = name.split("_")[0]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1245 # new_cols.append(name)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1246
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1247 # rename upstream columns in new list
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
def col_rename(name):
    """Shorten an upstream column name by cutting it at known markers.

    Each rule pairs a marker substring with a separator.  The rules are
    tried in the order listed, each one operating on the result of the
    previous rule: when the marker occurs in the name, only the text in
    front of the first occurrence of the separator is kept.

    Args:
        name: the column name to shorten.

    Returns:
        The shortened name, or ``name`` unchanged when no marker occurs.
    """
    # (marker whose presence triggers the rule, separator to cut at)
    rules = (
        ("_NetworKIN", "_"),
        (" motif", " motif"),
        # the marker needs a trailing blank, the cut point does not
        (" sequence ", " sequence"),
        ("_Phosida", "_"),
        ("_PhosphoSite", "_"),
    )
    for marker, separator in rules:
        if marker in name:
            # text before the first separator, same as split(separator)[0]
            name = name.partition(separator)[0]
    return name
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1260
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
# Pass every entry of cols through col_rename and install the resulting
# labels as the column headers of upstream_data.
new_cols = list(map(col_rename, cols))
upstream_data.columns = new_cols

# timer: report the CPU time consumed since start_time on stderr
end_time = time.process_time()
print(
    "%0.6f renamed columns for Upstream Map [1h_1]" % (end_time - start_time),
    file=sys.stderr,
)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1270
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
# Build upstream_data_cast: a copy of upstream_data whose first column is
# relabelled "p_peptide" and then filled with the index of upstream_data
# (the phosphopeptide sequence).
# NOTE: new_cols_cast is the very same list object as new_cols, not a copy,
# so the relabelling of element 0 is visible through new_cols as well.
new_cols[0] = "p_peptide"
new_cols_cast = new_cols
upstream_data_cast = upstream_data.copy()
upstream_data_cast.columns = new_cols_cast
upstream_data_cast["p_peptide"] = upstream_data.index
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1278
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
# --- -------------- begin read upstream_data_melt ------------------------------------
# ----------- Get melted kinase mapping data from SQLite database (start) -----------
# Run PPEP_MELT_SQL against the SQLite file named by uniprot_sqlite.
conn = sql.connect(uniprot_sqlite)
try:
    upstream_data_melt_df = pandas.read_sql_query(PPEP_MELT_SQL, conn)
finally:
    # Close the SQLite database even when the query raises, so the
    # connection is never left open on the error path.
    conn.close()

# Work on a copy so that upstream_data_melt_df keeps the query result as read.
upstream_data_melt = upstream_data_melt_df.copy()
# The query result must have exactly three columns; give them fixed names.
upstream_data_melt.columns = ["p_peptide", "characterization", "X"]
# Apply the same renaming to the characterization values that col_rename
# applies to column headers.
upstream_data_melt["characterization"] = [
    col_rename(s) for s in upstream_data_melt["characterization"]
]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1290
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
# Refresh the timer before reporting: reusing the end_time captured for an
# earlier stage would print a stale elapsed time next to the row count.
end_time = time.process_time()  # timer
print(
    "%0.6f upstream_data_melt_df initially has %d rows"
    % (end_time - start_time, len(upstream_data_melt.axes[0])),
    file=sys.stderr,
)
# Keep only the rows whose X column holds the marker "X"; every other row is
# dropped in place.
# ref: https://stackoverflow.com/a/27360130/15509512
#      e.g. df.drop(df[df.score < 50].index, inplace=True)
upstream_data_melt.drop(
    upstream_data_melt[upstream_data_melt.X != "X"].index, inplace=True
)
end_time = time.process_time()  # timer
print(
    "%0.6f upstream_data_melt_df pre-dedup has %d rows"
    % (end_time - start_time, len(upstream_data_melt.axes[0])),
    file=sys.stderr,
)
# ----------- Get melted kinase mapping data from SQLite database (finish) -----------
# ... -------------- end read upstream_data_melt --------------------------------------

end_time = time.process_time()  # timer
print(
    "%0.6f melted and minimized Upstream Map dataframe [1h_2]"
    % (end_time - start_time,),
    file=sys.stderr,
)  # timer
# ... end read upstream_data_melt

# NOTE: no work happens between the previous checkpoint and this one; the
# [1h_2a] message is kept so that the stderr log keeps its sequence of labels.
end_time = time.process_time()  # timer
print(
    "%0.6f indexed melted Upstream Map [1h_2a]"
    % (end_time - start_time,),
    file=sys.stderr,
)  # timer
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1323
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
# Flatten the melted map into a list of [p_peptide, characterization, X] rows.
upstream_delta_melt_LoL = upstream_data_melt.values.tolist()

# Give every key of upstream_map_p_peptide_list an empty list up front, so
# that keys without any matching row still end up with an entry.
melt_dict = {key: [] for key in upstream_map_p_peptide_list}

# Collect, per phosphopeptide, the characterizations found in the melted map.
# The third field of each row (the "X" marker) is not needed here.
for p_peptide, characterization, _ in upstream_delta_melt_LoL:
    if p_peptide not in melt_dict:
        # Abort with a message: a row names a phosphopeptide that is not a
        # key of melt_dict.  sys.exit is used because the bare exit() helper
        # is injected by the site module and is not meant for programs; the
        # one-element tuple keeps the %-formatting safe for any value.
        sys.exit(
            'Phosphopeptide %s not found in ppep_mapping_db: "phopsphopeptides" and "ppep_mapping_db" must both originate from the same run of mqppep_kinase_mapping'
            % (p_peptide,)
        )
    melt_dict[p_peptide].append(characterization)

end_time = time.process_time()  # timer
print(
    "%0.6f appended peptide characterizations [1h_2b]"
    % (end_time - start_time,),
    file=sys.stderr,
)  # timer
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1346
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
# Collapse each list of characterizations into the single value produced by
# melt_join, replacing the list stored in melt_dict under the same key.
for key in upstream_map_p_peptide_list:
    collected = melt_dict[key]
    melt_dict[key] = melt_join(collected)

# timer: report the CPU time consumed since start_time on stderr
end_time = time.process_time()
print(
    "%0.6f concatenated multiple characterizations [1h_2c]" % (end_time - start_time),
    file=sys.stderr,
)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1359
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
# map_dict is a dictionary of dictionaries: each key of
# upstream_map_p_peptide_list maps to a one-entry dictionary that stores the
# joined characterizations under PUTATIVE_UPSTREAM_DOMAINS.
map_dict = {}
for key in upstream_map_p_peptide_list:
    map_dict[key] = {PUTATIVE_UPSTREAM_DOMAINS: melt_dict[key]}

# timer: report the CPU time consumed since start_time on stderr
end_time = time.process_time()
print(
    "%0.6f instantiated map dictionary [2]" % (end_time - start_time),
    file=sys.stderr,
)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1371
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
# Turn map_dict into a dataframe with one row per outer key: from_dict puts
# the outer keys on the columns, so the frame is transposed afterwards.
map_df = pandas.DataFrame.from_dict(map_dict).transpose()
# Expose the index as an ordinary column as well.
map_df["p-peptide"] = map_df.index
# Swap the first two columns so that the second one comes first.
cols_map_df = map_df.columns.tolist()
cols_map_df = [cols_map_df[1], cols_map_df[0]]
map_df = map_df[cols_map_df]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1380
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
# Left-join map_df onto uniprot_regsite_df, matching the PHOSPHOPEPTIDE
# column of the left frame against the "p-peptide" column of map_df, so that
# every row of uniprot_regsite_df is retained.
output_df = pandas.merge(
    uniprot_regsite_df,
    map_df,
    how="left",
    left_on=PHOSPHOPEPTIDE,
    right_on="p-peptide",
)

# Restrict the result to the output columns, in this order; the helper
# "p-peptide" join column is left out.
output_df = output_df[
    [
        PHOSPHOPEPTIDE,
        SEQUENCE10,
        SEQUENCE7,
        GENE_NAME,
        PHOSPHORESIDUE,
        UNIPROT_ID,
        DESCRIPTION,
        FUNCTION_PHOSPHORESIDUE,
        PUTATIVE_UPSTREAM_DOMAINS,
    ]
]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1399
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1400 # cols_output_prelim = output_df.columns.tolist()
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1401 #
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1402 # print("cols_output_prelim")
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1403 # print(cols_output_prelim)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1404 #
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1405 # cols_output = cols_output_prelim[:8]+[cols_output_prelim[9]]+[cols_output_prelim[10]]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1406 #
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1407 # print("cols_output with p-peptide")
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1408 # print(cols_output)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1409 #
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1410 # cols_output = [col for col in cols_output if not col == "p-peptide"]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1411 #
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1412 # print("cols_output")
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1413 # print(cols_output)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1414 #
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1415 # output_df = output_df[cols_output]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1416
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1417 # join output_df back to quantitative columns in data_in df
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1418 quant_cols = data_in.columns.tolist()
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1419 quant_cols = quant_cols[1:]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1420 quant_data = data_in[quant_cols]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1421
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1422 # ----------- Write merge/filter metadata to SQLite database (start) -----------
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1423 # Open the output SQLite database (output_sqlite)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1424 conn = sql.connect(output_sqlite)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1425 cur = conn.cursor()
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1426
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1427 cur.executescript(MRGFLTR_DDL)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1428
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1429 cur.execute(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1430 CITATION_INSERT_STMT,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1431 ("mrgfltr_metadata_view", CITATION_INSERT_PSP),
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1432 )
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1433 cur.execute(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1434 CITATION_INSERT_STMT, ("mrgfltr_metadata", CITATION_INSERT_PSP)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1435 )
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1436 cur.execute(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1437 CITATION_INSERT_STMT,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1438 ("mrgfltr_metadata_view", CITATION_INSERT_PSP_REF),
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1439 )
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1440 cur.execute(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1441 CITATION_INSERT_STMT, ("mrgfltr_metadata", CITATION_INSERT_PSP_REF)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1442 )
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1443
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1444 # Read ppep-to-sequence LUT
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1445 ppep_lut_df = pandas.read_sql_query(PPEP_ID_SQL, conn)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1446 # write only metadata for merged/filtered records to SQLite
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1447 mrgfltr_metadata_df = output_df.copy()
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1448 # replace phosphopeptide seq with ppep.id
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1449 mrgfltr_metadata_df = ppep_lut_df.merge(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1450 mrgfltr_metadata_df,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1451 left_on="ppep_seq",
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1452 right_on=PHOSPHOPEPTIDE,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1453 how="inner",
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1454 )
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1455 mrgfltr_metadata_df.drop(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1456 columns=[PHOSPHOPEPTIDE, "ppep_seq"], inplace=True
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1457 )
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1458 # rename columns
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1459 mrgfltr_metadata_df.columns = MRGFLTR_METADATA_COLUMNS
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1460 mrgfltr_metadata_df.to_sql(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1461 "mrgfltr_metadata",
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1462 con=conn,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1463 if_exists="append",
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1464 index=False,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1465 method="multi",
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1466 )
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1467
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1468 # Close the output SQLite database (output_sqlite)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1469 conn.close()
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1470 # ----------- Write merge/filter metadata to SQLite database (finish) -----------
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1471
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1472 output_df = output_df.merge(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1473 quant_data,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1474 how="right",
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1475 left_on=PHOSPHOPEPTIDE,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1476 right_on=PHOSPHOPEPTIDE_MATCH,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1477 )
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1478 output_cols = output_df.columns.tolist()
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1479 output_cols = output_cols[:-1]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1480 output_df = output_df[output_cols]
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1481
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1482 # cosmetic changes to Upstream column
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1483 output_df[PUTATIVE_UPSTREAM_DOMAINS] = output_df[
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1484 PUTATIVE_UPSTREAM_DOMAINS
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1485 ].fillna(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1486 ""
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1487 ) # fill the NaN with "" for those Phosphopeptides that got a "WARNING: Failed match for " in the upstream mapping
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1488 us_series = pandas.Series(output_df[PUTATIVE_UPSTREAM_DOMAINS])
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1489 i = 0
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1490 while i < len(us_series):
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1491 # turn blanks into N_A to signify the info was searched for but cannot be found
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1492 if us_series[i] == "":
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1493 us_series[i] = N_A
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1494 i += 1
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1495 output_df[PUTATIVE_UPSTREAM_DOMAINS] = us_series
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1496
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1497 end_time = time.process_time() # timer
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1498 print(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1499 "%0.6f establisheed output [3]" % (end_time - start_time,),
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1500 file=sys.stderr,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1501 ) # timer
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1502
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1503 (output_rows, output_cols) = output_df.shape
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1504
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1505 output_df = output_df.convert_dtypes(convert_integer=True)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1506
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1507 # Output onto Final CSV file
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1508 output_df.to_csv(output_filename_csv, index=False)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1509 output_df.to_csv(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1510 output_filename_tab, quoting=None, sep="\t", index=False
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1511 )
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1512
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1513 end_time = time.process_time() # timer
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1514 print(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1515 "%0.6f wrote output [4]" % (end_time - start_time,),
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1516 file=sys.stderr,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1517 ) # timer
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1518
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1519 print(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1520 "{:>10} phosphopeptides written to output".format(str(output_rows))
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1521 )
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1522
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1523 end_time = time.process_time() # timer
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1524 print(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1525 "%0.6f seconds of non-system CPU time were consumed"
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1526 % (end_time - start_time,),
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1527 file=sys.stderr,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1528 ) # timer
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1529
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1530 # Rev. 7/1/2016
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1531 # Rev. 7/3/2016: replace NaN in Upstream column with N/A's
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1532 # Rev. 7/3/2016: renamed Upstream column to PUTATIVE_UPSTREAM_DOMAINS
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1533 # Rev. 12/2/2021: Converted to Python from ipynb; use fast Aho-Corasick searching; \
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1534 # read from SwissProt SQLite database
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1535 # Rev. 12/9/2021: Transfer code to Galaxy tool wrapper
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1536
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1537 #
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1538 # copied from Excel Output Script.ipynb END #
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1539 #
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1540
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1541 try:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1542 catch(
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1543 mqpep_getswissprot,
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1544 )
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1545 exit(0)
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1546 except Exception as e:
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1547 exit("Internal error running mqpep_getswissprot(): %s" % (e))
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1548
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1549
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1550 if __name__ == "__main__":
8dfd5d2b5903 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff changeset
1551 __main__()