Mercurial > repos > galaxyp > mqppep_preproc
annotate search_ppep.py @ 0:8dfd5d2b5903 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
author | galaxyp |
---|---|
date | Mon, 11 Jul 2022 19:22:54 +0000 |
parents | |
children | b76c75521d91 |
rev | line source |
---|---|
0
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
1 #!/usr/bin/env python |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
2 # Search and memoize phosphopeptides in Swiss-Prot SQLite table UniProtKB |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
3 |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
4 import argparse |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
5 import os.path |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
6 import re |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
7 import sqlite3 |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
8 import sys # import the sys module for exc_info |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
9 import time |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
10 import traceback # import the traceback module for format_exception |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
11 from codecs import getreader as cx_getreader |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
12 |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
13 # For Aho-Corasick search for fixed set of substrings |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
14 # - add_word |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
15 # - make_automaton |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
16 # - iter |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
17 import ahocorasick |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
18 |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
19 |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
20 # ref: https://stackoverflow.com/a/8915613/15509512 |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
21 # answers: "How to handle exceptions in a list comprehensions" |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
22 # usage: |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
23 # from math import log |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
24 # eggs = [1,3,0,3,2] |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
25 # print([x for x in [catch(log, egg) for egg in eggs] if x is not None]) |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
26 # producing: |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
27 # For <built-in function log> |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
28 # with args (0,) |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
29 # caught exception: math domain error |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
30 # [0.0, 1.0986122886681098, 1.0986122886681098, 0.6931471805599453] |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
def catch(func, *args, handle=lambda e: e, **kwargs):
    """Call ``func(*args, **kwargs)``; report any exception instead of raising.

    Intended for use inside list comprehensions, where one failing element
    should be reported and skipped rather than abort the whole expression.

    Args:
        func: The callable to invoke.
        *args: Positional arguments forwarded to ``func``.
        handle: Accepted for backward compatibility only. It is currently
            NOT called; failures always yield ``None`` so that callers can
            filter results with ``if x is not None``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        Whatever ``func`` returns, or ``None`` when ``func`` raises an
        ``Exception`` (a diagnostic report is printed to stdout in that case).
    """
    try:
        return func(*args, **kwargs)
    except Exception as e:
        print("For %s" % str(func))
        print(" with args %s" % str(args))
        print(" caught exception: %s" % str(e))
        # format_exc() renders the active exception as one readable,
        # multi-line string; printing str() of the list returned by
        # format_exception() produced a list repr with escaped newlines.
        print(" stack trace: " + traceback.format_exc().rstrip())
        return None  # was handle(e)
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
43 |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
44 |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
45 def __main__(): |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
46 |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
47 DROP_TABLES_SQL = """ |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
48 DROP VIEW IF EXISTS ppep_gene_site_view; |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
49 DROP VIEW IF EXISTS uniprot_view; |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
50 DROP VIEW IF EXISTS uniprotkb_pep_ppep_view; |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
51 DROP VIEW IF EXISTS ppep_intensity_view; |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
52 DROP VIEW IF EXISTS ppep_metadata_view; |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
53 |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
54 DROP TABLE IF EXISTS sample; |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
55 DROP TABLE IF EXISTS ppep; |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
56 DROP TABLE IF EXISTS site_type; |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
57 DROP TABLE IF EXISTS deppep_UniProtKB; |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
58 DROP TABLE IF EXISTS deppep; |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
59 DROP TABLE IF EXISTS ppep_gene_site; |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
60 DROP TABLE IF EXISTS ppep_metadata; |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
61 DROP TABLE IF EXISTS ppep_intensity; |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
62 """ |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
63 |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
64 CREATE_TABLES_SQL = """ |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
65 CREATE TABLE deppep |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
66 ( id INTEGER PRIMARY KEY |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
67 , seq TEXT UNIQUE ON CONFLICT IGNORE |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
68 ) |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
69 ; |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
70 CREATE TABLE deppep_UniProtKB |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
71 ( deppep_id INTEGER REFERENCES deppep(id) ON DELETE CASCADE |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
72 , UniProtKB_id TEXT REFERENCES UniProtKB(id) ON DELETE CASCADE |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
73 , pos_start INTEGER |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
74 , pos_end INTEGER |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
75 , PRIMARY KEY (deppep_id, UniProtKB_id, pos_start, pos_end) |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
76 ON CONFLICT IGNORE |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
77 ) |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
78 ; |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
79 CREATE TABLE ppep |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
80 ( id INTEGER PRIMARY KEY |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
81 , deppep_id INTEGER REFERENCES deppep(id) ON DELETE CASCADE |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
82 , seq TEXT UNIQUE ON CONFLICT IGNORE |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
83 , scrubbed TEXT |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
84 ); |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
85 CREATE TABLE site_type |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
86 ( id INTEGER PRIMARY KEY |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
87 , type_name TEXT UNIQUE ON CONFLICT IGNORE |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
88 ); |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
89 CREATE INDEX idx_ppep_scrubbed on ppep(scrubbed) |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
90 ; |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
91 CREATE TABLE sample |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
92 ( id INTEGER PRIMARY KEY |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
93 , name TEXT UNIQUE ON CONFLICT IGNORE |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
94 ) |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
95 ; |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
96 CREATE VIEW uniprot_view AS |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
97 SELECT DISTINCT |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
98 Uniprot_ID |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
99 , Description |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
100 , Organism_Name |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
101 , Organism_ID |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
102 , Gene_Name |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
103 , PE |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
104 , SV |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
105 , Sequence |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
106 , Description || |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
107 CASE WHEN Organism_Name = 'N/A' |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
108 THEN '' |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
109 ELSE ' OS='|| Organism_Name |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
110 END || |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
111 CASE WHEN Organism_ID = -1 |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
112 THEN '' |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
113 ELSE ' OX='|| Organism_ID |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
114 END || |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
115 CASE WHEN Gene_Name = 'N/A' |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
116 THEN '' |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
117 ELSE ' GN='|| Gene_Name |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
118 END || |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
119 CASE WHEN PE = 'N/A' |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
120 THEN '' |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
121 ELSE ' PE='|| PE |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
122 END || |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
123 CASE WHEN SV = 'N/A' |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
124 THEN '' |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
125 ELSE ' SV='|| SV |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
126 END AS long_description |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
127 , Database |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
128 FROM UniProtKB |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
129 ; |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
130 CREATE VIEW uniprotkb_pep_ppep_view AS |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
131 SELECT deppep_UniProtKB.UniprotKB_ID AS accession |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
132 , deppep_UniProtKB.pos_start AS pos_start |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
133 , deppep_UniProtKB.pos_end AS pos_end |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
134 , deppep.seq AS peptide |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
135 , ppep.seq AS phosphopeptide |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
136 , ppep.scrubbed AS scrubbed |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
137 , uniprot_view.Sequence AS sequence |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
138 , uniprot_view.Description AS description |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
139 , uniprot_view.long_description AS long_description |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
140 , ppep.id AS ppep_id |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
141 FROM ppep, deppep, deppep_UniProtKB, uniprot_view |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
142 WHERE deppep.id = ppep.deppep_id |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
143 AND deppep.id = deppep_UniProtKB.deppep_id |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
144 AND deppep_UniProtKB.UniprotKB_ID = uniprot_view.Uniprot_ID |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
145 ORDER BY UniprotKB_ID, deppep.seq, ppep.seq |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
146 ; |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
147 CREATE TABLE ppep_gene_site |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
148 ( ppep_id INTEGER REFERENCES ppep(id) |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
149 , gene_names TEXT |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
150 , site_type_id INTEGER REFERENCES site_type(id) |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
151 , kinase_map TEXT |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
152 , PRIMARY KEY (ppep_id, kinase_map) ON CONFLICT IGNORE |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
153 ) |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
154 ; |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
155 CREATE VIEW ppep_gene_site_view AS |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
156 SELECT DISTINCT |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
157 ppep.seq AS phospho_peptide |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
158 , ppep_id |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
159 , gene_names |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
160 , type_name |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
161 , kinase_map |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
162 FROM |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
163 ppep, ppep_gene_site, site_type |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
164 WHERE |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
165 ppep_gene_site.ppep_id = ppep.id |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
166 AND |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
167 ppep_gene_site.site_type_id = site_type.id |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
168 ORDER BY |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
169 ppep.seq |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
170 ; |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
171 CREATE TABLE ppep_metadata |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
172 ( ppep_id INTEGER REFERENCES ppep(id) |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
173 , protein_description TEXT |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
174 , gene_name TEXT |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
175 , FASTA_name TEXT |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
176 , phospho_sites TEXT |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
177 , motifs_unique TEXT |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
178 , accessions TEXT |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
179 , motifs_all_members TEXT |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
180 , domain TEXT |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
181 , ON_FUNCTION TEXT |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
182 , ON_PROCESS TEXT |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
183 , ON_PROT_INTERACT TEXT |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
184 , ON_OTHER_INTERACT TEXT |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
185 , notes TEXT |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
186 , PRIMARY KEY (ppep_id) ON CONFLICT IGNORE |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
187 ) |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
188 ; |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
189 CREATE VIEW ppep_metadata_view AS |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
190 SELECT DISTINCT |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
191 ppep.seq AS phospho_peptide |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
192 , protein_description |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
193 , gene_name |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
194 , FASTA_name |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
195 , phospho_sites |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
196 , motifs_unique |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
197 , accessions |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
198 , motifs_all_members |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
199 , domain |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
200 , ON_FUNCTION |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
201 , ON_PROCESS |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
202 , ON_PROT_INTERACT |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
203 , ON_OTHER_INTERACT |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
204 , notes |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
205 FROM |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
206 ppep, ppep_metadata |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
207 WHERE |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
208 ppep_metadata.ppep_id = ppep.id |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
209 ORDER BY |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
210 ppep.seq |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
211 ; |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
212 CREATE TABLE ppep_intensity |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
213 ( ppep_id INTEGER REFERENCES ppep(id) |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
214 , sample_id INTEGER |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
215 , intensity INTEGER |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
216 , PRIMARY KEY (ppep_id, sample_id) ON CONFLICT IGNORE |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
217 ) |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
218 ; |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
219 CREATE VIEW ppep_intensity_view AS |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
220 SELECT DISTINCT |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
221 ppep.seq AS phospho_peptide |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
222 , sample.name AS sample |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
223 , intensity |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
224 FROM |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
225 ppep, sample, ppep_intensity |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
226 WHERE |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
227 ppep_intensity.sample_id = sample.id |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
228 AND |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
229 ppep_intensity.ppep_id = ppep.id |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
230 ; |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
231 """ |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
232 |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
233 UNIPROT_SEQ_AND_ID_SQL = """ |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
234 select Sequence, Uniprot_ID |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
235 from UniProtKB |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
236 """ |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
237 |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
238 # Parse Command Line |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
239 parser = argparse.ArgumentParser( |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
240 description="Phosphoproteomic Enrichment phosphopeptide SwissProt search (in place in SQLite DB)." |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
241 ) |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
242 |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
243 # inputs: |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
244 # Phosphopeptide data for experimental results, including the intensities |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
245 # and the mapping to kinase domains, in tabular format. |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
246 parser.add_argument( |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
247 "--phosphopeptides", |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
248 "-p", |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
249 nargs=1, |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
250 required=True, |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
251 dest="phosphopeptides", |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
252 help="Phosphopeptide data for experimental results, generated by the Phosphoproteomic Enrichment Localization Filter tool", |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
253 ) |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
254 parser.add_argument( |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
255 "--uniprotkb", |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
256 "-u", |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
257 nargs=1, |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
258 required=True, |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
259 dest="uniprotkb", |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
260 help="UniProtKB/Swiss-Prot data, converted from FASTA format by the Phosphoproteomic Enrichment Kinase Mapping tool", |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
261 ) |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
262 parser.add_argument( |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
263 "--schema", |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
264 action="store_true", |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
265 dest="db_schema", |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
266 help="show updated database schema", |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
267 ) |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
268 parser.add_argument( |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
269 "--warn-duplicates", |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
270 action="store_true", |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
271 dest="warn_duplicates", |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
272 help="show warnings for duplicated sequences", |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
273 ) |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
parser.add_argument(
    "--verbose",
    action="store_true",
    dest="verbose",
    help="show somewhat verbose program tracing",
)
# "Make it so!" (parse the arguments)
options = parser.parse_args()
if options.verbose:
    print("options: " + str(options) + "\n")

# Path to the phosphopeptide (e.g., "outputfile_STEP2.txt") input tabular
# file.  The option value is indexed with [0], i.e. it is expected to be a
# one-element sequence.
if options.phosphopeptides is None:
    exit('Argument "phosphopeptides" is required but not supplied')
try:
    f_name = os.path.abspath(options.phosphopeptides[0])
except Exception as e:
    exit("Error parsing phosphopeptides argument: %s" % (e))

# Path to the SQLite database file, which is both read and written below.
if options.uniprotkb is None:
    exit('Argument "uniprotkb" is required but not supplied')
try:
    db_name = os.path.abspath(options.uniprotkb[0])
except Exception as e:
    exit("Error parsing uniprotkb argument: %s" % (e))

# Two cursors are opened on the same connection; only `cur` is used in
# this section.
con = sqlite3.connect(db_name)
cur = con.cursor()
ker = con.cursor()

# Start from a clean slate: drop, then re-create, the tables/views that
# this program owns.
cur.executescript(DROP_TABLES_SQL)
cur.executescript(CREATE_TABLES_SQL)

if options.db_schema:
    print(
        "\nAfter creating tables/views that are to be created, schema is:"
    )
    # Query the schema table under its historical name: "sqlite_schema"
    # is only recognised by SQLite >= 3.33.0, whereas "sqlite_master" is
    # accepted by every version and has the same columns.
    cur.execute("SELECT * FROM sqlite_master")
    for row in cur.fetchall():
        # Column 4 is the "sql" column (the CREATE statement text); it is
        # NULL for automatically created objects.
        if row[4] is not None:
            print("%s;" % row[4])
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
329 |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
def generate_ppep(f):
    """Yield the first tab-delimited field of each data row of file ``f``.

    The file is opened in binary mode and decoded as latin-1 through
    ``cx_getreader`` (presumably ``codecs.getreader`` under an alias --
    confirm against the file's imports).  The first line is treated as a
    header and is never yielded.  For every following line, the text up
    to (not including) the first tab is taken and all double-quote
    characters are removed from it.

    Note that a line containing no tab is yielded whole, including its
    line terminator, because the field pattern matches everything that
    is not a tab.

    The file is closed when the generator is exhausted, closed early, or
    when an exception propagates out of it.
    """
    # ref: https://stackoverflow.com/a/16713581/15509512
    #      answer to "Use codecs to read file with correct encoding"
    re_tab = re.compile("^[^\t]*")
    re_quote = re.compile('"')
    with open(f, "rb") as file1_encoded:
        file1 = cx_getreader("latin-1")(file1_encoded)
        # Discard the header row.
        file1.readline()
        while True:
            line = file1.readline()
            # An empty string (as opposed to "\n") signals end of file.
            if not line:
                break
            # The pattern can match the empty string, so match() never
            # returns None here.
            yield re_quote.sub("", re_tab.match(line)[0])
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
354 |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
# Build an Aho-Corasick automaton from a trie
# - ref:
#   - https://pypi.org/project/pyahocorasick/
#   - https://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_algorithm
#   - https://en.wikipedia.org/wiki/Trie
auto = ahocorasick.Automaton()
re_phos = re.compile("p")
# Scrub out unsearchable characters per section
#   "Match the p_peptides to the @sequences array:"
# of the original
#   PhosphoPeptide Upstream Kinase Mapping.pl
# which originally read
#   $tmp_p_peptide =~ s/#//g;
#   $tmp_p_peptide =~ s/\d//g;
#   $tmp_p_peptide =~ s/\_//g;
#   $tmp_p_peptide =~ s/\.//g;
#
# The pattern has to be a character class so that each '#', digit, '_'
# and '.' is removed individually; without the brackets it would only
# match the literal text "0-9_", one arbitrary character, then "#".
re_scrub = re.compile(r"[0-9_.#]")
ppep_count = 0
for ppep in generate_ppep(f_name):
    ppep_count += 1
    scrubbed = re_scrub.sub("", ppep)
    # The dephosphopeptide is the scrubbed sequence with every "p"
    # marker removed.
    deppep = re_phos.sub("", scrubbed)
    if options.verbose:
        print("deppep: %s; scrubbed: %s" % (deppep, scrubbed))
    cur.execute("SELECT id FROM deppep WHERE seq = (?)", (deppep,))
    row = cur.fetchone()
    if row is None:
        # First occurrence of this dephosphopeptide: store it, read back
        # its id, and add it to the trie exactly once.
        cur.execute("INSERT INTO deppep(seq) VALUES (?)", (deppep,))
        cur.execute("SELECT id FROM deppep WHERE seq = (?)", (deppep,))
        deppep_id = cur.fetchone()[0]
        auto.add_word(deppep, (deppep_id, deppep))
    else:
        deppep_id = row[0]
    cur.execute(
        "INSERT INTO ppep(seq, scrubbed, deppep_id) VALUES (?,?,?)",
        (ppep, scrubbed, deppep_id),
    )

# An aggregate count(*) without an outer GROUP BY always returns exactly
# one row, so fetchone() is sufficient.
cur.execute("SELECT count(*) FROM (SELECT seq FROM deppep GROUP BY seq)")
deppep_count = cur.fetchone()[0]

cur.execute(
    "SELECT count(*) FROM (SELECT Sequence FROM UniProtKB GROUP BY Sequence)"
)
sequence_count = cur.fetchone()[0]

print("%d phosphopeptides were read from input" % ppep_count)
print(
    "%d corresponding dephosphopeptides are represented in input"
    % deppep_count
)
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
416 # Look for cases where both Gene_Name and Sequence are identical |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
417 cur.execute( |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
418 """ |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
419 SELECT Uniprot_ID, Gene_Name, Sequence |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
420 FROM UniProtKB |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
421 WHERE Sequence IN ( |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
422 SELECT Sequence |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
423 FROM UniProtKB |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
424 GROUP BY Sequence, Gene_Name |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
425 HAVING count(*) > 1 |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
426 ) |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
427 ORDER BY Sequence |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
428 """ |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
429 ) |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
430 duplicate_count = 0 |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
431 old_seq = "" |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
432 for row in cur.fetchall(): |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
433 if duplicate_count == 0: |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
434 print( |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
435 "\nEach of the following sequences is associated with several accession IDs (which are listed in the first column) but the same gene ID (which is listed in the second column)." |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
436 ) |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
437 if row[2] != old_seq: |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
438 old_seq = row[2] |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
439 duplicate_count += 1 |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
440 if options.warn_duplicates: |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
441 print("\n%s\t%s\t%s" % row) |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
442 else: |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
443 if options.warn_duplicates: |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
444 print("%s\t%s" % (row[0], row[1])) |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
445 if duplicate_count > 0: |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
446 print( |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
447 "\n%d sequences have duplicated accession IDs\n" % duplicate_count |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
448 ) |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
449 |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
450 print("%s accession sequences will be searched\n" % sequence_count) |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
451 |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
452 # print(auto.dump()) |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
453 |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
454 # Convert the trie to an automaton (a finite-state machine) |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
455 auto.make_automaton() |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
456 |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
457 # Execute query for seqs and metadata without fetching the results yet |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
458 uniprot_seq_and_id = cur.execute(UNIPROT_SEQ_AND_ID_SQL) |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
459 while 1: |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
460 batch = uniprot_seq_and_id.fetchmany(size=50) |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
461 if not batch: |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
462 break |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
463 for Sequence, UniProtKB_id in batch: |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
464 if Sequence is not None: |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
465 for end_index, (insert_order, original_value) in auto.iter( |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
466 Sequence |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
467 ): |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
468 ker.execute( |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
469 """ |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
470 INSERT INTO deppep_UniProtKB |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
471 (deppep_id,UniProtKB_id,pos_start,pos_end) |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
472 VALUES (?,?,?,?) |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
473 """, |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
474 ( |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
475 insert_order, |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
476 UniProtKB_id, |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
477 1 + end_index - len(original_value), |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
478 end_index, |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
479 ), |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
480 ) |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
481 else: |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
482 raise ValueError( |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
483 "UniProtKB_id %s, but Sequence is None: Check whether SwissProt file is missing sequence for this ID" |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
484 % (UniProtKB_id,) |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
485 ) |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
486 ker.execute( |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
487 """ |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
488 SELECT count(*) || ' accession-peptide-phosphopeptide combinations were found' |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
489 FROM uniprotkb_pep_ppep_view |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
490 """ |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
491 ) |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
492 for row in ker.fetchall(): |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
493 print(row[0]) |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
494 |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
495 ker.execute( |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
496 """ |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
497 SELECT count(*) || ' accession matches were found', count(*) AS accession_count |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
498 FROM ( |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
499 SELECT accession |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
500 FROM uniprotkb_pep_ppep_view |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
501 GROUP BY accession |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
502 ) |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
503 """ |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
504 ) |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
505 for row in ker.fetchall(): |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
506 print(row[0]) |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
507 |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
508 ker.execute( |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
509 """ |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
510 SELECT count(*) || ' peptide matches were found' |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
511 FROM ( |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
512 SELECT peptide |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
513 FROM uniprotkb_pep_ppep_view |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
514 GROUP BY peptide |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
515 ) |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
516 """ |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
517 ) |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
518 for row in ker.fetchall(): |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
519 print(row[0]) |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
520 |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
521 ker.execute( |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
522 """ |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
523 SELECT count(*) || ' phosphopeptide matches were found', count(*) AS phosphopeptide_count |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
524 FROM ( |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
525 SELECT phosphopeptide |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
526 FROM uniprotkb_pep_ppep_view |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
527 GROUP BY phosphopeptide |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
528 ) |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
529 """ |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
530 ) |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
531 for row in ker.fetchall(): |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
532 print(row[0]) |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
533 |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
534 # link peptides not found in sequence database to a dummy sequence-record |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
535 ker.execute( |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
536 """ |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
537 INSERT INTO deppep_UniProtKB(deppep_id,UniProtKB_id,pos_start,pos_end) |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
538 SELECT id, 'No Uniprot_ID', 0, 0 |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
539 FROM deppep |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
540 WHERE id NOT IN (SELECT deppep_id FROM deppep_UniProtKB) |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
541 """ |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
542 ) |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
543 |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
544 con.commit() |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
545 ker.execute("vacuum") |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
546 con.close() |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
547 |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
548 |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
if __name__ == "__main__":
    # Script entry point: run __main__() once, bracketed by two
    # time.perf_counter() readings, then report how long it took.
    wrap_start_time = time.perf_counter()
    __main__()
    wrap_stop_time = time.perf_counter()

    # perf_counter() readings are in fractional seconds; scale the
    # difference to milliseconds.  "%d" truncates the float to an integer.
    elapsed_ms = (wrap_stop_time - wrap_start_time) * 1000
    print("\nThe matching process took %d milliseconds to run.\n" % elapsed_ms)
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
559 |
8dfd5d2b5903
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
560 # vim: sw=4 ts=4 et ai : |