comparison search_ppep.py @ 1:b76c75521d91 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 43e7a43b545c24b2dc33d039198551c032aa79be
author galaxyp
date Fri, 28 Oct 2022 18:26:42 +0000
parents 8dfd5d2b5903
children
comparison
equal deleted inserted replaced
0:8dfd5d2b5903 1:b76c75521d91
235 from UniProtKB 235 from UniProtKB
236 """ 236 """
237 237
238 # Parse Command Line 238 # Parse Command Line
239 parser = argparse.ArgumentParser( 239 parser = argparse.ArgumentParser(
240 description="Phopsphoproteomic Enrichment phosphopeptide SwissProt search (in place in SQLite DB)." 240 description=" ".join([
241 "Phopsphoproteomic Enrichment",
242 "phosphopeptide SwissProt search (in place in SQLite DB)."
243 ])
241 ) 244 )
242 245
243 # inputs: 246 # inputs:
244 # Phosphopeptide data for experimental results, including the intensities 247 # Phosphopeptide data for experimental results, including the intensities
245 # and the mapping to kinase domains, in tabular format. 248 # and the mapping to kinase domains, in tabular format.
247 "--phosphopeptides", 250 "--phosphopeptides",
248 "-p", 251 "-p",
249 nargs=1, 252 nargs=1,
250 required=True, 253 required=True,
251 dest="phosphopeptides", 254 dest="phosphopeptides",
252 help="Phosphopeptide data for experimental results, generated by the Phopsphoproteomic Enrichment Localization Filter tool", 255 help=" ".join([
256 "Phosphopeptide data for experimental results,",
257 "generated by the Phopsphoproteomic Enrichment Localization",
258 "Filter tool"
259 ]),
253 ) 260 )
254 parser.add_argument( 261 parser.add_argument(
255 "--uniprotkb", 262 "--uniprotkb",
256 "-u", 263 "-u",
257 nargs=1, 264 nargs=1,
258 required=True, 265 required=True,
259 dest="uniprotkb", 266 dest="uniprotkb",
260 help="UniProtKB/Swiss-Prot data, converted from FASTA format by the Phopsphoproteomic Enrichment Kinase Mapping tool", 267 help=" ".join([
268 "UniProtKB/Swiss-Prot data, converted from FASTA format by the",
269 "Phopsphoproteomic Enrichment Kinase Mapping tool"
270 ]),
261 ) 271 )
262 parser.add_argument( 272 parser.add_argument(
263 "--schema", 273 "--schema",
264 action="store_true", 274 action="store_true",
265 dest="db_schema", 275 dest="db_schema",
308 ker = con.cursor() 318 ker = con.cursor()
309 319
310 cur.executescript(DROP_TABLES_SQL) 320 cur.executescript(DROP_TABLES_SQL)
311 321
312 # if options.db_schema: 322 # if options.db_schema:
313 # print("\nAfter dropping tables/views that are to be created, schema is:") 323 # print("\nAfter dropping tables/views that are to be created,"
324 # + schema is:")
314 # cur.execute("SELECT * FROM sqlite_schema") 325 # cur.execute("SELECT * FROM sqlite_schema")
315 # for row in cur.fetchall(): 326 # for row in cur.fetchall():
316 # if row[4] is not None: 327 # if row[4] is not None:
317 # print("%s;" % row[4]) 328 # print("%s;" % row[4])
318 329
401 cur.execute("SELECT count(*) FROM (SELECT seq FROM deppep GROUP BY seq)") 412 cur.execute("SELECT count(*) FROM (SELECT seq FROM deppep GROUP BY seq)")
402 for row in cur.fetchall(): 413 for row in cur.fetchall():
403 deppep_count = row[0] 414 deppep_count = row[0]
404 415
405 cur.execute( 416 cur.execute(
406 "SELECT count(*) FROM (SELECT Sequence FROM UniProtKB GROUP BY Sequence)" 417 """
418 SELECT count(*) FROM (
419 SELECT Sequence FROM UniProtKB GROUP BY Sequence
420 )
421 """
407 ) 422 )
408 for row in cur.fetchall(): 423 for row in cur.fetchall():
409 sequence_count = row[0] 424 sequence_count = row[0]
410 425
411 print("%d phosphopeptides were read from input" % ppep_count) 426 print("%d phosphopeptides were read from input" % ppep_count)
429 ) 444 )
430 duplicate_count = 0 445 duplicate_count = 0
431 old_seq = "" 446 old_seq = ""
432 for row in cur.fetchall(): 447 for row in cur.fetchall():
433 if duplicate_count == 0: 448 if duplicate_count == 0:
434 print( 449 print(" ".join([
435 "\nEach of the following sequences is associated with several accession IDs (which are listed in the first column) but the same gene ID (which is listed in the second column)." 450 "\nEach of the following sequences is associated with several",
436 ) 451 "accession IDs (which are listed in the first column) but",
452 "the same gene ID (which is listed in the second column)."
453 ]))
437 if row[2] != old_seq: 454 if row[2] != old_seq:
438 old_seq = row[2] 455 old_seq = row[2]
439 duplicate_count += 1 456 duplicate_count += 1
440 if options.warn_duplicates: 457 if options.warn_duplicates:
441 print("\n%s\t%s\t%s" % row) 458 print("\n%s\t%s\t%s" % row)
478 end_index, 495 end_index,
479 ), 496 ),
480 ) 497 )
481 else: 498 else:
482 raise ValueError( 499 raise ValueError(
483 "UniProtKB_id %s, but Sequence is None: Check whether SwissProt file is missing sequence for this ID" 500 "UniProtKB_id %s, but Sequence is None: %s %s"
484 % (UniProtKB_id,) 501 % (
502 UniProtKB_id,
503 "Check whether SwissProt file is missing",
504 "the sequence for this ID")
485 ) 505 )
486 ker.execute( 506 ker.execute(
487 """ 507 """
488 SELECT count(*) || ' accession-peptide-phosphopeptide combinations were found' 508 SELECT
489 FROM uniprotkb_pep_ppep_view 509 count(*) ||
510 ' accession-peptide-phosphopeptide combinations were found'
511 FROM
512 uniprotkb_pep_ppep_view
490 """ 513 """
491 ) 514 )
492 for row in ker.fetchall(): 515 for row in ker.fetchall():
493 print(row[0]) 516 print(row[0])
494 517
495 ker.execute( 518 ker.execute(
496 """ 519 """
497 SELECT count(*) || ' accession matches were found', count(*) AS accession_count 520 SELECT
521 count(*) || ' accession matches were found',
522 count(*) AS accession_count
498 FROM ( 523 FROM (
499 SELECT accession 524 SELECT accession
500 FROM uniprotkb_pep_ppep_view 525 FROM uniprotkb_pep_ppep_view
501 GROUP BY accession 526 GROUP BY accession
502 ) 527 )
518 for row in ker.fetchall(): 543 for row in ker.fetchall():
519 print(row[0]) 544 print(row[0])
520 545
521 ker.execute( 546 ker.execute(
522 """ 547 """
523 SELECT count(*) || ' phosphopeptide matches were found', count(*) AS phosphopeptide_count 548 SELECT
549 count(*) || ' phosphopeptide matches were found',
550 count(*) AS phosphopeptide_count
524 FROM ( 551 FROM (
525 SELECT phosphopeptide 552 SELECT phosphopeptide
526 FROM uniprotkb_pep_ppep_view 553 FROM uniprotkb_pep_ppep_view
527 GROUP BY phosphopeptide 554 GROUP BY phosphopeptide
528 ) 555 )