Mercurial > repos > galaxyp > mqppep_preproc
comparison search_ppep.py @ 1:b76c75521d91 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 43e7a43b545c24b2dc33d039198551c032aa79be
author | galaxyp |
---|---|
date | Fri, 28 Oct 2022 18:26:42 +0000 |
parents | 8dfd5d2b5903 |
children |
comparison
equal
deleted
inserted
replaced
0:8dfd5d2b5903 | 1:b76c75521d91 |
---|---|
235 from UniProtKB | 235 from UniProtKB |
236 """ | 236 """ |
237 | 237 |
238 # Parse Command Line | 238 # Parse Command Line |
239 parser = argparse.ArgumentParser( | 239 parser = argparse.ArgumentParser( |
240 description="Phopsphoproteomic Enrichment phosphopeptide SwissProt search (in place in SQLite DB)." | 240 description=" ".join([ |
241 "Phopsphoproteomic Enrichment", | |
242 "phosphopeptide SwissProt search (in place in SQLite DB)." | |
243 ]) | |
241 ) | 244 ) |
242 | 245 |
243 # inputs: | 246 # inputs: |
244 # Phosphopeptide data for experimental results, including the intensities | 247 # Phosphopeptide data for experimental results, including the intensities |
245 # and the mapping to kinase domains, in tabular format. | 248 # and the mapping to kinase domains, in tabular format. |
247 "--phosphopeptides", | 250 "--phosphopeptides", |
248 "-p", | 251 "-p", |
249 nargs=1, | 252 nargs=1, |
250 required=True, | 253 required=True, |
251 dest="phosphopeptides", | 254 dest="phosphopeptides", |
252 help="Phosphopeptide data for experimental results, generated by the Phopsphoproteomic Enrichment Localization Filter tool", | 255 help=" ".join([ |
256 "Phosphopeptide data for experimental results,", | |
257 "generated by the Phopsphoproteomic Enrichment Localization", | |
258 "Filter tool" | |
259 ]), | |
253 ) | 260 ) |
254 parser.add_argument( | 261 parser.add_argument( |
255 "--uniprotkb", | 262 "--uniprotkb", |
256 "-u", | 263 "-u", |
257 nargs=1, | 264 nargs=1, |
258 required=True, | 265 required=True, |
259 dest="uniprotkb", | 266 dest="uniprotkb", |
260 help="UniProtKB/Swiss-Prot data, converted from FASTA format by the Phopsphoproteomic Enrichment Kinase Mapping tool", | 267 help=" ".join([ |
268 "UniProtKB/Swiss-Prot data, converted from FASTA format by the", | |
269 "Phopsphoproteomic Enrichment Kinase Mapping tool" | |
270 ]), | |
261 ) | 271 ) |
262 parser.add_argument( | 272 parser.add_argument( |
263 "--schema", | 273 "--schema", |
264 action="store_true", | 274 action="store_true", |
265 dest="db_schema", | 275 dest="db_schema", |
308 ker = con.cursor() | 318 ker = con.cursor() |
309 | 319 |
310 cur.executescript(DROP_TABLES_SQL) | 320 cur.executescript(DROP_TABLES_SQL) |
311 | 321 |
312 # if options.db_schema: | 322 # if options.db_schema: |
313 # print("\nAfter dropping tables/views that are to be created, schema is:") | 323 # print("\nAfter dropping tables/views that are to be created," |
324 # + schema is:") | |
314 # cur.execute("SELECT * FROM sqlite_schema") | 325 # cur.execute("SELECT * FROM sqlite_schema") |
315 # for row in cur.fetchall(): | 326 # for row in cur.fetchall(): |
316 # if row[4] is not None: | 327 # if row[4] is not None: |
317 # print("%s;" % row[4]) | 328 # print("%s;" % row[4]) |
318 | 329 |
401 cur.execute("SELECT count(*) FROM (SELECT seq FROM deppep GROUP BY seq)") | 412 cur.execute("SELECT count(*) FROM (SELECT seq FROM deppep GROUP BY seq)") |
402 for row in cur.fetchall(): | 413 for row in cur.fetchall(): |
403 deppep_count = row[0] | 414 deppep_count = row[0] |
404 | 415 |
405 cur.execute( | 416 cur.execute( |
406 "SELECT count(*) FROM (SELECT Sequence FROM UniProtKB GROUP BY Sequence)" | 417 """ |
418 SELECT count(*) FROM ( | |
419 SELECT Sequence FROM UniProtKB GROUP BY Sequence | |
420 ) | |
421 """ | |
407 ) | 422 ) |
408 for row in cur.fetchall(): | 423 for row in cur.fetchall(): |
409 sequence_count = row[0] | 424 sequence_count = row[0] |
410 | 425 |
411 print("%d phosphopeptides were read from input" % ppep_count) | 426 print("%d phosphopeptides were read from input" % ppep_count) |
429 ) | 444 ) |
430 duplicate_count = 0 | 445 duplicate_count = 0 |
431 old_seq = "" | 446 old_seq = "" |
432 for row in cur.fetchall(): | 447 for row in cur.fetchall(): |
433 if duplicate_count == 0: | 448 if duplicate_count == 0: |
434 print( | 449 print(" ".join([ |
435 "\nEach of the following sequences is associated with several accession IDs (which are listed in the first column) but the same gene ID (which is listed in the second column)." | 450 "\nEach of the following sequences is associated with several", |
436 ) | 451 "accession IDs (which are listed in the first column) but", |
452 "the same gene ID (which is listed in the second column)." | |
453 ])) | |
437 if row[2] != old_seq: | 454 if row[2] != old_seq: |
438 old_seq = row[2] | 455 old_seq = row[2] |
439 duplicate_count += 1 | 456 duplicate_count += 1 |
440 if options.warn_duplicates: | 457 if options.warn_duplicates: |
441 print("\n%s\t%s\t%s" % row) | 458 print("\n%s\t%s\t%s" % row) |
478 end_index, | 495 end_index, |
479 ), | 496 ), |
480 ) | 497 ) |
481 else: | 498 else: |
482 raise ValueError( | 499 raise ValueError( |
483 "UniProtKB_id %s, but Sequence is None: Check whether SwissProt file is missing sequence for this ID" | 500 "UniProtKB_id %s, but Sequence is None: %s %s" |
484 % (UniProtKB_id,) | 501 % ( |
502 UniProtKB_id, | |
503 "Check whether SwissProt file is missing", | |
504 "the sequence for this ID") | |
485 ) | 505 ) |
486 ker.execute( | 506 ker.execute( |
487 """ | 507 """ |
488 SELECT count(*) || ' accession-peptide-phosphopeptide combinations were found' | 508 SELECT |
489 FROM uniprotkb_pep_ppep_view | 509 count(*) || |
510 ' accession-peptide-phosphopeptide combinations were found' | |
511 FROM | |
512 uniprotkb_pep_ppep_view | |
490 """ | 513 """ |
491 ) | 514 ) |
492 for row in ker.fetchall(): | 515 for row in ker.fetchall(): |
493 print(row[0]) | 516 print(row[0]) |
494 | 517 |
495 ker.execute( | 518 ker.execute( |
496 """ | 519 """ |
497 SELECT count(*) || ' accession matches were found', count(*) AS accession_count | 520 SELECT |
521 count(*) || ' accession matches were found', | |
522 count(*) AS accession_count | |
498 FROM ( | 523 FROM ( |
499 SELECT accession | 524 SELECT accession |
500 FROM uniprotkb_pep_ppep_view | 525 FROM uniprotkb_pep_ppep_view |
501 GROUP BY accession | 526 GROUP BY accession |
502 ) | 527 ) |
518 for row in ker.fetchall(): | 543 for row in ker.fetchall(): |
519 print(row[0]) | 544 print(row[0]) |
520 | 545 |
521 ker.execute( | 546 ker.execute( |
522 """ | 547 """ |
523 SELECT count(*) || ' phosphopeptide matches were found', count(*) AS phosphopeptide_count | 548 SELECT |
549 count(*) || ' phosphopeptide matches were found', | |
550 count(*) AS phosphopeptide_count | |
524 FROM ( | 551 FROM ( |
525 SELECT phosphopeptide | 552 SELECT phosphopeptide |
526 FROM uniprotkb_pep_ppep_view | 553 FROM uniprotkb_pep_ppep_view |
527 GROUP BY phosphopeptide | 554 GROUP BY phosphopeptide |
528 ) | 555 ) |