Mercurial > repos > greg > ensure_synced
comparison ensure_synced.py @ 8:aaa6ae7c64de draft
Uploaded
author | greg |
---|---|
date | Tue, 25 Jan 2022 14:24:54 +0000 |
parents | 59edc91d0bea |
children | 05920a4bd5b6 |
Comparison legend: equal | deleted | inserted | replaced
7:59edc91d0bea | 8:aaa6ae7c64de |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 | |
3 | 2 |
4 import argparse | 3 import argparse |
5 import sys | 4 import sys |
6 | 5 |
7 import psycopg2 | 6 import psycopg2 |
21 self.parse_args() | 20 self.parse_args() |
22 self.outfh = open(self.args.output, "w") | 21 self.outfh = open(self.args.output, "w") |
23 self.connect_db() | 22 self.connect_db() |
24 self.engine = create_engine(self.args.database_connection_string) | 23 self.engine = create_engine(self.args.database_connection_string) |
25 self.metadata = MetaData(self.engine) | 24 self.metadata = MetaData(self.engine) |
26 self.affy_ids_from_db = [] | 25 self.coral_mlg_rep_sample_ids_from_db = [] |
27 self.affy_ids_from_file = [] | 26 self.affy_ids_from_file = [] |
28 | 27 |
29 def connect_db(self): | 28 def connect_db(self): |
30 url = make_url(self.args.database_connection_string) | 29 url = make_url(self.args.database_connection_string) |
31 args = url.translate_connect_args(username='user') | 30 args = url.translate_connect_args(username='user') |
32 args.update(url.query) | 31 args.update(url.query) |
33 assert url.get_dialect().name == 'postgresql', 'This script can only be used with PostgreSQL.' | 32 assert url.get_dialect().name == 'postgresql', 'This script can only be used with PostgreSQL.' |
34 self.conn = psycopg2.connect(**args) | 33 self.conn = psycopg2.connect(**args) |
35 | 34 |
36 def get_affy_ids_from_db(self): | 35 def get_coral_mlg_rep_sample_ids_from_db(self): |
37 cmd = "SELECT coral_mlg_rep_sample_id, coral_mlg_clonal_id FROM genotype WHERE coral_mlg_rep_sample_id IS NOT NULL AND coral_mlg_rep_sample_id != '' AND coral_mlg_clonal_id != 'failed' ORDER BY coral_mlg_rep_sample_id;" | 36 cmd = "SELECT coral_mlg_rep_sample_id, coral_mlg_clonal_id FROM genotype WHERE coral_mlg_rep_sample_id IS NOT NULL AND coral_mlg_rep_sample_id != '' AND coral_mlg_clonal_id != 'failed' ORDER BY coral_mlg_rep_sample_id;" |
38 cur = self.conn.cursor() | 37 cur = self.conn.cursor() |
39 cur.execute(cmd) | 38 cur.execute(cmd) |
40 rows = cur.fetchall() | 39 rows = cur.fetchall() |
41 for row in rows: | 40 for row in rows: |
42 self.affy_ids_from_db.append(row[0]) | 41 self.coral_mlg_rep_sample_ids_from_db.append(row[0]) |
43 self.affy_ids_from_db.sort() | 42 self.coral_mlg_rep_sample_ids_from_db.sort() |
44 | 43 |
45 def get_affy_ids_from_file(self, f): | 44 def get_affy_ids_from_file(self, f): |
46 with open(f) as fh: | 45 with open(f) as fh: |
47 for line in fh: | 46 for line in fh: |
48 line = line.strip() | 47 line = line.strip() |
66 parser.add_argument('--affy_ids_from_file', dest='affy_ids_from_file', help='Affy ids taken from all previously genotyped samples vcf file') | 65 parser.add_argument('--affy_ids_from_file', dest='affy_ids_from_file', help='Affy ids taken from all previously genotyped samples vcf file') |
67 parser.add_argument('--output', dest='output', help='Output dataset'), | 66 parser.add_argument('--output', dest='output', help='Output dataset'), |
68 self.args = parser.parse_args() | 67 self.args = parser.parse_args() |
69 | 68 |
70 def run(self): | 69 def run(self): |
71 self.get_affy_ids_from_db() | 70 self.get_coral_mlg_rep_sample_ids_from_db() |
72 self.get_affy_ids_from_file(self.args.affy_ids_from_file) | 71 self.get_affy_ids_from_file(self.args.affy_ids_from_file) |
73 if self.affy_ids_from_db == self.affy_ids_from_file: | 72 if self.coral_mlg_rep_sample_ids_from_db == self.affy_ids_from_file: |
74 in_sync = True | 73 in_sync = True |
75 self.log("The selected file is in sync with the database.\n\n") | 74 self.log("The selected file is in sync with the database.\n\n") |
76 else: | 75 else: |
77 in_sync = False | 76 in_sync = False |
78 self.log("The selected file is not in sync with the database.\n\n") | 77 self.log("The selected file is not in sync with the database.\n\n") |
79 num_affy_ids_from_db = len(self.affy_ids_from_db) | 78 num_coral_mlg_rep_sample_ids_from_db = len(self.coral_mlg_rep_sample_ids_from_db) |
80 self.log("Number of Affymetrix ids in the database: %d\n" % num_affy_ids_from_db) | 79 self.log("Number of coral mlg rep sample ids in the database: %d\n" % num_coral_mlg_rep_sample_ids_from_db) |
81 num_affy_ids_from_file = len(self.affy_ids_from_file) | 80 num_affy_ids_from_file = len(self.affy_ids_from_file) |
82 self.log("Number of Affymetrix ids in the file: %d\n" % num_affy_ids_from_file) | 81 self.log("Number of Affymetrix ids in the file: %d\n" % num_affy_ids_from_file) |
83 if not in_sync: | 82 if not in_sync: |
84 if num_affy_ids_from_db > num_affy_ids_from_file: | 83 if num_coral_mlg_rep_sample_ids_from_db > num_affy_ids_from_file: |
85 self.log("The database contains the following Affymetrix ids that are not in the file.\n") | 84 self.log("The database contains the following Affymetrix ids that are not in the file.\n") |
86 else: | 85 else: |
87 self.log("The file contains the following Affymetrix ids that are not in the database.\n") | 86 self.log("The file contains the following Affymetrix ids that are not in the database.\n") |
88 diff_list = self.get_difference(self.affy_ids_from_db, self.affy_ids_from_file) | 87 diff_list = self.get_difference(self.coral_mlg_rep_sample_ids_from_db, self.affy_ids_from_file) |
89 for affy_id in diff_list: | 88 for affy_id in diff_list: |
90 self.log("%s\n" % affy_id) | 89 self.log("%s\n" % affy_id) |
91 self.outfh.flush() | 90 self.outfh.flush() |
92 self.outfh.close() | 91 self.outfh.close() |
93 sys.exit(1) | 92 sys.exit(1) |