changeset 0:c39c543605a2 draft default tip

Uploaded
author greg
date Wed, 02 Jun 2021 19:18:10 +0000
parents
children
files unique_mlg_id.py unique_mlg_id.xml
diffstat 2 files changed, 88 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/unique_mlg_id.py	Wed Jun 02 19:18:10 2021 +0000
@@ -0,0 +1,61 @@
+#!/usr/bin/env python
+import argparse
+import sys
+
+import psycopg2
+
+from sqlalchemy import MetaData, create_engine
+from sqlalchemy.engine.url import make_url
+
+
+class UniqueMGLIDGenerator(object):
+    def __init__(self):
+        self.args = None
+        self.conn = None
+        self.parse_args()
+        self.outfh = open(self.args.output, "w")
+        self.connect_db()
+        self.engine = create_engine(self.args.database_connection_string)
+        self.metadata = MetaData(self.engine)
+
+    def parse_args(self):
+        parser = argparse.ArgumentParser()
+        parser.add_argument('--database_connection_string', dest='database_connection_string', help='Postgres database connection string'),
+        parser.add_argument('--output', dest='output', help='Output dataset'),
+        self.args = parser.parse_args()
+
+    def connect_db(self):
+        url = make_url(self.args.database_connection_string)
+        args = url.translate_connect_args(username='user')
+        args.update(url.query)
+        assert url.get_dialect().name == 'postgresql', 'This script can only be used with PostgreSQL.'
+        self.conn = psycopg2.connect(**args)
+
+    def run(self):
+        cmd = """
+              SELECT DISTINCT coral_mlg_rep_sample_id
+              FROM genotype
+              WHERE coral_mlg_rep_sample_id is not NULL
+              ORDER BY coral_mlg_rep_sample_id;
+        """
+        cur = self.conn.cursor()
+        cur.execute(cmd)
+        rows = cur.fetchall()
+        for tup in rows:
+            self.outfh.write("%s\n" % tup[0])
+        self.outfh.close()
+
+    def shutdown(self):
+        self.conn.close()
+
+    def stop_err(self, msg):
+        sys.stderr.write(msg)
+        self.outfh.flush()
+        self.outfh.close()
+        sys.exit(1)
+
+
+if __name__ == '__main__':
+    umlgidg = UniqueMGLIDGenerator()
+    umlgidg.run()
+    umlgidg.shutdown()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/unique_mlg_id.xml	Wed Jun 02 19:18:10 2021 +0000
@@ -0,0 +1,27 @@
+<tool id="unique_mlg_id" name="Unique MLG IDs" version="1.0.0">
+    <description></description>
+    <command detect_errors="exit_code"><![CDATA[
+python $__tool_directory__/unique_mlg_id.py
+--database_connection_string '$__app__.config.corals_database_connection'
+--output '$output'
+]]></command>
+    <inputs>
+    </inputs>
+    <outputs>
+        <data name="output" format="tabular"/>
+    </outputs>
+    <tests>
+        <test>
+            <!--Testing this tool is a bit difficult at the current time.-->
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+Queries the  corals (stag) database to retrieve the unique corals multi-locus genotype representative sample ids.
+    </help>
+    <citations>
+        <citation type="doi">https://doi.org/10.1038/s41598-020-69101-z</citation>
+    </citations>
+</tool>
+