Mercurial > repos > nml > biohansel_bionumeric_converter

diff bionumeric_converter.py @ 0:b000a3130db8 draft
planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a
author: nml
date: Mon, 18 Mar 2019 13:15:57 -0400
children: 07dfb8fd47f4
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bionumeric_converter.py	Mon Mar 18 13:15:57 2019 -0400
@@ -0,0 +1,55 @@
+#!/usr/bin/env python
+
+# Import the required dependencies
+import argparse
+
+import pandas as pd
+
+# Define the main function:
+
+
+def main():
+    """Command-line entry point: convert a tab-separated file to CSV.
+
+    Takes -f/--filename (required, the TSV input) and -o/--output (the CSV
+    path, 'output.csv' unless given), runs the table through comma_remover
+    and then qc_shortener, and writes the result without the row index.
+    """
+    cli = argparse.ArgumentParser()
+    cli.add_argument('-f', '--filename', required=True,
+                     help='Specify your tsv input')
+    cli.add_argument('-o', '--output', default='output.csv',
+                     help='Specify output name')
+    options = cli.parse_args()
+
+    # Clean the commas first, then shorten the QC messages.
+    cleaned = comma_remover(options.filename)
+    shortened = qc_shortener(cleaned)
+    # index=False keeps the DataFrame's row index out of the CSV.
+    shortened.to_csv(options.output, index=False)
+
+# Remove comma function:
+
+
+def comma_remover(tsv_file):
+    """Load a tab-separated file and return it with ',' swapped for '/' in string values."""
+    # sep='\t' makes pandas split the columns on tabs.
+    table = pd.read_csv(tsv_file, sep='\t')
+    # regex=True lets replace() match ',' inside strings, not only whole-cell values.
+    return table.replace(',', '/', regex=True)
+
+# Shorten QC results:
+
+
+def qc_shortener(df):
+    """Cut each 'qc_message' over 150 characters at its first '|'; edits and returns df."""
+    for row in df.index:
+        text = str(df.at[row, 'qc_message'])
+        # Require a '|': without one there is no cut point and the message must stay intact.
+        if len(text) > 150 and '|' in text:
+            df.at[row, 'qc_message'] = "Truncated after first '|' : " + text.split('|', 1)[0]
+    return df
+
+
+if __name__ == '__main__':
+    main()