Mercurial > repos > nml > biohansel_bionumeric_converter

diff bionumeric_converter.py @ 1:07dfb8fd47f4 draft default tip
planemo upload commit e0d65bf0850ce95ffb89982e61f2136fcf0359ee
author: nml
date: Mon, 13 May 2019 12:59:15 -0400
parents: b000a3130db8
--- a/bionumeric_converter.py	Mon Mar 18 13:15:57 2019 -0400
+++ b/bionumeric_converter.py	Mon May 13 12:59:15 2019 -0400
@@ -14,7 +14,7 @@
         '-f',
         '--filename',
         required=True,
-        help='Specify your tsv input')
+        help='Specify your biohansel tsv or other tabular separated input')
     parser.add_argument(
         '-o',
         '--output',
@@ -24,30 +24,27 @@
     tsv_file = args.filename
     out_name = args.output
 
-    no_comma_tsv = comma_remover(tsv_file)
-    df = qc_shortener(no_comma_tsv)
-    df.to_csv(out_name, index=False)
-
-# Remove comma function:
-
+    df_input = pd.read_csv(tsv_file, sep='\t')
 
-def comma_remover(tsv_file):
-    # Create a table from the tsv file as an input into the dataframe.
-    df = pd.read_csv(tsv_file, sep='\t')
-    # Change all commas to / in the QC message
-    no_comma_tsv = df.replace(',', '/', regex=True)
-    return no_comma_tsv
+    df_no_comma = df_input.replace(',', '/', regex=True)
+    df = qc_shortener(df_no_comma)
+    df.to_csv(out_name, index=False)
 
 # Shorten QC results:
 
 
+def splittingstrings(string, length):
+    return (string[0+i:length+i] for i in range(0, len(string), length))
+
+
 def qc_shortener(df):
-    for count in df.index:
-        message = str(df.at[count, 'qc_message'])
+    for i, row in df.iterrows():
+        message = str(row['qc_message'])
         if len(message) > 150:
-            results = message.find('|')
-            new_message = "Truncated after first '|' : " + message[0:results]
-            df['qc_message'] = df['qc_message'].replace(message, new_message)
+            message_list = list(splittingstrings(message, 150))
+            df.at[i, 'qc_message'] = message_list[0]
+            for val in range(1, len(message_list)):
+                df.at[i, 'qc_message_{}'.format(val)] = message_list[val]
     return df
author	nml
date	Mon, 13 May 2019 12:59:15 -0400
parents	b000a3130db8
children