Mercurial > repos > nml > biohansel_bionumeric_converter
diff bionumeric_converter.py @ 1:07dfb8fd47f4 draft default tip
planemo upload commit e0d65bf0850ce95ffb89982e61f2136fcf0359ee
author | nml |
---|---|
date | Mon, 13 May 2019 12:59:15 -0400 |
parents | b000a3130db8 |
children |
line wrap: on
line diff
--- a/bionumeric_converter.py Mon Mar 18 13:15:57 2019 -0400
+++ b/bionumeric_converter.py Mon May 13 12:59:15 2019 -0400
@@ -14,7 +14,7 @@
         '-f',
         '--filename',
         required=True,
-        help='Specify your tsv input')
+        help='Specify your biohansel tsv or other tabular separated input')
     parser.add_argument(
         '-o',
         '--output',
@@ -24,30 +24,27 @@
 
     tsv_file = args.filename
     out_name = args.output
-    no_comma_tsv = comma_remover(tsv_file)
-    df = qc_shortener(no_comma_tsv)
-    df.to_csv(out_name, index=False)
-
-# Remove comma function:
-
+    df_input = pd.read_csv(tsv_file, sep='\t')
 
-def comma_remover(tsv_file):
-    # Create a table from the tsv file as an input into the dataframe.
-    df = pd.read_csv(tsv_file, sep='\t')
-    # Change all commas to / in the QC message
-    no_comma_tsv = df.replace(',', '/', regex=True)
-    return no_comma_tsv
+    df_no_comma = df_input.replace(',', '/', regex=True)
+    df = qc_shortener(df_no_comma)
+    df.to_csv(out_name, index=False)
 
 # Shorten QC results:
 
 
+def splittingstrings(string, length):
+    return (string[0+i:length+i] for i in range(0, len(string), length))
+
+
 def qc_shortener(df):
-    for count in df.index:
-        message = str(df.at[count, 'qc_message'])
+    for i, row in df.iterrows():
+        message = str(row['qc_message'])
         if len(message) > 150:
-            results = message.find('|')
-            new_message = "Truncated after first '|' : " + message[0:results]
-            df['qc_message'] = df['qc_message'].replace(message, new_message)
+            message_list = list(splittingstrings(message, 150))
+            df.at[i, 'qc_message'] = message_list[0]
+            for val in range(1, len(message_list)):
+                df.at[i, 'qc_message_{}'.format(val)] = message_list[val]
     return df
 
 