Mercurial > repos > nml > biohansel_bionumeric_converter
diff bionumeric_converter.py @ 0:b000a3130db8 draft
planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a
author | nml |
---|---|
date | Mon, 18 Mar 2019 13:15:57 -0400 |
parents | |
children | 07dfb8fd47f4 |
line wrap: on
line diff
#!/usr/bin/env python
"""Convert a tab-separated results table into a comma-separated file.

Commas inside cell values are replaced with '/' and over-long
``qc_message`` values are shortened before the table is written as CSV.
"""

# Import dependencies needed
import argparse

import pandas as pd

# Column holding the QC text that may need shortening.
QC_COLUMN = 'qc_message'
# Messages longer than this many characters are shortened.
MAX_QC_LENGTH = 150
# Text placed in front of every shortened message.
TRUNCATION_PREFIX = "Truncated after first '|' : "


def main(argv=None):
    """Parse the command line, convert the input table and write the CSV.

    Args:
        argv: Optional list of command-line arguments. When ``None``
            (the default) argparse reads ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-f',
        '--filename',
        required=True,
        help='Specify your tsv input')
    parser.add_argument(
        '-o',
        '--output',
        default='output.csv',
        help='Specify output name')
    args = parser.parse_args(argv)

    no_comma_tsv = comma_remover(args.filename)
    df = qc_shortener(no_comma_tsv)
    df.to_csv(args.output, index=False)


def comma_remover(tsv_file):
    """Load a tab-separated file and replace commas in its values.

    Args:
        tsv_file: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        A new DataFrame in which every ',' found in a string value has
        been replaced by '/'. The replacement is applied to the whole
        table, not only to the QC message column.
    """
    df = pd.read_csv(tsv_file, sep='\t')
    # regex=True makes this a substring replacement rather than a
    # whole-cell match.
    return df.replace(',', '/', regex=True)


def _shorten_message(message, max_length):
    """Return the shortened form of *message*, or None if it is kept as is.

    A message is shortened only when it is longer than *max_length* and
    contains a '|'; the result is the prefix text followed by everything
    before the first '|'.
    """
    if len(message) <= max_length:
        return None
    head, separator, _ = message.partition('|')
    if not separator:
        # No '|' to cut at: keep the message intact rather than dropping
        # its last character (str.find() would have returned -1 here).
        return None
    return TRUNCATION_PREFIX + head


def qc_shortener(df, max_length=MAX_QC_LENGTH):
    """Shorten over-long ``qc_message`` values in place.

    Each value is examined through its ``str()`` form. Values longer than
    *max_length* characters that contain a '|' are replaced by
    ``"Truncated after first '|' : "`` followed by the text before the
    first '|'. All other values are left untouched.

    Args:
        df: DataFrame with a ``qc_message`` column. It is modified in place.
        max_length: Length above which a message is shortened
            (default 150).

    Returns:
        The same DataFrame object that was passed in.

    Raises:
        KeyError: If *df* has rows but no ``qc_message`` column.
    """
    for row_label in df.index:
        shortened = _shorten_message(
            str(df.at[row_label, QC_COLUMN]), max_length)
        if shortened is not None:
            # Write only this cell. A column-wide replace would also
            # rewrite other rows and let an already shortened message be
            # cut a second time at the '|' inside the prefix.
            df.at[row_label, QC_COLUMN] = shortened
    return df


if __name__ == '__main__':
    main()