Mercurial > repos > nml > biohansel_bionumeric_converter
view bionumeric_converter.py @ 0:b000a3130db8 draft
planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a
author | nml |
---|---|
date | Mon, 18 Mar 2019 13:15:57 -0400 |
parents | |
children | 07dfb8fd47f4 |
line wrap: on
line source
#!/usr/bin/env python
"""Convert a tab-separated results table into a comma-free CSV.

The script reads a TSV file, replaces every comma found in string cells
with '/', shortens over-long ``qc_message`` entries, and writes the result
as a CSV file.
"""

# Import dependencies needed
import argparse

import pandas as pd

# Messages longer than this many characters are shortened by qc_shortener().
QC_MESSAGE_MAX_LENGTH = 150

# Prefix placed in front of every shortened QC message.
TRUNCATION_PREFIX = "Truncated after first '|' : "


# Define the main function:
def main():
    """Parse the command line, convert the input TSV and write the CSV."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-f', '--filename',
        required=True,
        help='Specify your tsv input')
    parser.add_argument(
        '-o', '--output',
        default='output.csv',
        help='Specify output name')
    args = parser.parse_args()

    tsv_file = args.filename
    out_name = args.output

    no_comma_tsv = comma_remover(tsv_file)
    df = qc_shortener(no_comma_tsv)
    df.to_csv(out_name, index=False)


# Remove comma function:
def comma_remover(tsv_file):
    """Load a TSV file and replace every comma in its string cells with '/'.

    Args:
        tsv_file: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        A new DataFrame holding the table with commas replaced.
    """
    # Create a table from the tsv file as an input into the dataframe.
    df = pd.read_csv(tsv_file, sep='\t')

    # regex=True makes the replacement apply to substrings inside each cell
    # rather than only to cells that are exactly ','.
    no_comma_tsv = df.replace(',', '/', regex=True)

    return no_comma_tsv


# Shorten QC results:
def qc_shortener(df, max_length=QC_MESSAGE_MAX_LENGTH):
    """Shorten over-long entries of the ``qc_message`` column.

    A message whose string form is longer than ``max_length`` characters is
    replaced by the text preceding its first '|', prefixed with a note that
    it was truncated. Messages that are short enough, or that contain no
    '|' to cut at, are left exactly as they are.

    Args:
        df: DataFrame with a ``qc_message`` column. It is modified in place.
        max_length: Longest message length that is kept untouched.

    Returns:
        The same DataFrame object, with ``qc_message`` updated.

    Raises:
        KeyError: If ``df`` has no ``qc_message`` column.
    """

    def shorten(value):
        message = str(value)
        if len(message) <= max_length:
            return value

        head, separator, _ = message.partition('|')
        if not separator:
            # No '|' present: there is nothing to truncate at. Slicing with
            # the -1 returned by str.find() would silently drop the last
            # character, so keep the original value instead.
            return value

        return TRUNCATION_PREFIX + head

    # One pass over the column instead of a whole-column replace per row.
    df['qc_message'] = df['qc_message'].map(shorten)

    return df


if __name__ == '__main__':
    main()