view bionumeric_converter.py @ 1:07dfb8fd47f4 draft default tip

planemo upload commit e0d65bf0850ce95ffb89982e61f2136fcf0359ee
author nml
date Mon, 13 May 2019 12:59:15 -0400
parents b000a3130db8
children
line wrap: on
line source

#!/usr/bin/env python

# Import required dependencies
import argparse

import pandas as pd

# Define the main function:


def main():
    """Command-line entry point: turn a tab-separated table into a CSV file.

    Reads the file named by ``-f/--filename`` as tab-separated text, swaps
    every comma found inside string cell values for ``/``, passes the table
    through ``qc_shortener`` and writes the result, without the index
    column, to the path named by ``-o/--output`` (``output.csv`` when the
    option is omitted).
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '-f', '--filename',
        required=True,
        help='Specify your biohansel tsv or other tabular separated input')
    cli.add_argument(
        '-o', '--output',
        default='output.csv',
        help='Specify output name')
    options = cli.parse_args()

    table = pd.read_csv(options.filename, sep='\t')

    # regex=True makes the comma match anywhere inside a cell's text rather
    # than only cells that are exactly ','.
    table = table.replace(',', '/', regex=True)

    qc_shortener(table).to_csv(options.output, index=False)

# Shorten QC results:


def splittingstrings(string, length):
    """Lazily cut ``string`` into consecutive pieces of ``length`` characters.

    Returns a generator that yields the pieces in order. Only the last piece
    may be shorter than ``length``; an empty ``string`` yields nothing.
    """
    # The range is built eagerly, so an invalid step (length == 0) is reported
    # at call time rather than when the generator is first consumed.
    offsets = range(0, len(string), length)
    return (string[begin:begin + length] for begin in offsets)


def qc_shortener(df, max_length=150):
    """Spread over-long ``qc_message`` values across extra columns.

    Every ``qc_message`` whose text is longer than ``max_length`` characters
    is cut into consecutive pieces of at most ``max_length`` characters. The
    first piece stays in ``qc_message``; the remaining ones are written to
    ``qc_message_1``, ``qc_message_2``, ... of the same row, and those
    columns are created on demand.

    Args:
        df: Table to process. It is modified in place.
        max_length: Maximum number of characters kept per cell. Defaults to
            150, the limit that used to be hard-coded.

    Returns:
        The same ``df`` object. A table that has no ``qc_message`` column is
        returned untouched instead of failing with ``KeyError``.

    Raises:
        ValueError: If ``max_length`` is smaller than 1.
    """
    if max_length < 1:
        raise ValueError('max_length must be a positive integer')

    # Inputs other than biohansel output may lack the column entirely;
    # there is nothing to shorten in that case.
    if 'qc_message' not in df.columns:
        return df

    # Take a snapshot of the cells up front: the loop below rewrites cells
    # and adds columns, which should not interfere with the iteration.
    cells = df['qc_message'].tolist()
    for label, cell in zip(df.index, cells):
        message = str(cell)
        if len(message) <= max_length:
            continue
        pieces = list(splittingstrings(message, max_length))
        df.at[label, 'qc_message'] = pieces[0]
        for number, piece in enumerate(pieces[1:], start=1):
            df.at[label, 'qc_message_{}'.format(number)] = piece
    return df


# Run the converter only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()