Mercurial > repos > nml > biohansel_bionumeric_converter
comparison bionumeric_converter.py @ 1:07dfb8fd47f4 draft default tip
planemo upload commit e0d65bf0850ce95ffb89982e61f2136fcf0359ee
author | nml |
---|---|
date | Mon, 13 May 2019 12:59:15 -0400 |
parents | b000a3130db8 |
children |
comparison
equal
deleted
inserted
replaced
0:b000a3130db8 | 1:07dfb8fd47f4 |
---|---|
12 parser = argparse.ArgumentParser() | 12 parser = argparse.ArgumentParser() |
13 parser.add_argument( | 13 parser.add_argument( |
14 '-f', | 14 '-f', |
15 '--filename', | 15 '--filename', |
16 required=True, | 16 required=True, |
17 help='Specify your tsv input') | 17 help='Specify your biohansel tsv or other tabular separated input') |
18 parser.add_argument( | 18 parser.add_argument( |
19 '-o', | 19 '-o', |
20 '--output', | 20 '--output', |
21 default='output.csv', | 21 default='output.csv', |
22 help='Specify output name') | 22 help='Specify output name') |
23 args = parser.parse_args() | 23 args = parser.parse_args() |
24 tsv_file = args.filename | 24 tsv_file = args.filename |
25 out_name = args.output | 25 out_name = args.output |
26 | 26 |
27 no_comma_tsv = comma_remover(tsv_file) | 27 df_input = pd.read_csv(tsv_file, sep='\t') |
28 df = qc_shortener(no_comma_tsv) | 28 |
29 df_no_comma = df_input.replace(',', '/', regex=True) | |
30 df = qc_shortener(df_no_comma) | |
29 df.to_csv(out_name, index=False) | 31 df.to_csv(out_name, index=False) |
30 | |
31 # Remove comma function: | |
32 | |
33 | |
34 def comma_remover(tsv_file): | |
35 # Create a table from the tsv file as an input into the dataframe. | |
36 df = pd.read_csv(tsv_file, sep='\t') | |
37 # Change all commas to / in the QC message | |
38 no_comma_tsv = df.replace(',', '/', regex=True) | |
39 return no_comma_tsv | |
40 | 32 |
41 # Shorten QC results: | 33 # Shorten QC results: |
42 | 34 |
43 | 35 |
36 def splittingstrings(string, length): | |
37 return (string[0+i:length+i] for i in range(0, len(string), length)) | |
38 | |
39 | |
44 def qc_shortener(df): | 40 def qc_shortener(df): |
45 for count in df.index: | 41 for i, row in df.iterrows(): |
46 message = str(df.at[count, 'qc_message']) | 42 message = str(row['qc_message']) |
47 if len(message) > 150: | 43 if len(message) > 150: |
48 results = message.find('|') | 44 message_list = list(splittingstrings(message, 150)) |
49 new_message = "Truncated after first '|' : " + message[0:results] | 45 df.at[i, 'qc_message'] = message_list[0] |
50 df['qc_message'] = df['qc_message'].replace(message, new_message) | 46 for val in range(1, len(message_list)): |
47 df.at[i, 'qc_message_{}'.format(val)] = message_list[val] | |
51 return df | 48 return df |
52 | 49 |
53 | 50 |
54 if __name__ == '__main__': | 51 if __name__ == '__main__': |
55 main() | 52 main() |