Mercurial > repos > nml > biohansel_bionumeric_converter
comparison bionumeric_converter.py @ 0:b000a3130db8 draft
planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a
author | nml |
---|---|
date | Mon, 18 Mar 2019 13:15:57 -0400 |
parents | |
children | 07dfb8fd47f4 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:b000a3130db8 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 # Import dependencies needed | |
4 import argparse | |
5 | |
6 import pandas as pd | |
7 | |
8 # Define the main function: | |
9 | |
10 | |
def main():
    """Run the converter from the command line.

    Reads the tab-separated file given with ``-f/--filename``, passes it
    through ``comma_remover`` and then ``qc_shortener``, and writes the
    resulting table as CSV (without the index column) to the path given
    with ``-o/--output`` (``output.csv`` when omitted).
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '-f', '--filename', required=True, help='Specify your tsv input')
    cli.add_argument(
        '-o', '--output', default='output.csv', help='Specify output name')
    options = cli.parse_args()

    # Commas are replaced first; the QC messages are shortened afterwards.
    cleaned = qc_shortener(comma_remover(options.filename))
    cleaned.to_csv(options.output, index=False)
30 | |
31 # Remove comma function: | |
32 | |
33 | |
def comma_remover(tsv_file):
    """Load a tab-separated table and swap every comma for a slash.

    Args:
        tsv_file: Path or file-like object handed straight to
            ``pandas.read_csv`` with a tab separator.

    Returns:
        A new DataFrame in which each ``,`` found inside a string cell of
        any column has been replaced by ``/``. Column names are not
        touched.
    """
    table = pd.read_csv(tsv_file, sep='\t')
    # regex=True makes this a substring replacement rather than a
    # whole-cell match, so commas in the middle of a value are caught.
    return table.replace(to_replace=',', value='/', regex=True)
40 | |
41 # Shorten QC results: | |
42 | |
43 | |
def qc_shortener(df, max_length=150):
    """Shorten over-long entries in the ``qc_message`` column.

    A message whose text is longer than ``max_length`` characters is cut
    just before its first ``|`` and prefixed with
    ``"Truncated after first '|' : "``. Messages that are short enough, or
    that contain no ``|`` to cut at, are left exactly as they were.

    Each value is rewritten independently and only once, so rows that
    share the same message all receive the same shortened text and an
    already-prefixed message is never truncated a second time.

    Args:
        df: DataFrame holding a ``qc_message`` column. The column is
            reassigned on this object (the frame is modified in place).
        max_length: Longest message length, in characters, that is kept
            unmodified. Defaults to 150.

    Returns:
        The same DataFrame object that was passed in.

    Raises:
        KeyError: If ``df`` has no ``qc_message`` column.
    """
    prefix = "Truncated after first '|' : "

    def _shorten(value):
        # Non-string cells (e.g. NaN) are measured via their str() form
        # but returned untouched when no shortening applies.
        message = str(value)
        if len(message) <= max_length:
            return value
        head, separator, _ = message.partition('|')
        if not separator:
            # Nothing to cut at: keep the full text rather than dropping
            # its last character and mislabelling it as truncated.
            return value
        return prefix + head

    df['qc_message'] = df['qc_message'].map(_shorten)
    return df
52 | |
53 | |
# Only run the converter when executed as a script, not when imported.
if __name__ == "__main__":
    main()