annotate bionumeric_converter.py @ 0:b000a3130db8 draft

planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a
author nml
date Mon, 18 Mar 2019 13:15:57 -0400
parents
children 07dfb8fd47f4
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
b000a3130db8 planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a
nml
parents:
diff changeset
1 #!/usr/bin/env python
b000a3130db8 planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a
nml
parents:
diff changeset
2
b000a3130db8 planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a
nml
parents:
diff changeset
3 # Import dependencies needed
b000a3130db8 planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a
nml
parents:
diff changeset
4 import argparse
b000a3130db8 planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a
nml
parents:
diff changeset
5
b000a3130db8 planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a
nml
parents:
diff changeset
6 import pandas as pd
b000a3130db8 planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a
nml
parents:
diff changeset
7
b000a3130db8 planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a
nml
parents:
diff changeset
8 # Define the main function:
b000a3130db8 planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a
nml
parents:
diff changeset
9
b000a3130db8 planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a
nml
parents:
diff changeset
10
b000a3130db8 planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a
nml
parents:
diff changeset
def main():
    """Command-line entry point: convert a TSV report into a CSV file.

    Options:
        -f / --filename  path of the tab-separated input (required).
        -o / --output    path of the CSV to write (defaults to 'output.csv').

    The input is loaded with its commas replaced, over-long QC messages
    are shortened, and the resulting table is written without the index
    column.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '-f',
        '--filename',
        required=True,
        help='Specify your tsv input')
    cli.add_argument(
        '-o',
        '--output',
        default='output.csv',
        help='Specify output name')
    options = cli.parse_args()

    # Pipeline: load + strip commas -> shorten QC messages -> write CSV.
    cleaned = comma_remover(options.filename)
    shortened = qc_shortener(cleaned)
    shortened.to_csv(options.output, index=False)
b000a3130db8 planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a
nml
parents:
diff changeset
30
b000a3130db8 planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a
nml
parents:
diff changeset
31 # Remove comma function:
b000a3130db8 planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a
nml
parents:
diff changeset
32
b000a3130db8 planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a
nml
parents:
diff changeset
33
b000a3130db8 planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a
nml
parents:
diff changeset
def comma_remover(tsv_file):
    """Load a tab-separated file and replace commas in its values with '/'.

    The replacement is a regex substitution applied to the whole table,
    not only to the QC message column.

    Args:
        tsv_file: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        A new DataFrame with the substitution applied.
    """
    table = pd.read_csv(tsv_file, sep='\t')
    return table.replace(to_replace=',', value='/', regex=True)
b000a3130db8 planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a
nml
parents:
diff changeset
40
b000a3130db8 planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a
nml
parents:
diff changeset
41 # Shorten QC results:
b000a3130db8 planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a
nml
parents:
diff changeset
42
b000a3130db8 planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a
nml
parents:
diff changeset
43
b000a3130db8 planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a
nml
parents:
diff changeset
def qc_shortener(df):
    """Shorten over-long entries in the ``qc_message`` column.

    A message longer than 150 characters is cut at its first ``|`` and
    prefixed with ``"Truncated after first '|' : "``. Messages of 150
    characters or fewer are left untouched, as are long messages that
    contain no ``|`` at all, because there is no delimiter to cut at.

    Args:
        df: DataFrame with a ``qc_message`` column. It is modified in place.

    Returns:
        The same DataFrame, for convenient chaining.
    """
    max_length = 150
    prefix = "Truncated after first '|' : "

    for label in df.index:
        message = str(df.at[label, 'qc_message'])
        if len(message) <= max_length:
            continue

        head, separator, _ = message.partition('|')
        if not separator:
            # Without this guard a missing '|' would be treated as index -1
            # and silently drop the last character while still claiming the
            # message was cut at a '|'.
            continue

        # Write only the affected cell instead of rescanning the whole
        # column for every long message.
        df.at[label, 'qc_message'] = prefix + head
    return df
b000a3130db8 planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a
nml
parents:
diff changeset
52
b000a3130db8 planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a
nml
parents:
diff changeset
53
b000a3130db8 planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a
nml
parents:
diff changeset
# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()