annotate bionumeric_converter.py @ 1:07dfb8fd47f4 draft default tip

planemo upload commit e0d65bf0850ce95ffb89982e61f2136fcf0359ee
author nml
date Mon, 13 May 2019 12:59:15 -0400
parents b000a3130db8
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
b000a3130db8 planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a
nml
parents:
diff changeset
1 #!/usr/bin/env python
b000a3130db8 planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a
nml
parents:
diff changeset
2
b000a3130db8 planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a
nml
parents:
diff changeset
3 # Import dependencies needed
b000a3130db8 planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a
nml
parents:
diff changeset
4 import argparse
b000a3130db8 planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a
nml
parents:
diff changeset
5
b000a3130db8 planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a
nml
parents:
diff changeset
6 import pandas as pd
b000a3130db8 planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a
nml
parents:
diff changeset
7
b000a3130db8 planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a
nml
parents:
diff changeset
8 # Define the main function:
b000a3130db8 planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a
nml
parents:
diff changeset
9
b000a3130db8 planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a
nml
parents:
diff changeset
10
b000a3130db8 planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a
nml
parents:
diff changeset
def main():
    """Convert a tab-separated table into a comma-separated file.

    Command-line options:
        -f/--filename  path of the tab-separated input (required).
        -o/--output    path of the CSV to write (default ``output.csv``).

    The input is loaded with pandas, every comma found inside a text cell
    is replaced by ``/``, the frame is passed through ``qc_shortener`` and
    the result is written as CSV without the index column.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '-f',
        '--filename',
        required=True,
        help='Specify your biohansel tsv or other tabular separated input')
    cli.add_argument(
        '-o',
        '--output',
        default='output.csv',
        help='Specify output name')
    options = cli.parse_args()

    table = pd.read_csv(options.filename, sep='\t')

    # regex=True makes this a substring replacement, so commas embedded in
    # longer cell text are rewritten too, not only cells equal to ','.
    table = table.replace(',', '/', regex=True)

    qc_shortener(table).to_csv(options.output, index=False)
0
b000a3130db8 planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a
nml
parents:
diff changeset
32
b000a3130db8 planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a
nml
parents:
diff changeset
33 # Shorten QC results:
b000a3130db8 planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a
nml
parents:
diff changeset
34
b000a3130db8 planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a
nml
parents:
diff changeset
35
1
07dfb8fd47f4 planemo upload commit e0d65bf0850ce95ffb89982e61f2136fcf0359ee
nml
parents: 0
diff changeset
def splittingstrings(string, length):
    """Return a generator over consecutive ``length``-sized pieces of ``string``.

    The last piece is shorter when ``len(string)`` is not a multiple of
    ``length``; an empty ``string`` produces no pieces.

    Raises:
        ValueError: if ``length`` is smaller than 1.
    """
    # Checked eagerly: a zero step makes range() fail with an unrelated
    # message and a negative step would silently produce no pieces.
    if length < 1:
        raise ValueError('length must be a positive integer')
    return (string[start:start + length]
            for start in range(0, len(string), length))


def qc_shortener(df, max_length=150):
    """Split over-long ``qc_message`` values across extra columns.

    Every ``qc_message`` whose text form is longer than ``max_length``
    characters is cut into ``max_length``-sized pieces. The first piece
    stays in ``qc_message``; the following pieces go to ``qc_message_1``,
    ``qc_message_2``, ... on the same row. Rows that need fewer pieces are
    left missing (NaN) in newly created columns, and keep their existing
    values in overflow columns that were already present.

    Args:
        df: pandas DataFrame to process; it is modified in place.
        max_length: maximum number of characters kept per column.

    Returns:
        The same ``df`` object. A frame without a ``qc_message`` column, or
        without any over-long message, is returned untouched.

    Raises:
        ValueError: if a message has to be split and ``max_length`` < 1.
    """
    column = 'qc_message'
    if column not in df.columns:
        # Nothing to shorten for tables that carry no QC messages.
        return df

    # Work on plain lists addressed by row position and write the columns
    # back afterwards: this avoids mutating the frame while iterating over
    # it and stays correct when index labels are not unique.
    original = df[column].tolist()
    heads = list(original)
    overflow = []  # overflow[k - 1] holds the values of qc_message_k
    for position, value in enumerate(original):
        text = str(value)
        if len(text) <= max_length:
            continue
        pieces = list(splittingstrings(text, max_length))
        heads[position] = pieces[0]
        for number, piece in enumerate(pieces[1:], start=1):
            if number > len(overflow):
                name = 'qc_message_{}'.format(number)
                if name in df.columns:
                    # Keep values already stored in a same-named column.
                    overflow.append(df[name].tolist())
                else:
                    overflow.append([float('nan')] * len(original))
            overflow[number - 1][position] = piece

    # An over-long message always yields at least two pieces, so an empty
    # overflow list means no row was changed and df can stay as it is.
    if overflow:
        df[column] = heads
        for number, values in enumerate(overflow, start=1):
            df['qc_message_{}'.format(number)] = values
    return df
b000a3130db8 planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a
nml
parents:
diff changeset
49
b000a3130db8 planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a
nml
parents:
diff changeset
50
b000a3130db8 planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a
nml
parents:
diff changeset
# Run the conversion only when this file is executed as a script, so that
# importing the module does not trigger argument parsing or file I/O.
if __name__ == '__main__':
    main()