annotate process_xlsx.py @ 4:26ccb678abc8 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
author iuc
date Tue, 19 Oct 2021 15:57:14 +0000
parents 59bb6d34fca6
children 4aab5ae907b6
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
1 import argparse
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
2 import pathlib
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
3 import sys
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
4
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
5 import xlrd
1
57251c760cab "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents: 0
diff changeset
6 import yaml
4
26ccb678abc8 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents: 3
diff changeset
7 from check_remote import check_remote_entry
2
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
8 from mappings import optional_samples_cols_mapping
0
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
9
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
10 FILE_FORMAT = 'fastq'
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
11
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
12
4
26ccb678abc8 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents: 3
diff changeset
def identify_action(entry_type, alias):
    """Choose the action ('add' or 'modify') to perform for this entry.

    The alias is looked up through ``check_remote_entry`` with a query keyed
    by ``<entry_type>_alias``. The outcome is printed, and 'modify' is
    returned when the lookup yields at least one result, 'add' otherwise.
    """
    alias_field = entry_type + '_alias'
    matches = check_remote_entry(entry_type, {alias_field: alias})
    if len(matches) == 0:
        print(f'No {entry_type} entry found with alias {alias}')
        return 'add'
    print(f'Found: {entry_type} entry with alias {alias}')
    return 'modify'
26ccb678abc8 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents: 3
diff changeset
23
26ccb678abc8 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents: 3
diff changeset
24
2
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
def extract_data(xl_sheet, expected_columns, optional_cols=None):
    """Validate the sheet header and collect its rows keyed by the index column.

    1. Check that the expected columns are present in the first row of the
       sheet (any order and mixed with others; this only verifies that the
       user filled in the correct template). Columns listed in
       ``optional_cols`` are picked up when present.
    2. Fill a dictionary with the row data, indexed by the value found in the
       first of the expected columns.

    Args:
        xl_sheet: sheet object exposing ``ncols``, ``nrows`` and
            ``cell(row, col).value``.
        expected_columns: sequence of required column names; the first one is
            used as the index column and is not repeated inside the row dicts.
        optional_cols: container of column names that are loaded only if the
            sheet provides them (defaults to none).

    Returns:
        A tuple ``(data_dict, optional_cols_loaded)``. ``data_dict`` maps each
        index value to its row dict, or to a flat list of row dicts when the
        same index value occurs on several rows. ``optional_cols_loaded``
        lists the optional columns found, in sheet order.

    Raises:
        SystemExit: if a recognised column name appears more than once.
        AssertionError: if one of the expected columns is missing.
    """
    if optional_cols is None:
        optional_cols = []

    sheet_columns = {}
    optional_cols_loaded = []
    for sh_col in range(xl_sheet.ncols):
        header = xl_sheet.cell(0, sh_col).value
        is_optional = header in optional_cols
        if header not in expected_columns and not is_optional:
            # columns unknown to the template are ignored
            continue
        if header in sheet_columns:
            sys.exit("Duplicated columns found")
        sheet_columns[header] = sh_col
        if is_optional:
            # store the list of optional cols available
            optional_cols_loaded.append(header)
    provided_cols = list(expected_columns) + optional_cols_loaded

    # check that the required columns are all present
    # TODO: revise this for optional columns
    for expected in expected_columns:
        if expected not in sheet_columns:
            # Raised explicitly (instead of using ``assert``) so the check is
            # not stripped under ``python -O``; the exception type is kept
            # for backward compatibility.
            raise AssertionError("Expected column %s not found" % expected)

    # fetch rows in a dict
    data_dict = {}
    # the first of the expected columns will be the index
    index_col = sheet_columns[expected_columns[0]]
    value_cols = provided_cols[1:]
    # skip first 2 rows: column names + comments rows
    for row_id in range(2, xl_sheet.nrows):
        row_dict = {
            name: xl_sheet.cell(row_id, sheet_columns[name]).value
            for name in value_cols
        }
        key = xl_sheet.cell(row_id, index_col).value
        if key not in data_dict:
            data_dict[key] = row_dict
        elif isinstance(data_dict[key], list):
            # third and later duplicates extend the existing list rather than
            # nesting it inside a new one
            data_dict[key].append(row_dict)
        else:
            # second occurrence: turn the single row into a list of rows
            data_dict[key] = [data_dict[key], row_dict]
    return data_dict, optional_cols_loaded
0
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
71
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
72
1
57251c760cab "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents: 0
diff changeset
def paste_xls2yaml(xlsx_path):
    """Print the workbook at ``xlsx_path`` to stdout as YAML.

    The dump is framed by two 'YAML -------------' marker lines. Every sheet
    except 'controlled_vocabulary' becomes a mapping of row index to
    ``{column name: cell value}``. Column names are read from row 0, and the
    first two rows (column names and suggestions) are not emitted as data.
    """
    banner = 'YAML -------------'
    print(banner)
    workbook = xlrd.open_workbook(xlsx_path)
    content_dict = {}
    for sheet_name in workbook.sheet_names():
        if sheet_name != 'controlled_vocabulary':
            sheet = workbook.sheet_by_name(sheet_name)
            n_cols = sheet.ncols
            headers = [sheet.cell(0, idx).value for idx in range(n_cols)]
            # should check for duplicate alias/ids?
            content_dict[sheet_name] = {
                row_id: {
                    headers[idx]: sheet.cell(row_id, idx).value
                    for idx in range(n_cols)
                }
                for row_id in range(2, sheet.nrows)
            }
    yaml.dump(content_dict, sys.stdout)
    print(banner)
57251c760cab "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents: 0
diff changeset
95
57251c760cab "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents: 0
diff changeset
96
0
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
97 parser = argparse.ArgumentParser()
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
98 parser.add_argument('--form', dest='xlsx_path', required=True)
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
99 parser.add_argument('--out_dir', dest='out_path', required=True)
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
100 parser.add_argument('--action', dest='action', required=True)
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
101 parser.add_argument('--vir', dest='viral_submission', required=False, action='store_true')
4
26ccb678abc8 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents: 3
diff changeset
102 parser.add_argument('--dev', dest='dev_submission', required=False, action='store_true')
1
57251c760cab "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents: 0
diff changeset
103 parser.add_argument('--verbose', dest='verbose', required=False, action='store_true')
0
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
104 args = parser.parse_args()
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
105
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
106 xl_workbook = xlrd.open_workbook(args.xlsx_path)
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
107
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
108 # PARSE STUDIES
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
109 #################
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
110 xl_sheet = xl_workbook.sheet_by_name('ENA_study')
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
111 if xl_sheet.nrows < 3:
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
112 raise ValueError('No entries found in studies sheet')
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
113 studies_dict = {}
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
114 studies_col = ['alias', 'title', 'study_type', 'study_abstract']
2
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
115 studies_dict, _ = extract_data(xl_sheet, studies_col)
0
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
116
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
117 # PARSE SAMPLES
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
118 #################
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
119 xl_sheet = xl_workbook.sheet_by_name('ENA_sample')
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
120 if xl_sheet.nrows < 3:
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
121 raise ValueError('No entries found in samples')
2
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
122
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
123 samples_cols_excel = ['alias', 'title', 'scientific_name', 'sample_description']
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
124 # optional_samples_cols_mapping = {}
0
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
125 if args.viral_submission:
2
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
126 # load columns names from the table
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
127 samples_cols_excel = samples_cols_excel + ['geographic location (country and/or sea)',
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
128 'host common name', 'host health state',
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
129 'host sex', 'host scientific name', 'collector name',
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
130 'collecting institution', 'isolate']
0
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
131
2
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
132 samples_dict, samples_optional_cols_loaded = extract_data(xl_sheet, samples_cols_excel,
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
133 optional_samples_cols_mapping.keys())
0
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
134 # PARSE EXPERIMENTS
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
135 #################
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
136 xl_sheet = xl_workbook.sheet_by_name('ENA_experiment')
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
137 if xl_sheet.nrows < 3:
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
138 raise ValueError('No experiments found in experiments sheet')
1
57251c760cab "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents: 0
diff changeset
139 exp_columns = ['alias', 'title', 'study_alias', 'sample_alias', 'design_description',
57251c760cab "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents: 0
diff changeset
140 'library_name', 'library_strategy', 'library_source', 'library_selection',
57251c760cab "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents: 0
diff changeset
141 'library_layout', 'insert_size', 'library_construction_protocol',
57251c760cab "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents: 0
diff changeset
142 'platform', 'instrument_model']
57251c760cab "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents: 0
diff changeset
143
2
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
144 experiments_dict, _ = extract_data(xl_sheet, exp_columns)
0
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
145
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
146 # PARSE RUNS SHEET
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
147 #################
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
148 xl_sheet = xl_workbook.sheet_by_name('ENA_run')
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
149 if xl_sheet.nrows < 3:
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
150 raise ValueError('No entries found in runs sheet')
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
151 run_cols = ['alias', 'experiment_alias', 'file_name', 'file_format']
2
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
152 runs_dict, _ = extract_data(xl_sheet, run_cols)
0
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
153
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
154 # WRITE HEADERS TO TABLES
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
155 studies_table = open(pathlib.Path(args.out_path) / 'studies.tsv', 'w')
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
156 studies_table.write('\t'.join(['alias', 'status', 'accession', 'title', 'study_type',
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
157 'study_abstract', 'pubmed_id', 'submission_date']) + '\n')
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
158 samples_table = open(pathlib.Path(args.out_path) / 'samples.tsv', 'w')
2
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
159
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
160 samples_cols = ['alias', 'title', 'scientific_name', 'sample_description']
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
161 # extend the samples_cols list to add the ones that are filled by the CLI
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
162 samples_cols = samples_cols + ['status', 'accession', 'taxon_id', 'submission_date']
0
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
163 if args.viral_submission:
2
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
164 # extend the samples columns with the viral specific data
4
26ccb678abc8 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents: 3
diff changeset
165 samples_cols = samples_cols + ['geographic location (country and/or sea)', 'host common name',
26ccb678abc8 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents: 3
diff changeset
166 'host subject id', 'host health state', 'host sex',
26ccb678abc8 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents: 3
diff changeset
167 'host scientific name', 'collector name',
26ccb678abc8 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents: 3
diff changeset
168 'collecting institution', 'isolate']
2
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
169 if len(samples_optional_cols_loaded) > 0:
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
170 for optional_cols_excel in samples_optional_cols_loaded:
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
171 samples_cols.append(optional_samples_cols_mapping[optional_cols_excel])
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
172 samples_table.write('\t'.join(samples_cols) + '\n')
0
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
173
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
174 experiments_table = open(pathlib.Path(args.out_path) / 'experiments.tsv', 'w')
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
175 experiments_table.write('\t'.join(['alias', 'status', 'accession', 'title', 'study_alias',
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
176 'sample_alias', 'design_description', 'library_name',
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
177 'library_strategy', 'library_source', 'library_selection',
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
178 'library_layout', 'insert_size', 'library_construction_protocol',
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
179 'platform', 'instrument_model', 'submission_date']) + '\n')
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
180
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
181 runs_table = open(pathlib.Path(args.out_path) / 'runs.tsv', 'w')
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
182 runs_table.write('\t'.join(['alias', 'status', 'accession', 'experiment_alias', 'file_name',
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
183 'file_format', 'file_checksum', 'submission_date']) + '\n')
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
184 action = args.action
4
26ccb678abc8 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents: 3
diff changeset
185 # actionable_items
0
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
186 # WRITE DICTIONARIES TO TABLE FILES
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
187
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
188 # ADD A TIMESTAMP TO THE ALIAS? SEEMS LIKE ENA REQUIRES ALL ENTRIES FOR A WEBIN TO HAVE UNIQUE IDS?
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
189 # dt_oobj = datetime.now(tz=None)
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
190 # timestamp = dt_oobj.strftime("%Y%m%d_%H:%M:%S")
1
57251c760cab "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents: 0
diff changeset
191 runs_included = []
57251c760cab "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents: 0
diff changeset
192 exp_included = []
0
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
193 for study_alias, study in studies_dict.items():
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
194 # study_alias = study_alias + '_' + timestamp
4
26ccb678abc8 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents: 3
diff changeset
195 if args.dev_submission:
26ccb678abc8 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents: 3
diff changeset
196 entry_action = args.action
26ccb678abc8 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents: 3
diff changeset
197 else:
26ccb678abc8 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents: 3
diff changeset
198 entry_action = identify_action('study', study_alias)
26ccb678abc8 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents: 3
diff changeset
199 studies_table.write('\t'.join([study_alias, entry_action, 'ENA_accession', study['title'],
0
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
200 study['study_type'], study['study_abstract'], '',
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
201 'ENA_submission_data']) + '\n') # assuming no pubmed_id
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
202 for sample_alias, sample in samples_dict.items():
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
203 # sample_alias = sample_alias + '_' + timestamp
4
26ccb678abc8 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents: 3
diff changeset
204 if args.dev_submission:
26ccb678abc8 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents: 3
diff changeset
205 entry_action = args.action
26ccb678abc8 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents: 3
diff changeset
206 else:
26ccb678abc8 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents: 3
diff changeset
207 entry_action = identify_action('sample', sample_alias)
2
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
208 samples_row_values = [sample_alias, sample['title'], sample['scientific_name'],
4
26ccb678abc8 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents: 3
diff changeset
209 sample['sample_description'], entry_action, 'ena_accession',
26ccb678abc8 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents: 3
diff changeset
210 '', 'ENA_submission_date']
0
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
211 if args.viral_submission:
2
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
212 # add the values that are unique for the viral samples
0
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
213 if sample['collector name'] == '':
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
214 sample['collector name'] = 'unknown'
2
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
215 samples_row_values = samples_row_values + \
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
216 [sample['geographic location (country and/or sea)'], sample['host common name'],
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
217 'host subject id', sample['host health state'], sample['host sex'],
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
218 sample['host scientific name'], sample['collector name'],
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
219 sample['collecting institution'], sample['isolate']]
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
220 # add the (possible) optional columns values
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
221 if len(samples_optional_cols_loaded) > 0:
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
222 for optional_col in samples_optional_cols_loaded:
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
223 # parse values stored in excel date format (=float)
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
224 if optional_col in ('collection date', 'receipt date'):
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
225 # check if excel stored it as date
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
226 if isinstance(sample[optional_col], float):
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
227 year, month, day, hour, minute, second = xlrd.xldate_as_tuple(
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
228 sample[optional_col], xl_workbook.datemode)
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
229 month = "{:02d}".format(month)
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
230 day = "{:02d}".format(day)
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
231 hour = "{:02d}".format(hour)
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
232 minute = "{:02d}".format(minute)
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
233 second = "{:02d}".format(second)
3
59bb6d34fca6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 28d2a0cfcbb04d88b6e19a7d898a2bada3bce149"
iuc
parents: 2
diff changeset
234 if optional_col in ('collection date'):
59bb6d34fca6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 28d2a0cfcbb04d88b6e19a7d898a2bada3bce149"
iuc
parents: 2
diff changeset
235 # collection date uses the format 2008-01-23T19:23:10
59bb6d34fca6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 28d2a0cfcbb04d88b6e19a7d898a2bada3bce149"
iuc
parents: 2
diff changeset
236 sample[optional_col] = str(year) + '-' + str(month) + '-' + str(day) + \
59bb6d34fca6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 28d2a0cfcbb04d88b6e19a7d898a2bada3bce149"
iuc
parents: 2
diff changeset
237 'T' + str(hour) + ':' + str(minute) + ':' + str(second)
59bb6d34fca6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 28d2a0cfcbb04d88b6e19a7d898a2bada3bce149"
iuc
parents: 2
diff changeset
238 if optional_col in ('receipt date'):
59bb6d34fca6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 28d2a0cfcbb04d88b6e19a7d898a2bada3bce149"
iuc
parents: 2
diff changeset
239 # receipt date uses format: 2008-01-23
59bb6d34fca6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 28d2a0cfcbb04d88b6e19a7d898a2bada3bce149"
iuc
parents: 2
diff changeset
240 sample[optional_col] = str(year) + '-' + str(month) + '-' + str(day)
2
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
241 # excel stores everything as float so I need to check if
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
242 # the value was actually an int and keep it as int
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
243 if isinstance(sample[optional_col], float):
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
244 if int(sample[optional_col]) == sample[optional_col]:
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
245 # it is not really a float but an int
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
246 sample[optional_col] = int(sample[optional_col])
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
247 samples_row_values.append(str(sample[optional_col]))
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
248 samples_table.write('\t'.join(samples_row_values) + '\n')
9e2df763086c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents: 1
diff changeset
249
0
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
250 for exp_alias, exp in experiments_dict.items():
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
251 # should I check here if any experiment has a study or sample alias that is incorrect?
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
252 # (not listed in the samples or study dict)
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
253 # process the experiments for this sample
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
254 if exp['sample_alias'] == sample_alias:
4
26ccb678abc8 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents: 3
diff changeset
255 # check the remote status
26ccb678abc8 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents: 3
diff changeset
256 if args.dev_submission:
26ccb678abc8 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents: 3
diff changeset
257 entry_action = args.action
26ccb678abc8 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents: 3
diff changeset
258 else:
26ccb678abc8 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents: 3
diff changeset
259 entry_action = identify_action('experiment', exp_alias)
26ccb678abc8 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents: 3
diff changeset
260 experiments_table.write('\t'.join([exp_alias, entry_action, 'accession_ena', exp['title'],
0
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
261 exp['study_alias'], sample_alias,
3
59bb6d34fca6 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 28d2a0cfcbb04d88b6e19a7d898a2bada3bce149"
iuc
parents: 2
diff changeset
262 exp['design_description'], exp['library_name'],
0
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
263 exp['library_strategy'], exp['library_source'],
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
264 exp['library_selection'],
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
265 exp['library_layout'].lower(),
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
266 str(int(exp['insert_size'])),
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
267 exp['library_construction_protocol'],
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
268 exp['platform'], exp['instrument_model'],
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
269 'submission_date_ENA']) + '\n')
1
57251c760cab "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents: 0
diff changeset
270 exp_included.append(exp_alias)
0
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
271 for run_alias, run in runs_dict.items():
1
57251c760cab "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents: 0
diff changeset
272 # check that the experiments library_layout is set to paired
57251c760cab "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents: 0
diff changeset
273 # when multiple entries are associated with the same run alias
57251c760cab "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents: 0
diff changeset
274 if not isinstance(run, list):
57251c760cab "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents: 0
diff changeset
275 runs_list = [run]
57251c760cab "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents: 0
diff changeset
276 else:
57251c760cab "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents: 0
diff changeset
277 runs_list = run
57251c760cab "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents: 0
diff changeset
278 for run_entry in runs_list:
57251c760cab "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents: 0
diff changeset
279 if run_entry['experiment_alias'] == exp_alias:
4
26ccb678abc8 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents: 3
diff changeset
280 if args.dev_submission:
26ccb678abc8 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents: 3
diff changeset
281 entry_action = args.action
26ccb678abc8 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents: 3
diff changeset
282 else:
26ccb678abc8 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents: 3
diff changeset
283 entry_action = identify_action('run', run_alias)
26ccb678abc8 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents: 3
diff changeset
284 runs_table.write('\t'.join([run_alias, entry_action, 'ena_run_accession',
1
57251c760cab "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents: 0
diff changeset
285 exp_alias, run_entry['file_name'],
4
26ccb678abc8 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents: 3
diff changeset
286 FILE_FORMAT, '',
1
57251c760cab "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents: 0
diff changeset
287 'submission_date_ENA']) + '\n')
57251c760cab "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents: 0
diff changeset
288 runs_included.append(run_alias)
57251c760cab "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents: 0
diff changeset
289
57251c760cab "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents: 0
diff changeset
# Report runs and experiments that are present in the parsed spreadsheet but
# were never written to an output table: `runs_included` / `exp_included`
# collect the aliases that were actually emitted by the loops above.
for run in runs_dict:
    if run not in runs_included:
        # Adjacent string literals are joined at compile time, so the message
        # stays a single line; a backslash continuation inside the literal
        # would pull the next source line's indentation into the output.
        print(f'The run {run} is listed in the runs section but not associated with any '
              'used experiment')

for exp in experiments_dict:
    if exp not in exp_included:
        print(f'The experiment {exp} is listed in the experiments section but not associated '
              'with any used sample')
57251c760cab "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents: 0
diff changeset
300
0
382518f24d6d "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff changeset
# Close the four output tables, in the same order they were closed before:
# studies, samples, experiments, runs.
for _table in (studies_table, samples_table, experiments_table, runs_table):
    _table.close()

# With --verbose, additionally hand the spreadsheet path to paste_xls2yaml.
if args.verbose:
    paste_xls2yaml(args.xlsx_path)