Mercurial > repos > iuc > ena_upload
annotate process_xlsx.py @ 4:26ccb678abc8 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
author | iuc |
---|---|
date | Tue, 19 Oct 2021 15:57:14 +0000 |
parents | 59bb6d34fca6 |
children | 4aab5ae907b6 |
rev | line source |
---|---|
0
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
1 import argparse |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
2 import pathlib |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
3 import sys |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
4 |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
5 import xlrd |
1
57251c760cab
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents:
0
diff
changeset
|
6 import yaml |
4
26ccb678abc8
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents:
3
diff
changeset
|
7 from check_remote import check_remote_entry |
2
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
8 from mappings import optional_samples_cols_mapping |
0
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
9 |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
10 FILE_FORMAT = 'fastq' |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
11 |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
12 |
4
26ccb678abc8
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents:
3
diff
changeset
|
def identify_action(entry_type, alias):
    """Decide which action ('add' or 'modify') has to be performed for an entry.

    A query keyed by ``<entry_type>_alias`` is passed to ``check_remote_entry``;
    when it returns at least one result the entry is reported as found and
    'modify' is returned, otherwise 'add' is returned.

    :param entry_type: entry kind, used as the prefix of the alias query key
    :param alias: alias value to look up
    :return: 'modify' when a matching remote entry is found, else 'add'
    """
    lookup = {entry_type + '_alias': alias}
    found = check_remote_entry(entry_type, lookup)
    # guard clause: nothing returned for this alias, so it has to be created
    if len(found) == 0:
        print(f'No {entry_type} entry found with alias {alias}')
        return 'add'
    print(f'Found: {entry_type} entry with alias {alias}')
    return 'modify'
26ccb678abc8
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents:
3
diff
changeset
|
23 |
26ccb678abc8
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents:
3
diff
changeset
|
24 |
2
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
def extract_data(xl_sheet, expected_columns, optional_cols=None):
    """
    Read the rows of a sheet into a dictionary.

    1. Check that the expected columns are present in the header row (row 0)
       of the sheet (any order and mixed with others, it's just a
       verification that the user filled the correct template)
    2. Fill a dictionary with the rows data indexed by the value found in the
       first of the expected columns

    :param xl_sheet: sheet object exposing ``ncols``, ``nrows`` and
        ``cell(row, col).value``
    :param expected_columns: sequence of required header names; the first one
        is the index column and is not repeated inside the row dictionaries
    :param optional_cols: container of header names that are also loaded when
        present in the sheet (only membership tests are done on it)
    :return: tuple ``(data_dict, optional_cols_loaded)``. ``data_dict`` maps
        each index value to its row dictionary, or to a flat list of row
        dictionaries when the index value appears on several rows.
        ``optional_cols_loaded`` lists the optional headers found, in sheet
        order.
    :raises SystemExit: when a recognised header appears more than once
    :raises AssertionError: when one of the expected columns is missing
    """
    if optional_cols is None:
        optional_cols = []
    sheet_columns = {}
    optional_cols_loaded = []
    for sh_col in range(xl_sheet.ncols):
        header = xl_sheet.cell(0, sh_col).value
        is_expected = header in expected_columns
        is_optional = header in optional_cols
        if not (is_expected or is_optional):
            # unrelated column: ignored
            continue
        if header in sheet_columns:
            sys.exit("Duplicated columns found")
        sheet_columns[header] = sh_col
        if is_optional:
            # store the list of optional cols available
            optional_cols_loaded.append(header)
    # list() so that any sequence (e.g. a tuple) of expected names is accepted
    provided_cols = list(expected_columns) + optional_cols_loaded

    # check that the required columns are all present
    # TODO: revise this for optional columns
    for expected in expected_columns:
        if expected not in sheet_columns:
            # raised explicitly (instead of `assert`) so that the check is not
            # stripped when Python runs with -O
            raise AssertionError("Expected column %s not found" % expected)

    # fetch rows in a dict
    data_dict = {}
    # the first of the expected columns will be the index
    index_col = sheet_columns[expected_columns[0]]
    # (name, sheet column position) of every non-index column to load
    value_cols = [(name, sheet_columns[name]) for name in provided_cols[1:]]
    # skip first 2 rows: column names + comments rows
    for row_id in range(2, xl_sheet.nrows):
        row_dict = {name: xl_sheet.cell(row_id, col_idx).value
                    for name, col_idx in value_cols}
        key = xl_sheet.cell(row_id, index_col).value
        if key not in data_dict:
            data_dict[key] = row_dict
        elif isinstance(data_dict[key], list):
            # third or later row with this index: extend the existing list
            # rather than wrapping it again (which would nest the lists)
            data_dict[key].append(row_dict)
        else:
            # second row with this index: switch from a single dict to a list
            data_dict[key] = [data_dict[key], row_dict]
    return data_dict, optional_cols_loaded
0
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
71 |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
72 |
1
57251c760cab
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents:
0
diff
changeset
|
def paste_xls2yaml(xlsx_path):
    """Print the content of the workbook at ``xlsx_path`` to stdout as YAML.

    Every sheet except 'controlled_vocabulary' is turned into a dictionary
    mapping the sheet row number to a ``{column name: cell value}``
    dictionary; column names are read from the first row. The resulting
    ``{sheet name: rows}`` dictionary is dumped between two banner lines.

    :param xlsx_path: path of the workbook, passed to ``xlrd.open_workbook``
    """
    banner = 'YAML -------------'
    print(banner)
    workbook = xlrd.open_workbook(xlsx_path)
    content_dict = {}
    for sheet_name in workbook.sheet_names():
        if sheet_name != 'controlled_vocabulary':
            sheet = workbook.sheet_by_name(sheet_name)
            headers = [sheet.cell(0, c).value for c in range(sheet.ncols)]
            # skip first 2 rows (column names and suggestions)
            # should check for duplicate alias/ids?
            content_dict[sheet_name] = {
                row_id: {headers[c]: sheet.cell(row_id, c).value
                         for c in range(sheet.ncols)}
                for row_id in range(2, sheet.nrows)
            }
    yaml.dump(content_dict, sys.stdout)
    print(banner)
57251c760cab
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents:
0
diff
changeset
|
95 |
57251c760cab
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents:
0
diff
changeset
|
96 |
0
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
97 parser = argparse.ArgumentParser() |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
98 parser.add_argument('--form', dest='xlsx_path', required=True) |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
99 parser.add_argument('--out_dir', dest='out_path', required=True) |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
100 parser.add_argument('--action', dest='action', required=True) |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
101 parser.add_argument('--vir', dest='viral_submission', required=False, action='store_true') |
4
26ccb678abc8
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents:
3
diff
changeset
|
102 parser.add_argument('--dev', dest='dev_submission', required=False, action='store_true') |
1
57251c760cab
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents:
0
diff
changeset
|
103 parser.add_argument('--verbose', dest='verbose', required=False, action='store_true') |
0
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
104 args = parser.parse_args() |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
105 |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
106 xl_workbook = xlrd.open_workbook(args.xlsx_path) |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
107 |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
108 # PARSE STUDIES |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
109 ################# |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
110 xl_sheet = xl_workbook.sheet_by_name('ENA_study') |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
111 if xl_sheet.nrows < 3: |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
112 raise ValueError('No entries found in studies sheet') |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
113 studies_dict = {} |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
114 studies_col = ['alias', 'title', 'study_type', 'study_abstract'] |
2
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
115 studies_dict, _ = extract_data(xl_sheet, studies_col) |
0
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
116 |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
117 # PARSE SAMPLES |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
118 ################# |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
119 xl_sheet = xl_workbook.sheet_by_name('ENA_sample') |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
120 if xl_sheet.nrows < 3: |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
121 raise ValueError('No entries found in samples') |
2
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
122 |
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
123 samples_cols_excel = ['alias', 'title', 'scientific_name', 'sample_description'] |
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
124 # optional_samples_cols_mapping = {} |
0
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
125 if args.viral_submission: |
2
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
126 # load columns names from the table |
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
127 samples_cols_excel = samples_cols_excel + ['geographic location (country and/or sea)', |
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
128 'host common name', 'host health state', |
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
129 'host sex', 'host scientific name', 'collector name', |
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
130 'collecting institution', 'isolate'] |
0
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
131 |
2
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
132 samples_dict, samples_optional_cols_loaded = extract_data(xl_sheet, samples_cols_excel, |
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
133 optional_samples_cols_mapping.keys()) |
0
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
134 # PARSE EXPERIMENTS |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
135 ################# |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
136 xl_sheet = xl_workbook.sheet_by_name('ENA_experiment') |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
137 if xl_sheet.nrows < 3: |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
138 raise ValueError('No experiments found in experiments sheet') |
1
57251c760cab
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents:
0
diff
changeset
|
139 exp_columns = ['alias', 'title', 'study_alias', 'sample_alias', 'design_description', |
57251c760cab
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents:
0
diff
changeset
|
140 'library_name', 'library_strategy', 'library_source', 'library_selection', |
57251c760cab
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents:
0
diff
changeset
|
141 'library_layout', 'insert_size', 'library_construction_protocol', |
57251c760cab
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents:
0
diff
changeset
|
142 'platform', 'instrument_model'] |
57251c760cab
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents:
0
diff
changeset
|
143 |
2
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
144 experiments_dict, _ = extract_data(xl_sheet, exp_columns) |
0
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
145 |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
146 # PARSE RUNS SHEET |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
147 ################# |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
148 xl_sheet = xl_workbook.sheet_by_name('ENA_run') |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
149 if xl_sheet.nrows < 3: |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
150 raise ValueError('No entries found in runs sheet') |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
151 run_cols = ['alias', 'experiment_alias', 'file_name', 'file_format'] |
2
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
152 runs_dict, _ = extract_data(xl_sheet, run_cols) |
0
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
153 |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
154 # WRITE HEADERS TO TABLES |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
155 studies_table = open(pathlib.Path(args.out_path) / 'studies.tsv', 'w') |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
156 studies_table.write('\t'.join(['alias', 'status', 'accession', 'title', 'study_type', |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
157 'study_abstract', 'pubmed_id', 'submission_date']) + '\n') |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
158 samples_table = open(pathlib.Path(args.out_path) / 'samples.tsv', 'w') |
2
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
159 |
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
160 samples_cols = ['alias', 'title', 'scientific_name', 'sample_description'] |
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
161 # extend the samples_cols list to add the ones that are filled by the CLI |
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
162 samples_cols = samples_cols + ['status', 'accession', 'taxon_id', 'submission_date'] |
0
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
163 if args.viral_submission: |
2
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
164 # extend the samples columns with the viral specific data |
4
26ccb678abc8
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents:
3
diff
changeset
|
165 samples_cols = samples_cols + ['geographic location (country and/or sea)', 'host common name', |
26ccb678abc8
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents:
3
diff
changeset
|
166 'host subject id', 'host health state', 'host sex', |
26ccb678abc8
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents:
3
diff
changeset
|
167 'host scientific name', 'collector name', |
26ccb678abc8
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents:
3
diff
changeset
|
168 'collecting institution', 'isolate'] |
2
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
169 if len(samples_optional_cols_loaded) > 0: |
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
170 for optional_cols_excel in samples_optional_cols_loaded: |
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
171 samples_cols.append(optional_samples_cols_mapping[optional_cols_excel]) |
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
172 samples_table.write('\t'.join(samples_cols) + '\n') |
0
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
173 |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
174 experiments_table = open(pathlib.Path(args.out_path) / 'experiments.tsv', 'w') |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
175 experiments_table.write('\t'.join(['alias', 'status', 'accession', 'title', 'study_alias', |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
176 'sample_alias', 'design_description', 'library_name', |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
177 'library_strategy', 'library_source', 'library_selection', |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
178 'library_layout', 'insert_size', 'library_construction_protocol', |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
179 'platform', 'instrument_model', 'submission_date']) + '\n') |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
180 |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
181 runs_table = open(pathlib.Path(args.out_path) / 'runs.tsv', 'w') |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
182 runs_table.write('\t'.join(['alias', 'status', 'accession', 'experiment_alias', 'file_name', |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
183 'file_format', 'file_checksum', 'submission_date']) + '\n') |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
184 action = args.action |
4
26ccb678abc8
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents:
3
diff
changeset
|
185 # actionable_items |
0
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
186 # WRITE DICTIONARIES TO TABLE FILES |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
187 |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
188 # ADD A TIMESTAMP TO THE ALIAS? SEEMS LIKE ENA REQUIRES ALL ENTRIES FOR A WEBIN TO HAVE UNIQUE IDS? |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
189 # dt_oobj = datetime.now(tz=None) |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
190 # timestamp = dt_oobj.strftime("%Y%m%d_%H:%M:%S") |
1
57251c760cab
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents:
0
diff
changeset
|
191 runs_included = [] |
57251c760cab
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents:
0
diff
changeset
|
192 exp_included = [] |
0
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
193 for study_alias, study in studies_dict.items(): |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
194 # study_alias = study_alias + '_' + timestamp |
4
26ccb678abc8
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents:
3
diff
changeset
|
195 if args.dev_submission: |
26ccb678abc8
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents:
3
diff
changeset
|
196 entry_action = args.action |
26ccb678abc8
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents:
3
diff
changeset
|
197 else: |
26ccb678abc8
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents:
3
diff
changeset
|
198 entry_action = identify_action('study', study_alias) |
26ccb678abc8
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents:
3
diff
changeset
|
199 studies_table.write('\t'.join([study_alias, entry_action, 'ENA_accession', study['title'], |
0
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
200 study['study_type'], study['study_abstract'], '', |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
201 'ENA_submission_data']) + '\n') # assuming no pubmed_id |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
202 for sample_alias, sample in samples_dict.items(): |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
203 # sample_alias = sample_alias + '_' + timestamp |
4
26ccb678abc8
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents:
3
diff
changeset
|
204 if args.dev_submission: |
26ccb678abc8
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents:
3
diff
changeset
|
205 entry_action = args.action |
26ccb678abc8
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents:
3
diff
changeset
|
206 else: |
26ccb678abc8
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents:
3
diff
changeset
|
207 entry_action = identify_action('sample', sample_alias) |
2
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
208 samples_row_values = [sample_alias, sample['title'], sample['scientific_name'], |
4
26ccb678abc8
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents:
3
diff
changeset
|
209 sample['sample_description'], entry_action, 'ena_accession', |
26ccb678abc8
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents:
3
diff
changeset
|
210 '', 'ENA_submission_date'] |
0
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
211 if args.viral_submission: |
2
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
212 # add the values that are unique for the viral samples |
0
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
213 if sample['collector name'] == '': |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
214 sample['collector name'] = 'unknown' |
2
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
215 samples_row_values = samples_row_values + \ |
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
216 [sample['geographic location (country and/or sea)'], sample['host common name'], |
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
217 'host subject id', sample['host health state'], sample['host sex'], |
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
218 sample['host scientific name'], sample['collector name'], |
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
219 sample['collecting institution'], sample['isolate']] |
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
220 # add the (possible) optional columns values |
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
221 if len(samples_optional_cols_loaded) > 0: |
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
222 for optional_col in samples_optional_cols_loaded: |
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
223 # parse values stored in Excel date format (=float) |
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
224 if optional_col in ('collection date', 'receipt date'): |
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
225 # check if excel stored it as date |
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
226 if isinstance(sample[optional_col], float): |
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
227 year, month, day, hour, minute, second = xlrd.xldate_as_tuple( |
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
228 sample[optional_col], xl_workbook.datemode) |
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
229 month = "{:02d}".format(month) |
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
230 day = "{:02d}".format(day) |
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
231 hour = "{:02d}".format(hour) |
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
232 minute = "{:02d}".format(minute) |
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
233 second = "{:02d}".format(second) |
3
59bb6d34fca6
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 28d2a0cfcbb04d88b6e19a7d898a2bada3bce149"
iuc
parents:
2
diff
changeset
|
234 if optional_col in ('collection date'): |
59bb6d34fca6
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 28d2a0cfcbb04d88b6e19a7d898a2bada3bce149"
iuc
parents:
2
diff
changeset
|
235 # collection date uses the format 2008-01-23T19:23:10 |
59bb6d34fca6
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 28d2a0cfcbb04d88b6e19a7d898a2bada3bce149"
iuc
parents:
2
diff
changeset
|
236 sample[optional_col] = str(year) + '-' + str(month) + '-' + str(day) + \ |
59bb6d34fca6
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 28d2a0cfcbb04d88b6e19a7d898a2bada3bce149"
iuc
parents:
2
diff
changeset
|
237 'T' + str(hour) + ':' + str(minute) + ':' + str(second) |
59bb6d34fca6
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 28d2a0cfcbb04d88b6e19a7d898a2bada3bce149"
iuc
parents:
2
diff
changeset
|
238 if optional_col in ('receipt date'): |
59bb6d34fca6
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 28d2a0cfcbb04d88b6e19a7d898a2bada3bce149"
iuc
parents:
2
diff
changeset
|
239 # receipt date uses format: 2008-01-23 |
59bb6d34fca6
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 28d2a0cfcbb04d88b6e19a7d898a2bada3bce149"
iuc
parents:
2
diff
changeset
|
240 sample[optional_col] = str(year) + '-' + str(month) + '-' + str(day) |
2
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
241 # excel stores everything as float so I need to check if |
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
242 # the value was actually an int and keep it as int |
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
243 if isinstance(sample[optional_col], float): |
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
244 if int(sample[optional_col]) == sample[optional_col]: |
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
245 # it is not really a float but an int |
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
246 sample[optional_col] = int(sample[optional_col]) |
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
247 samples_row_values.append(str(sample[optional_col])) |
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
248 samples_table.write('\t'.join(samples_row_values) + '\n') |
9e2df763086c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 1eed23745846ce215e9bdc4a4934d6bc8f41b24e"
iuc
parents:
1
diff
changeset
|
249 |
0
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
250 for exp_alias, exp in experiments_dict.items(): |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
251 # should I check here if any experiment has a study or sample alias that is incorrect? |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
252 # (not listed in the samples or study dict) |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
253 # process the experiments for this sample |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
254 if exp['sample_alias'] == sample_alias: |
4
26ccb678abc8
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents:
3
diff
changeset
|
255 # check the remote status |
26ccb678abc8
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents:
3
diff
changeset
|
256 if args.dev_submission: |
26ccb678abc8
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents:
3
diff
changeset
|
257 entry_action = args.action |
26ccb678abc8
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents:
3
diff
changeset
|
258 else: |
26ccb678abc8
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents:
3
diff
changeset
|
259 entry_action = identify_action('experiment', exp_alias) |
26ccb678abc8
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents:
3
diff
changeset
|
260 experiments_table.write('\t'.join([exp_alias, entry_action, 'accession_ena', exp['title'], |
0
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
261 exp['study_alias'], sample_alias, |
3
59bb6d34fca6
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 28d2a0cfcbb04d88b6e19a7d898a2bada3bce149"
iuc
parents:
2
diff
changeset
|
262 exp['design_description'], exp['library_name'], |
0
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
263 exp['library_strategy'], exp['library_source'], |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
264 exp['library_selection'], |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
265 exp['library_layout'].lower(), |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
266 str(int(exp['insert_size'])), |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
267 exp['library_construction_protocol'], |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
268 exp['platform'], exp['instrument_model'], |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
269 'submission_date_ENA']) + '\n') |
1
57251c760cab
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents:
0
diff
changeset
|
270 exp_included.append(exp_alias) |
0
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
271 for run_alias, run in runs_dict.items(): |
1
57251c760cab
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents:
0
diff
changeset
|
272 # check that the experiments library_layout is set to paired |
57251c760cab
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents:
0
diff
changeset
|
273 # when multiple entries are associated with the same run alias |
57251c760cab
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents:
0
diff
changeset
|
274 if not isinstance(run, list): |
57251c760cab
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents:
0
diff
changeset
|
275 runs_list = [run] |
57251c760cab
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents:
0
diff
changeset
|
276 else: |
57251c760cab
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents:
0
diff
changeset
|
277 runs_list = run |
57251c760cab
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents:
0
diff
changeset
|
278 for run_entry in runs_list: |
57251c760cab
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents:
0
diff
changeset
|
279 if run_entry['experiment_alias'] == exp_alias: |
4
26ccb678abc8
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents:
3
diff
changeset
|
280 if args.dev_submission: |
26ccb678abc8
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents:
3
diff
changeset
|
281 entry_action = args.action |
26ccb678abc8
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents:
3
diff
changeset
|
282 else: |
26ccb678abc8
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents:
3
diff
changeset
|
283 entry_action = identify_action('run', run_alias) |
26ccb678abc8
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents:
3
diff
changeset
|
284 runs_table.write('\t'.join([run_alias, entry_action, 'ena_run_accession', |
1
57251c760cab
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents:
0
diff
changeset
|
285 exp_alias, run_entry['file_name'], |
4
26ccb678abc8
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ba358013c83e7dfffec895946d36585f237e54c5"
iuc
parents:
3
diff
changeset
|
286 FILE_FORMAT, '', |
1
57251c760cab
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents:
0
diff
changeset
|
287 'submission_date_ENA']) + '\n') |
57251c760cab
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents:
0
diff
changeset
|
288 runs_included.append(run_alias) |
57251c760cab
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents:
0
diff
changeset
|
289 |
57251c760cab
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents:
0
diff
changeset
|
290 # check if any experiment or run was not associated with any sample |
57251c760cab
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents:
0
diff
changeset
|
291 for run in runs_dict.keys(): |
57251c760cab
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents:
0
diff
changeset
|
292 if run not in runs_included: |
57251c760cab
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents:
0
diff
changeset
|
293 print(f'The run {run} is listed in the runs section but not associated with any \ |
57251c760cab
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents:
0
diff
changeset
|
294 used experiment') |
57251c760cab
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents:
0
diff
changeset
|
295 |
57251c760cab
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents:
0
diff
changeset
|
296 for exp in experiments_dict.keys(): |
57251c760cab
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents:
0
diff
changeset
|
297 if exp not in exp_included: |
57251c760cab
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents:
0
diff
changeset
|
298 print(f'The experiment {exp} is listed in the experiments section but not associated \ |
57251c760cab
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents:
0
diff
changeset
|
299 with any used sample') |
57251c760cab
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents:
0
diff
changeset
|
300 |
0
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
301 studies_table.close() |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
302 samples_table.close() |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
303 experiments_table.close() |
382518f24d6d
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 57b434bcf493554d060a99b65e66f274d5c00e0a"
iuc
parents:
diff
changeset
|
304 runs_table.close() |
1
57251c760cab
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents:
0
diff
changeset
|
305 |
57251c760cab
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents:
0
diff
changeset
|
306 if args.verbose: |
57251c760cab
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
iuc
parents:
0
diff
changeset
|
307 paste_xls2yaml(args.xlsx_path) |