annotate process_input.py @ 1:93fac0ae859f draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ena_webin_cli commit abb15194a196267142d88b9348facf9e85e601ef
author iuc
date Mon, 06 Oct 2025 12:13:19 +0000
parents c9f87770ba8f
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c9f87770ba8f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ena_webin_cli commit 35b4a063e227ea4a2440e23c4df5c27e42c077cb
iuc
parents:
diff changeset
1 import json
c9f87770ba8f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ena_webin_cli commit 35b4a063e227ea4a2440e23c4df5c27e42c077cb
iuc
parents:
diff changeset
2 import os
c9f87770ba8f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ena_webin_cli commit 35b4a063e227ea4a2440e23c4df5c27e42c077cb
iuc
parents:
diff changeset
3 import sys
c9f87770ba8f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ena_webin_cli commit 35b4a063e227ea4a2440e23c4df5c27e42c077cb
iuc
parents:
diff changeset
4
c9f87770ba8f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ena_webin_cli commit 35b4a063e227ea4a2440e23c4df5c27e42c077cb
iuc
parents:
diff changeset
5 import yaml
c9f87770ba8f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ena_webin_cli commit 35b4a063e227ea4a2440e23c4df5c27e42c077cb
iuc
parents:
diff changeset
6
c9f87770ba8f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ena_webin_cli commit 35b4a063e227ea4a2440e23c4df5c27e42c077cb
iuc
parents:
diff changeset
7
c9f87770ba8f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ena_webin_cli commit 35b4a063e227ea4a2440e23c4df5c27e42c077cb
iuc
parents:
diff changeset
def get_section_string(f, start_line, end_line, return_string=False):
    """Extract the lines found between two delimiter lines of an open file.

    Reading starts at the current position of ``f``. Lines are consumed and
    discarded up to and including the first line equal to ``start_line``.
    The lines that follow are collected until a line equal to ``end_line``
    is read (that line is consumed but not returned) or the file ends.

    Args:
        f: file-like object providing ``readline()``.
        start_line: exact line, newline included, that opens the section.
        end_line: exact line, newline included, that closes the section.
        return_string: when true, return the section as a single string;
            otherwise return it as a list of lines.

    Returns:
        The collected lines (each keeping its line terminator), joined into
        one string when ``return_string`` is true. The result is empty when
        ``start_line`` is never found.
    """
    # readline() keeps returning an empty value once the end of the file is
    # reached, so end-of-file must be an explicit stop condition: waiting
    # only for the delimiter would loop forever when it is missing.

    # consume starting lines
    while True:
        line = f.readline()
        if not line or line == start_line:
            break

    # read section lines
    section_lines = []
    while True:
        line = f.readline()
        if not line or line == end_line:
            break
        section_lines.append(line)

    if return_string:
        return ''.join(section_lines)
    return section_lines
c9f87770ba8f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ena_webin_cli commit 35b4a063e227ea4a2440e23c4df5c27e42c077cb
iuc
parents:
diff changeset
18
c9f87770ba8f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ena_webin_cli commit 35b4a063e227ea4a2440e23c4df5c27e42c077cb
iuc
parents:
diff changeset
19
c9f87770ba8f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ena_webin_cli commit 35b4a063e227ea4a2440e23c4df5c27e42c077cb
iuc
parents:
diff changeset
def fill_from_yaml_data(yaml_only_dict, studies_samples_dict):
    """Record the platform of every experiment under its study and sample.

    Args:
        yaml_only_dict: mapping whose ``'ENA_experiment'`` entry maps an
            index to an experiment mapping with the keys ``'study_alias'``,
            ``'sample_alias'`` and ``'platform'``.
        studies_samples_dict: mapping updated in place. After the call,
            ``studies_samples_dict[study_alias][sample_alias]['experiments']``
            holds one ``{'platform': ...}`` entry per experiment, in the
            order the experiments are iterated.

    Returns:
        None; the result is delivered through ``studies_samples_dict``.

    Raises:
        KeyError: if ``'ENA_experiment'`` or one of the experiment keys is
            missing.
    """
    # fill experiment information (platform)
    for exp in yaml_only_dict['ENA_experiment'].values():
        # Read every field before touching the output so that an incomplete
        # experiment cannot leave a half-created study or sample entry.
        study_alias = exp['study_alias']
        sample_alias = exp['sample_alias']
        platform = exp['platform']

        # setdefault creates the missing levels on first sight and reuses
        # the existing ones afterwards.
        samples = studies_samples_dict.setdefault(study_alias, {})
        experiments = samples.setdefault(sample_alias, {}).setdefault('experiments', [])
        experiments.append({'platform': platform})
c9f87770ba8f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ena_webin_cli commit 35b4a063e227ea4a2440e23c4df5c27e42c077cb
iuc
parents:
diff changeset
34
c9f87770ba8f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ena_webin_cli commit 35b4a063e227ea4a2440e23c4df5c27e42c077cb
iuc
parents:
diff changeset
35
c9f87770ba8f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ena_webin_cli commit 35b4a063e227ea4a2440e23c4df5c27e42c077cb
iuc
parents:
diff changeset
def load_receipt_data(input_file_path):
    """Load study, sample and platform information from a receipt file.

    The file is read three times, each time from the beginning, to pick up:
    the text enclosed between two ``'YAML -------------'`` lines (parsed as
    YAML), the lines following ``'Study accession details:'`` and the lines
    following ``'Sample accession details:'``. Both accession sections end
    at the first empty line and hold tab-separated lines whose first two
    columns are an alias and its accession.

    Args:
        input_file_path: path of the receipt file.

    Returns:
        A dict shaped as
        ``{study_alias: {'accession': study_accession,
        sample_alias: {'experiments': [{'platform': ...}, ...],
        'accession': sample_accession}}}``.
        Only studies and samples named by the experiments of the YAML
        section are present; an ``'accession'`` entry exists only when the
        matching accession section lists the alias.

    Raises:
        ValueError: if a non-empty accession line has fewer than two
            tab-separated columns.
    """
    # should do some health check of the input file?
    # load yaml section
    loaded_data = {}
    yaml_delimiter = 'YAML -------------\n'
    with open(input_file_path) as input_file:
        yaml_only_section = yaml.safe_load(
            get_section_string(input_file, start_line=yaml_delimiter, end_line=yaml_delimiter, return_string=True)
        )
    fill_from_yaml_data(yaml_only_section, loaded_data)

    # read study accessions
    for alias, accession in _read_accession_pairs(input_file_path, 'Study accession details:\n'):
        if alias in loaded_data:
            loaded_data[alias]['accession'] = accession
        else:
            # NOTE(review): the alias reported here is a study alias although
            # the message says "Experiment"; the text is left unchanged
            # because the tool's stdout may be compared elsewhere.
            print(f"Experiment {alias} has unknown study or sample")

    # read sample accessions
    for alias, accession in _read_accession_pairs(input_file_path, 'Sample accession details:\n'):
        # The accession goes to the first study that lists the sample alias.
        for samples in loaded_data.values():
            if alias in samples:
                samples[alias]['accession'] = accession
                break

    return loaded_data


def _read_accession_pairs(input_file_path, section_header):
    """Return the (alias, accession) pairs listed under ``section_header``."""
    with open(input_file_path) as input_file:
        section_lines = get_section_string(input_file, start_line=section_header, end_line='\n')

    pairs = []
    for line in section_lines:
        if line == '\n':
            continue
        # Drop the line terminator before splitting: on a line with exactly
        # two columns it would otherwise become part of the accession.
        alias, accession, *_ = line.rstrip('\n').split('\t')
        pairs.append((alias, accession))
    return pairs
c9f87770ba8f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ena_webin_cli commit 35b4a063e227ea4a2440e23c4df5c27e42c077cb
iuc
parents:
diff changeset
73
c9f87770ba8f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ena_webin_cli commit 35b4a063e227ea4a2440e23c4df5c27e42c077cb
iuc
parents:
diff changeset
74
c9f87770ba8f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ena_webin_cli commit 35b4a063e227ea4a2440e23c4df5c27e42c077cb
iuc
parents:
diff changeset
def main():
    """Write one manifest file per FASTA file that has receipt metadata.

    Positional command-line arguments:
        1. path of the submission receipt file
        2. path of a JSON file holding the list of FASTA file names
        3. directory in which the manifest files are written
        4. path of a manifest template copied to the top of every manifest

    The path of every manifest written is appended, one per line, to
    ``submit_list.tab`` in the current working directory. FASTA files whose
    sample alias is not found in the receipt are reported on stdout and
    skipped.
    """
    receipt_path = sys.argv[1]
    fasta_list_path = sys.argv[2]
    manifest_dir = sys.argv[3]
    template_path = sys.argv[4]

    # submitted studies and samples, as described by the receipt file
    receipt = load_receipt_data(receipt_path)

    # names of the fasta files to process
    with open(fasta_list_path, 'r') as fasta_list_handle:
        fasta_names = json.load(fasta_list_handle)

    with open('submit_list.tab', 'w') as submit_list:
        for fasta_name in fasta_names:
            # The sample alias is the file name minus its extension:
            # 9 characters for '.fasta.gz', otherwise 6 (as for '.fasta').
            extension_length = 9 if fasta_name.endswith('.fasta.gz') else 6
            sample_alias = fasta_name[:-extension_length]

            print(f'Processing {sample_alias}')

            for study in receipt.values():
                if sample_alias not in study:
                    continue

                sample = study[sample_alias]
                sample_accession = sample['accession']
                study_accession = study['accession']
                # TODO: get a string that concatenates platform information from multiple experiments
                platform = sample['experiments'][0]['platform']
                manifest_path = os.path.join(manifest_dir, sample_alias + '.manifest.txt')

                with open(manifest_path, "w") as manifest:
                    # the template carries the values shared by all manifests
                    with open(template_path) as template:
                        manifest.write(template.read())

                    for field, value in (
                        ("ASSEMBLYNAME", "consensus_" + sample_alias),
                        ("PLATFORM", platform),
                        ("STUDY", study_accession),
                        ("SAMPLE", sample_accession),
                        ("FASTA", sample_alias + '.fasta.gz'),
                    ):
                        manifest.write("\t".join((field, value)) + "\n")

                    # optional companion files, declared only when present
                    for field, extension in (("AGP", ".agp"), ("CHROMOSOME_LIST", ".tsv.gz")):
                        companion_name = sample_alias + extension
                        if os.path.exists(os.path.join("./fasta", companion_name)):
                            manifest.write(field + "\t" + companion_name + "\n")

                submit_list.write(manifest_path + '\n')
                # only the first study listing the sample is used
                break
            else:
                # the loop ended without a match in any study
                print(f'No metadata found for sample {sample_alias}')
c9f87770ba8f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ena_webin_cli commit 35b4a063e227ea4a2440e23c4df5c27e42c077cb
iuc
parents:
diff changeset
129
c9f87770ba8f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ena_webin_cli commit 35b4a063e227ea4a2440e23c4df5c27e42c077cb
iuc
parents:
diff changeset
130
c9f87770ba8f planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ena_webin_cli commit 35b4a063e227ea4a2440e23c4df5c27e42c077cb
iuc
parents:
diff changeset
# Generate the manifests only when the file is executed as a script.
if __name__ == "__main__":
    main()