Mercurial > repos > iuc > ena_upload
changeset 1:57251c760cab draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
author | iuc |
---|---|
date | Fri, 30 Apr 2021 12:09:25 +0000 |
parents | 382518f24d6d |
children | 9e2df763086c |
files | ena_upload.xml extract_tables.py process_xlsx.py samples_macros.xml test-data/2.fastqsanger.gz |
diffstat | 5 files changed, 263 insertions(+), 87 deletions(-) [+] |
line wrap: on
line diff
--- a/ena_upload.xml Sat Nov 28 09:45:44 2020 +0000 +++ b/ena_upload.xml Fri Apr 30 12:09:25 2021 +0000 @@ -1,24 +1,31 @@ -<tool id="ena_upload" name="ENA Upload tool" version="0.3" profile="20.01" license="MIT"> +<tool id="ena_upload" name="ENA Upload tool" version="0.3.1" profile="20.01" license="MIT"> <macros> - <token name="@VERSION@">0.2.4</token> + <token name="@VERSION@">0.2.7</token> <import>samples_macros.xml</import> </macros> <requirements> <requirement type="package" version="@VERSION@">ena-upload-cli</requirement> <requirement type="package" version="1.2.0">xlrd</requirement> </requirements> + <stdio> + <regex match="Oops" source="stderr" level="fatal"/> + <regex match="different file names between command line and RUN table" source="stderr" level="fatal"/> + </stdio> <command detect_errors="exit_code"><![CDATA[ mkdir ./submission_files; +#set $studies_table_path = './submission_files/studies.tsv' +#set $samples_table_path = './submission_files/samples.tsv' +#set $experiments_table_path = './submission_files/experiments.tsv' +#set $runs_table_path = './submission_files/runs.tsv' + +#set $studies_table_path_updated = './submission_files/studies_updated.tsv' +#set $samples_table_path_updated = './submission_files/samples_updated.tsv' +#set $experiments_table_path_updated = './submission_files/experiments_updated.tsv' +#set $runs_table_path_updated = './submission_files/runs_updated.tsv' #set working_dir = os.getcwd() -#set $dry_run_option = "False" -#set viral_submission = "False" #if $action_options.input_format_conditional.input_format == "build_tables": python '$__tool_directory__/extract_tables.py' --action $action_options.action --out_dir ./submission_files --studies $studies_json; - #set $studies_table_path = './submission_files/studies.tsv' - #set $samples_table_path = './submission_files/samples.tsv' - #set $experiments_table_path = './submission_files/experiments.tsv' - #set $runs_table_path = './submission_files/runs.tsv' #end if #if $action_options.input_format_conditional.input_format == "excel_tables": @@ -26,35 +33,30 @@ #if $action_options.input_format_conditional.viral_submission == "true": --vir #end if - --action '$action_options.action' --form '$action_options.input_format_conditional.xlsx_file' --out_dir ./submission_files ; - #set $studies_table_path = './submission_files/studies.tsv' - #set $samples_table_path = './submission_files/samples.tsv' - #set $experiments_table_path = './submission_files/experiments.tsv' - #set $runs_table_path = './submission_files/runs.tsv' - #if $action_options.input_format_conditional.dry_run == "true": - #set $dry_run_option = "True" - #end if + --action '$action_options.action' --form '$action_options.input_format_conditional.xlsx_file' --out_dir ./submission_files --verbose > '$output'; #end if #if $action_options.input_format_conditional.input_format != "user_generated_tables": - cp $studies_table_path $studies_table_out; - cp $samples_table_path $samples_table_out; - cp $experiments_table_path $experiments_table_out; - cp $runs_table_path $runs_table_out; - #if $action_options.input_format_conditional.dry_run == "true": - #set $dry_run_option = "True" - #end if + cp $studies_table_path $studies_table_out && + cp $samples_table_path $samples_table_out && + cp $experiments_table_path $experiments_table_out && + cp $runs_table_path $runs_table_out && +#else: + ln -s '$action_options.input_format_conditional.experiments_users_table' $experiments_table_path && + ln -s '$action_options.input_format_conditional.studies_users_table' $studies_table_path && + ln -s '$action_options.input_format_conditional.runs_users_table' $runs_table_path && + ln -s '$action_options.input_format_conditional.samples_users_table' $samples_table_path && #end if -#if $dry_run_option == "False" and $action_options.test_submit == "False": +#if $action_options.test_submit_parameters.dry_run == "false" and $action_options.test_submit == "False": webin_id=`grep 'username' $credentials`; if [ "\$webin_id" = "" ]; then ## No credentials in user defined preferences ## Fallback to global defined credentials (if exist) #import os #if os.path.isfile(os.environ.get('GALAXY_ENA_SECRETS', '')): - credentials_path=\${ENA_SECRETS}; + credentials_path=\${GALAXY_ENA_SECRETS}; webin_id=`grep 'username' \$GALAXY_ENA_SECRETS`; if [ "\$webin_id" = "" ]; then echo "No global credentials defined. Check your GALAXY_ENA_SECRETS file or set your credentials via: User -> Preferences -> Manage Information"; @@ -91,15 +93,41 @@ #end for #end for #else: - #for $file in $action_options.input_format_conditional.data: - #set $safename_reads_file = re.sub('[^\w\-_\.]', '_', $file.element_identifier) - ln -s '$file' $safename_reads_file && - $files_to_upload.append(str($safename_reads_file)) - #end for + #if $action_options.input_format_conditional.run_input_format_conditional.run_input_format == 'paired_list': + #for $pair in $action_options.input_format_conditional.run_input_format_conditional.paired_end_collection: + #set $safename_reads_file = re.sub('[^\w\-_\.]', '_', $pair.name) + #if $pair.forward.is_of_type('fastq.gz', 'fastqsanger.gz'): + #set $safename_fwd_reads_file = $safename_reads_file + '_1.fastq.gz' + #elif $pair.forward.is_of_type('fastqsanger.bz2', 'fastq.bz2'): + #set $safename_fwd_reads_file = $safename_reads_file + '_1.fastq.bz2' + #else: + #set $safename_fwd_reads_file = $safename_reads_file + '_1.fastq' + #end if + #if $pair.reverse.is_of_type('fastq.gz', 'fastqsanger.gz'): + #set $safename_rev_reads_file = $safename_reads_file + '_2.fastq.gz' + #elif $pair.reverse.is_of_type('fastqsanger.bz2', 'fastq.bz2'): + #set $safename_rev_reads_file = $safename_reads_file + '_2.fastq.bz2' + #else: + #set $safename_rev_reads_file = $safename_reads_file + '_2.fastq' + #end if + + ln -s '$pair.forward' $safename_fwd_reads_file && + $files_to_upload.append(str($safename_fwd_reads_file)) + ln -s '$pair.reverse' $safename_rev_reads_file && + $files_to_upload.append(str($safename_rev_reads_file)) + #end for + #end if + #if $action_options.input_format_conditional.run_input_format_conditional.run_input_format == 'multiple_selection_list': + #for $file in $action_options.input_format_conditional.run_input_format_conditional.data: + #set $safename_reads_file = re.sub('[^\w\-_\.]', '_', $file.element_identifier) + ln -s '$file' $safename_reads_file && + $files_to_upload.append(str($safename_reads_file)) + #end for + #end if #end if -#if $dry_run_option == "False": +#if $action_options.test_submit_parameters.dry_run == "false": ena-upload-cli --tool 'ena-upload-cli v@VERSION@ @ Galaxy' --action '$action_options.action' @@ -109,19 +137,15 @@ #for $dataset in $files_to_upload: '$dataset' #end for +--experiment '$experiments_table_path' +--study '$studies_table_path' +--run '$runs_table_path' +--sample '$samples_table_path' #if $action_options.input_format_conditional.input_format == "user_generated_tables": - --experiment '$action_options.input_format_conditional.experiments_users_table' - --study '$action_options.input_format_conditional.studies_users_table' - --run '$action_options.input_format_conditional.runs_users_table' - --sample '$action_options.input_format_conditional.samples_users_table' #if "$action_options.input_format_conditional.viral_submission" == "true": --vir #end if #else: - --experiment '$experiments_table_path' - --study '$studies_table_path' - --run '$runs_table_path' - --sample '$samples_table_path' #if $action_options.input_format_conditional.input_format == "build_tables": #if $action_options.input_format_conditional.conditional_viral_metadata.viral_sample == "true": --vir @@ -133,12 +157,19 @@ #end if #end if -#if $action_options.submit_dev == "true": +#if $action_options.test_submit_parameters.submit_dev == "true": -d #end if - > '$output' + >> '$output'; + echo -e 'center_name\t$action_options.center' >> '$output'; + echo -e 'action_option\t$action_options.action' >> '$output'; + #if $action_options.input_format_conditional.input_format != "user_generated_tables": + cp $studies_table_path_updated $studies_table_out 2>/dev/null; + cp $samples_table_path_updated $samples_table_out 2>/dev/null; + cp $experiments_table_path_updated $experiments_table_out 2>/dev/null; + cp $runs_table_path_updated $runs_table_out 2>/dev/null; + #end if #else: - echo "" exit 0; #end if @@ -170,9 +201,9 @@ #set $safename_reads_file = re.sub('[^\w\-_\.]', '_', $file.element_identifier) $run_files.append(str($safename_reads_file)) #end for - $runs.append($run_files) + $runs.append((str($run.run_base_name),$run_files)) #end for - + $experiments.append({'title':str($experiment.experiment_title),'experiment_design':str($experiment.experiment_design),'library_strategy':str($experiment.library_strategy),'library_source':str($experiment.library_source),'library_selection':str($experiment.library_selection),'library_layout':str($experiment.library_layout),'insert_size':str($experiment.insert_size),'library_construction_protocol':str($experiment.library_construction_protocol),'platform':str($experiment.platform),'instrument_model':str($experiment.instrument_model),'runs':$runs}) #end for #if $action_options.input_format_conditional.conditional_viral_metadata.viral_sample == "true": @@ -196,17 +227,18 @@ <option value="modify">Modify metadata</option> </param> <when value="add"> - <param name="submit_dev" type="boolean" label="Submit to test ENA server?" help="By selecting yes the reads will be submitted " /> + <expand macro="test_submit_section"/> <param name="test_submit" type="hidden" value="False" /> <expand macro="table_inputs_macro" /> </when> <when value="modify"> + <expand macro="test_submit_section"/> <expand macro="table_inputs_macro" /> </when> </conditional> </inputs> <outputs> - <data name="output" format="data" label="${tool.name} on ${on_string}: Upload summary"/> + <data name="output" format="txt" label="ENA submission receipt"/> <data name="studies_table_out" format="tabular" label="Studies table"> <filter> action_options['input_format_conditional']['input_format'] == "build_tables" or action_options['input_format_conditional']['input_format'] == "excel_tables"</filter> </data> @@ -225,13 +257,18 @@ <test> <conditional name="action_options"> <param name="action" value="add"/> - <param name="submit_dev" value="False"/> + <section name="test_submit_parameters"> + <param name="submit_dev" value="false" /> + <param name="dry_run" value="true" /> + </section> <conditional name="input_format_conditional"> <param name="input_format" value="excel_tables"/> <param name="viral_submission" value="True"/> - <param name="dry_run" value="True"/> <param name="xlsx_file" value="metadata_test_viral.xlsx"/> - <param name="data" value="sample.fq"/> + <conditional name="run_input_format_conditional"> + <param name="run_input_format" value="multiple_selection_list"/> + <param name="data" value="sample.fq"/> + </conditional> </conditional> </conditional> <param name="center" value="Some research center"/> @@ -271,13 +308,18 @@ <test> <conditional name="action_options"> <param name="action" value="add"/> - <param name="submit_dev" value="False"/> + <section name="test_submit_parameters"> + <param name="submit_dev" value="false" /> + <param name="dry_run" value="true" /> + </section> <conditional name="input_format_conditional"> <param name="input_format" value="excel_tables"/> <param name="viral_submission" value="False"/> - <param name="dry_run" value="True"/> <param name="xlsx_file" value="metadata_test_nonviral.xlsx"/> - <param name="data" value="sample.fq"/> + <conditional name="run_input_format_conditional"> + <param name="run_input_format" value="multiple_selection_list"/> + <param name="data" value="sample.fq"/> + </conditional> </conditional> </conditional> <param name="center" value="Some research center"/> @@ -311,14 +353,50 @@ </assert_contents> </output> </test> + <!--Test failure on excel input of NON-VIRAL samples with runs PAIRED collection --> + <test expect_failure="true"> + <conditional name="action_options"> + <param name="action" value="add"/> + <section name="test_submit_parameters"> + <param name="submit_dev" value="false" /> + <param name="dry_run" value="false" /> + </section> + <conditional name="input_format_conditional"> + <param name="input_format" value="excel_tables"/> + <param name="viral_submission" value="False"/> + <param name="xlsx_file" value="metadata_test_nonviral.xlsx"/> + <conditional name="run_input_format_conditional"> + <param name="run_input_format" value="paired_list"/> + <param name="paired_end_collection"> + <collection type="list:paired"> + <element name="paired_run_name"> + <collection type="paired"> + <element name="forward" value="1.fastqsanger.gz" ftype="fastqsanger.gz" /> + <element name="reverse" value="2.fastqsanger.gz" ftype="fastqsanger.gz" /> + </collection> + </element> + </collection> + </param> + </conditional> + </conditional> + </conditional> + <param name="center" value="Some research center"/> + <assert_command> + <has_text_matching expression="ena-upload-cli"/> + <has_text_matching expression="--data 'paired_run_name_1.fastq.gz' 'paired_run_name_2.fastq.gz'"/> + <has_text_matching expression="--action 'add' --center 'Some research center'"/> + </assert_command> + </test> <!--Test build tables from user input fields NON-VIRAL samples--> <test> <conditional name="action_options"> <param name="action" value="add"/> - <param name="submit_dev" value="False"/> + <section name="test_submit_parameters"> + <param name="submit_dev" value="false" /> + <param name="dry_run" value="true" /> + </section> <conditional name="input_format_conditional"> <param name="input_format" value="build_tables"/> - <param name="dry_run" value="True"/> <conditional name="conditional_viral_metadata"> <param name="viral_sample" value="False"/> <repeat name="rep_study"> @@ -383,12 +461,14 @@ <test expect_failure="true"> <conditional name="action_options"> <param name="action" value="add"/> - <param name="submit_dev" value="True"/> + <section name="test_submit_parameters"> + <param name="submit_dev" value="true" /> + <param name="dry_run" value="false" /> + </section> <conditional name="input_format_conditional"> <param name="input_format" value="build_tables"/> - <param name="dry_run" value="False"/> <conditional name="conditional_viral_metadata"> - <param name="viral_sample" value="False"/> + <param name="viral_sample" value="false"/> <repeat name="rep_study"> <param name="study_title" value="Test study title"/> <param name="study_abstract" value="Test study abstract"/> @@ -428,13 +508,15 @@ <test expect_failure="true"> <conditional name="action_options"> <param name="action" value="add"/> - <param name="submit_dev" value="True"/> + <section name="test_submit_parameters"> + <param name="submit_dev" value="true" /> + <param name="dry_run" value="false" /> + </section> <param name="test_submit" value="True"/> <conditional name="input_format_conditional"> <param name="input_format" value="build_tables"/> - <param name="dry_run" value="False"/> <conditional name="conditional_viral_metadata"> - <param name="viral_sample" value="False"/> + <param name="viral_sample" value="false"/> <repeat name="rep_study"> <param name="study_title" value="Test study title"/> <param name="study_abstract" value="Test study abstract"/> @@ -457,6 +539,7 @@ <param name="platform" value="ILLUMINA"/> <param name="instrument_model" value="Illumina HiSeq 4000"/> <repeat name="rep_runs"> + <param name="run_base_name" value="run_from_hospital_X"/> <param name="upload_files" value="1.fastqsanger.gz,sample.fq" ftype="fastqsanger"/> </repeat> </repeat> @@ -472,18 +555,20 @@ <has_text_matching expression="--action 'add' --center 'Some research center'"/> </assert_command> <assert_stderr> - <has_text_matching expression="ENA_upload: error: Oops, the file test_fake_path does not exist"/> + <has_text_matching expression="Oops, the file test_fake_path does not exist"/> </assert_stderr> </test> <!--test viral submission - User input metadata--> <test expect_failure="true"> <conditional name="action_options"> <param name="action" value="add"/> - <param name="submit_dev" value="False"/> + <section name="test_submit_parameters"> + <param name="submit_dev" value="false" /> + <param name="dry_run" value="false" /> + </section> <param name="test_submit" value="True"/> <conditional name="input_format_conditional"> <param name="input_format" value="build_tables"/> - <param name="dry_run" value="False"/> <conditional name="conditional_viral_metadata"> <param name="viral_sample" value="True"/> <repeat name="rep_study"> @@ -518,6 +603,7 @@ <param name="platform" value="ILLUMINA"/> <param name="instrument_model" value="Illumina HiSeq 4000"/> <repeat name="rep_runs"> + <param name="run_base_name" value="run_from_hospital_X"/> <param name="upload_files" value="1.fastqsanger.gz,sample.fq" ftype="fastqsanger"/> </repeat> </repeat> @@ -534,7 +620,7 @@ <has_text_matching expression="--vir"/> </assert_command> <assert_stderr> - <has_text_matching expression="ENA_upload: error: Oops, the file test_fake_path does not exist"/> + <has_text_matching expression="Oops, the file test_fake_path does not exist"/> </assert_stderr> </test> </tests>
--- a/extract_tables.py Sat Nov 28 09:45:44 2020 +0000 +++ b/extract_tables.py Fri Apr 30 12:09:25 2021 +0000 @@ -3,6 +3,11 @@ import pathlib from datetime import datetime +""" +Parse the configfile generated by the Galaxy tool. +This file is JSON-formatted and should be converted to a set of tabular files. +""" + FILE_FORMAT = 'fastq' parser = argparse.ArgumentParser() @@ -78,11 +83,15 @@ 'submission_date_ENA']) + '\n') run_index = 0 # exp['runs'] is a list of lists - for run in exp['runs']: + for (base_run, run_files) in exp['runs']: run_index += 1 - run_alias = '.'.join(['run_' + str(run_index), str(exp_index), str(sample_index)]) \ - + '_' + timestamp - for file_entry in run: + if base_run != '': + run_alias = base_run + else: + # no alias provided, generated a unique one + run_alias = '_'.join(['run_' + str(run_index), str(exp_index), + str(sample_index)]) + '_' + timestamp + for file_entry in run_files: runs_table.write('\t'.join([run_alias, action, 'ena_run_accession', exp_alias, file_entry, FILE_FORMAT, 'file_checksum', 'submission_date_ENA']) + '\n')
--- a/process_xlsx.py Sat Nov 28 09:45:44 2020 +0000 +++ b/process_xlsx.py Fri Apr 30 12:09:25 2021 +0000 @@ -3,7 +3,7 @@ import sys import xlrd - +import yaml FILE_FORMAT = 'fastq' @@ -36,15 +36,45 @@ sheet_col_index = sheet_columns[expected_columns[col]] row_dict[expected_columns[col]] = xl_sheet.cell(row_id, sheet_col_index).value # should check for duplicate alias/ids? - data_dict[xl_sheet.cell(row_id, index_col).value] = row_dict + if xl_sheet.cell(row_id, index_col).value in data_dict.keys(): + tmp = data_dict[xl_sheet.cell(row_id, index_col).value] + data_dict[xl_sheet.cell(row_id, index_col).value] = [tmp] + data_dict[xl_sheet.cell(row_id, index_col).value].append(row_dict) + else: + data_dict[xl_sheet.cell(row_id, index_col).value] = row_dict return data_dict +def paste_xls2yaml(xlsx_path): + print('YAML -------------') + xls = xlrd.open_workbook(xlsx_path) + content_dict = {} + for sheet_name in xls.sheet_names(): + if sheet_name == 'controlled_vocabulary': + continue + xls_sheet = xls.sheet_by_name(sheet_name) + sheet_contents_dict = {} + colnames = [] + for col in range(xls_sheet.ncols): + colnames.append(xls_sheet.cell(0, col).value) + # skip first 2 rows (column names and suggestions) + for row_id in range(2, xls_sheet.nrows): + row_dict = {} + for col_id in range(0, xls_sheet.ncols): + row_dict[colnames[col_id]] = xls_sheet.cell(row_id, col_id).value + # should check for duplicate alias/ids? + sheet_contents_dict[row_id] = row_dict + content_dict[sheet_name] = sheet_contents_dict + yaml.dump(content_dict, sys.stdout) + print('YAML -------------') + + parser = argparse.ArgumentParser() parser.add_argument('--form', dest='xlsx_path', required=True) parser.add_argument('--out_dir', dest='out_path', required=True) parser.add_argument('--action', dest='action', required=True) parser.add_argument('--vir', dest='viral_submission', required=False, action='store_true') +parser.add_argument('--verbose', dest='verbose', required=False, action='store_true') args = parser.parse_args() xl_workbook = xlrd.open_workbook(args.xlsx_path) @@ -77,9 +107,11 @@ xl_sheet = xl_workbook.sheet_by_name('ENA_experiment') if xl_sheet.nrows < 3: raise ValueError('No experiments found in experiments sheet') -exp_columns = ['alias', 'title', 'study_alias', 'sample_alias', 'design_description', 'library_name', - 'library_strategy', 'library_source', 'library_selection', 'library_layout', - 'insert_size', 'library_construction_protocol', 'platform', 'instrument_model'] +exp_columns = ['alias', 'title', 'study_alias', 'sample_alias', 'design_description', + 'library_name', 'library_strategy', 'library_source', 'library_selection', + 'library_layout', 'insert_size', 'library_construction_protocol', + 'platform', 'instrument_model'] + experiments_dict = extract_data(xl_sheet, exp_columns) # PARSE RUNS SHEET @@ -123,6 +155,8 @@ # ADD A TIMESTAMP TO THE ALIAS? SEEMS LIKE ENA REQUIRES ALL ENTRIES FOR A WEBIN TO HAVE UNIQUE IDS? # dt_oobj = datetime.now(tz=None) # timestamp = dt_oobj.strftime("%Y%m%d_%H:%M:%S") +runs_included = [] +exp_included = [] for study_alias, study in studies_dict.items(): # study_alias = study_alias + '_' + timestamp studies_table.write('\t'.join([study_alias, action, 'ENA_accession', study['title'], @@ -162,12 +196,37 @@ exp['library_construction_protocol'], exp['platform'], exp['instrument_model'], 'submission_date_ENA']) + '\n') + exp_included.append(exp_alias) for run_alias, run in runs_dict.items(): - if run['experiment_alias'] == exp_alias: - runs_table.write('\t'.join([run_alias, action, 'ena_run_accession', exp_alias, - run['file_name'], FILE_FORMAT, 'file_checksum', - 'submission_date_ENA']) + '\n') + # check that the experiments library_layout is set to paired + # when multiple entries are associated with the same run alias + if not isinstance(run, list): + runs_list = [run] + else: + runs_list = run + for run_entry in runs_list: + if run_entry['experiment_alias'] == exp_alias: + runs_table.write('\t'.join([run_alias, action, 'ena_run_accession', + exp_alias, run_entry['file_name'], + FILE_FORMAT, 'file_checksum', + 'submission_date_ENA']) + '\n') + runs_included.append(run_alias) + +# check if any experiment or run was not associated with any sample +for run in runs_dict.keys(): + if run not in runs_included: + print(f'The run {run} is listed in the runs section but not associated with any \ + used experiment') + +for exp in experiments_dict.keys(): + if exp not in exp_included: + print(f'The experiment {exp} is listed in the experiments section but not associated \ + with any used sample') + studies_table.close() samples_table.close() experiments_table.close() runs_table.close() + +if args.verbose: + paste_xls2yaml(args.xlsx_path)
--- a/samples_macros.xml Sat Nov 28 09:45:44 2020 +0000 +++ b/samples_macros.xml Fri Apr 30 12:09:25 2021 +0000 @@ -1,5 +1,24 @@ <macros> - + <xml name="test_submit_section"> + <section name="test_submit_parameters" expanded="true" title="Testing options"> + <param name="submit_dev" type="boolean" truevalue="true" falsevalue="false" label="Submit to test ENA server?" help="By selecting yes the reads will be submitted to the ENA test server. Uploads to test platform will not be public and will be removed in 24hrs. Performing a preliminary test upload is advised to check for errors with metadata structure. You can find the uploads to the test platform at https://wwwdev.ebi.ac.uk/ena/" /> + <param name="dry_run" type="boolean" truevalue="true" falsevalue="false" label="Print the tables but do not submit the datasets" help="If yes is selected then NO submission will be performed."/> + </section> + </xml> + <xml name="run_inputs_macro"> + <conditional name="run_input_format_conditional"> + <param name="run_input_format" type="select" label="Select runs input format"> + <option value="multiple_selection_list" selected="True">Select individual datasets or datasets collection</option> + <option value="paired_list" selected="False">Input from a paired collection</option> + </param> + <when value="multiple_selection_list"> + <param name="data" type="data" format="fastqsanger.gz,fastqsanger.bz2,fastq.gz,fastq.bz2" multiple="true" label="Select individual datasets or a dataset collection" help="Names should match the compressed run's files names defined in the metadata"/> + </when> + <when value="paired_list"> + <param name="paired_end_collection" collection_type="list:paired" type="data_collection" format="fastqsanger.gz,fastqsanger.bz2,fastq.gz,fastq.bz2" label="List of paired-end runs files" help="Names should match the compressed run's files names defined in the metadata" /> + </when> + </conditional> + </xml> <xml name="table_inputs_macro"> <conditional name="input_format_conditional"> <param name="input_format" type="select" label="Would you like to submit pregenerated table files or interactively define the input structures?"> @@ -9,22 +28,20 @@ </param> <when value="excel_tables"> <param name="viral_submission" type="boolean" label="Does your submission data belong to a viral sample?" help="If you select yes then your data will be submitted using the ENA virus pathogen reporting standard checklist (see: https://ena-browser-docs.readthedocs.io/en/latest/help_and_guides/sars-cov-2-submissions.html)" /> - <param name="dry_run" type="boolean" label="Print the tables but do not submit the datasets" help="If yes is selected then NO submission will be performed."/> - <param name="xlsx_file" type="data" format="xlsx" /> - <param name="data" type="data" format="fastqsanger.gz,fastqsanger.bz2,fastq.gz,fastq.bz2" multiple="true" label="Select all datasets to upload" help="Compressed reads files listed in the runs table"/> + <param name="xlsx_file" type="data" format="xlsx" label="Select Excel (xlsx) file based on templates" /> + <expand macro="run_inputs_macro" /> </when> <when value="user_generated_tables"> <param name="viral_submission" type="boolean" label="Does your submission data belong to a viral sample?" help="If you select yes then your data will be submitted using the ENA virus pathogen reporting standard checklist (see: https://ena-browser-docs.readthedocs.io/en/latest/help_and_guides/sars-cov-2-submissions.html)" /> - <param name="data" type="data" format="fastqsanger.gz,fastqsanger.bz2,fastq.gz,fastq.bz2" multiple="true" label="Select all datasets to upload" help="Compressed reads files listed in the runs table"/> + <expand macro="run_inputs_macro" /> <param name="studies_users_table" type="data" format="tabular" multiple="false" label="Studies table" help="Studies metadata file"/> <param name="samples_users_table" type="data" format="tabular" multiple="false" label="Samples table" help="Samples metadata file"/> <param name="experiments_users_table" type="data" format="tabular" multiple="false" label="Experiments table" help="Experiments metadata file"/> <param name="runs_users_table" type="data" format="tabular" multiple="false" label="Runs table" help="Runs metadata file"/> </when> <when value="build_tables"> - <param name="dry_run" type="boolean" label="Print the tables but do not submit the datasets" help="If yes is selected then NO submission will be performed."/> <conditional name="conditional_viral_metadata"> - <param name="viral_sample" type="boolean" label="Does your submission contains viral samples?" /> + <param name="viral_sample" type="boolean" truevalue="true" falsevalue="false" label="Does your submission contains viral samples?" /> <when value="true"> <expand macro="viral_samples" /> </when> @@ -51,8 +68,11 @@ <param name="sample_description" type="text" help="e.g: liver cells" label="Describe the type of sample"/> <param name="scientific_name" type="text" label="Enter the species of the sample" help="e.g Severe acute respiratory syndrome coronavirus 2"/> <param name="tax_id" type="text" label="Enter the taxonomic ID corresponding to the sample species" /> - <param name="collection_date" type="text" label="Collection date" optional="True" help="options are: YYYY, YYYY/MM, YYYY/MM/DD, not collected, restricted access or leave blank"> - <validator type="regex" message="Data format is not valid">(^[0-9]{4}(-[0-9]{2}(-[0-9]{2}(T[0-9]{2}:[0-9]{2}(:[0-9]{2})?Z?([+-][0-9]{1,2})?)?)?)?(/[0-9]{4}(-[0-9]{2}(-[0-9]{2}(T[0-9]{2}:[0-9]{2}(:[0-9]{2})?Z?([+-][0-9]{1,2})?)?)?)?)?$)|(^not colected$)|(^not provided$)|(^restricted access$)</validator> + <param name="collection_date" type="text" label="Collection date" optional="True" help="options are: YYYY, YYYY-MM, YYYY-MM-DD, not collected, restricted access or not provided"> + <option value="not collected">not collected</option> + <option value="restricted access">restricted access</option> + <option value="not provided">not provided</option> + <validator type="regex" message="Data format is not valid">(^[0-9]{4}(-[0-9]{2}(-[0-9]{2}(T[0-9]{2}:[0-9]{2}(:[0-9]{2})?Z?([+-][0-9]{1,2})?)?)?)?(/[0-9]{4}(-[0-9]{2}(-[0-9]{2}(T[0-9]{2}:[0-9]{2}(:[0-9]{2})?Z?([+-][0-9]{1,2})?)?)?)?)?$)|(^not collected$)|(^not provided$)|(^restricted access$)</validator> </param> <param name="geo_location_country" type="select" label="Select the country where the sample was obtained"> <options from_data_table="geographic_location_1"> @@ -118,6 +138,7 @@ </options> </param> <repeat name="rep_runs" title="Runs executed within this experiment" min="1" > + <param name="run_base_name" type="text" optional="False" default="" label="Run alias" help="If an alias is not provided it will be generated combining the sample and experiment indexes"/> <param name="upload_files" type="data" format="fastqsanger.gz,fastqsanger.bz2,fastq.gz,fastq.bz2" multiple="true" label="File(s) associated with this run"/> </repeat> </repeat> @@ -166,7 +187,7 @@ <param name="library_construction_protocol" type="text" label="Please describe the library construction protocol"/> <param name="platform" type="select" label="Select the sequencing platform used"> <option value="LS454">LS454</option> - <option value="ILLUMINA">Illumina</option> + <option value="ILLUMINA" selected="True">Illumina</option> <option value="HELICOS">Helicos</option> <option value="ABI_SOLID">ABI Solid</option> <option value="COMPLETE_GENOMICS">Complete Genomics</option> @@ -182,6 +203,7 @@ </options> </param> <repeat name="rep_runs" title="Runs executed within this experiment" min="1" > + <param name="run_base_name" type="text" optional="False" default="" label="Run alias" help="If an alias is not provided it will be generated combining the sample and experiment indexes"/> <param name="upload_files" type="data" format="fastqsanger.gz,fastqsanger.bz2,fastq.gz,fastq.bz2" multiple="true" label="File(s) associated with this run"/> </repeat> </repeat>