view ena_upload.xml @ 9:a62c4a11a67d draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 6770d277b4136b4068293c4260022d4ae33b2379
author iuc
date Thu, 10 Nov 2022 15:18:00 +0000
parents d147d6455873
children 480d9e9d156b
line wrap: on
line source

<tool id="ena_upload" name="ENA Upload tool" version="@VERSION@" profile="20.01" license="MIT">
    <!-- Shared definitions: the @VERSION@ token is used for both the tool version and the ena-upload-cli requirement version; further macros are imported from samples_macros.xml. -->
    <macros>
        <token name="@VERSION@">0.6.1</token>
        <import>samples_macros.xml</import>
    </macros>
    <!-- Runtime dependency: the ena-upload-cli package, pinned to the value of the @VERSION@ token. -->
    <requirements>
        <requirement type="package" version="@VERSION@">ena-upload-cli</requirement>
    </requirements>
    <!-- Mark the job as failed when stderr contains one of these messages; the command element additionally declares detect_errors="exit_code". -->
    <stdio>
        <regex match="Oops" source="stderr" level="fatal"/>
        <regex match="different file names between command line and RUN table" source="stderr" level="fatal"/>
    </stdio>
    <command detect_errors="exit_code"><![CDATA[
## Create the working directory that holds the metadata tables handed to ena-upload-cli.
mkdir ./submission_files;
## Paths of the tables used as submission input.
#set $studies_table_path = './submission_files/studies.tsv'
#set $samples_table_path = './submission_files/samples.tsv'
#set $experiments_table_path = './submission_files/experiments.tsv'
#set $runs_table_path = './submission_files/runs.tsv'

## Paths of the "_updated" tables that are copied to the outputs at the end of the command.
#set $studies_table_path_updated = './submission_files/studies_updated.tsv'
#set $samples_table_path_updated = './submission_files/samples_updated.tsv'
#set $experiments_table_path_updated = './submission_files/experiments_updated.tsv'
#set $runs_table_path_updated = './submission_files/runs_updated.tsv'

## When the metadata is entered in the tool form, build the tables from the studies_json config file.
## The config file path and the action value are quoted like every other path in this command.
#if $action_options.input_format_conditional.input_format == "build_tables":
  python '$__tool_directory__/extract_tables.py' --action '$action_options.action' --out_dir ./submission_files --studies '$studies_json';
#end if
        
## Start with a placeholder credentials file; credentials_path is only re-pointed further down
## when dry_run is "false" and test_submit is "False".
credentials_path='test_fake_path';
echo "username: test_user" > \$credentials_path;
echo "password: test_password" >> \$credentials_path;


## Stage the metadata for ena-upload-cli according to the selected input format.
## Output dataset paths are single-quoted, matching the quoting already used for the input datasets.
#if $action_options.input_format_conditional.input_format == "build_tables":
    ## publish the freshly built tables as outputs before anything is submitted
    cp $studies_table_path '$studies_table_out' &&
    cp $samples_table_path '$samples_table_out' &&
    cp $experiments_table_path '$experiments_table_out' &&
    cp $runs_table_path '$runs_table_out' &&
#end if
#if $action_options.input_format_conditional.input_format == "user_generated_tables":
    ## expose the user supplied tables under the table paths used by the submission call
    ln -s '$action_options.input_format_conditional.experiments_users_table' $experiments_table_path &&
    ln -s '$action_options.input_format_conditional.studies_users_table' $studies_table_path &&
    ln -s '$action_options.input_format_conditional.runs_users_table' $runs_table_path &&
    ln -s '$action_options.input_format_conditional.samples_users_table' $samples_table_path &&
#end if
#if $action_options.input_format_conditional.input_format == "excel_tables":
    ## the spreadsheet is passed to ena-upload-cli under a fixed local name
    ln -s '$action_options.input_format_conditional.xlsx_file' ./xlsx_input.xlsx &&
#end if
## Real submissions only: pick the credentials file that ena-upload-cli will read.
## The user's own credentials (credentials config file) win; otherwise fall back to the file named by
## the GALAXY_ENA_SECRETS environment variable; abort when neither provides a username.
#if $action_options.test_submit_parameters.dry_run == "false" and $action_options.test_submit == "False":
    ## file paths are quoted so that the lookups survive paths containing whitespace
    webin_id=`grep 'username' '$credentials'`;
    if [ "\$webin_id" = "" ]; then
      ## No credentials in user defined preferences
      ## Fallback to global defined credentials (if exist)
      #import os
      #if os.path.isfile(os.environ.get('GALAXY_ENA_SECRETS', '')):
          credentials_path="\${GALAXY_ENA_SECRETS}";
          webin_id=`grep 'username' "\$GALAXY_ENA_SECRETS"`;
          if [ "\$webin_id" = "" ]; then
              echo "No global credentials defined. Check your GALAXY_ENA_SECRETS file or set your credentials via: User -> Preferences -> Manage Information";
              exit 1;
          fi;
      #else:
          echo "No ENA credentials defined. Set your credentials via: User -> Preferences -> Manage Information";
          exit 1;
      #end if
    else
      credentials_path='$credentials';
    fi;
#end if


## create the list of files to upload and make the symlinks 
#import re      
#set $files_to_upload = list()
#if $action_options.input_format_conditional.input_format == "build_tables":
    ## Walk every study / sample / experiment / run entered in the form and stage each read file
    ## in the working directory under a sanitised name.
    #for $study in $action_options.input_format_conditional.conditional_viral_metadata.rep_study:
      #for $sample in $study.rep_sample:
        #for $experiment in $sample.rep_experiment:
          #for $run in $experiment.rep_runs:
            #for $file in $run.upload_files:
                ## every character that is not a word character, dash, underscore or dot becomes an underscore
                #set $safename_reads_file = re.sub('[^\w\-_\.]', '_', $file.element_identifier)
                ## optional '.fastq' suffix, controlled by the add_extension parameter
                #if $action_options.input_format_conditional.add_extension == "true":
                    #set $extension = '.fastq'
                #else
                    #set $extension = ''
                #end if
                #if $file.is_of_type('fastq', 'fastqsanger'):
                    ## compression output is defined as safename_reads_file so no need to symlink
                    #set $safename_reads_file = $safename_reads_file + $extension + '.gz'
                    gzip -c '$file' > $safename_reads_file &&
                #else:
                    ## any other datatype is symlinked as is; a suffix is appended only when add_extension is true
                    ## NOTE(review): compression is only assigned for the gz and bz2 datatypes below; a different
                    ## datatype reaching this branch would reuse an earlier value or hit an undefined name - confirm
                    ## that the accepted input formats rule this out
                    #if $action_options.input_format_conditional.add_extension == "true":
                        #if $file.is_of_type('fastq.gz', 'fastqsanger.gz'):
                            #set $compression = '.gz' 
                        #elif $file.is_of_type('fastqsanger.bz2', 'fastq.bz2'):
                            #set $compression = '.bz2' 
                        #end if
                        #set $safename_reads_file = $safename_reads_file + $extension + $compression 
                    #end if
                    ln -s '$file' $safename_reads_file &&
                #end if
                ## remember the staged name; the list is expanded after the data option of ena-upload-cli
                $files_to_upload.append(str($safename_reads_file))
            #end for
          #end for
        #end for
      #end for
    #end for
#else:
    ## Paired-end collection: stage each pair as NAME_1.fastq.EXT (forward) and NAME_2.fastq.EXT (reverse),
    ## where NAME is the sanitised pair name and EXT is gz or bz2.
    #if $action_options.input_format_conditional.run_input_format_conditional.run_input_format == 'paired_list':
        #for $pair in $action_options.input_format_conditional.run_input_format_conditional.paired_end_collection:
            #set $safename_reads_file = re.sub('[^\w\-_\.]', '_', $pair.name)
            ## Always need to add .fastq + compression suffix because the name is based on the pair name which has no extensions
            #if $pair.forward.is_of_type('fastq', 'fastqsanger'):
                ## uncompressed forward reads: gzip them straight into the staged name, no symlink needed
                ## (the source must be the forward element of the pair; there is no file variable in this loop)
                #set $safename_fwd_reads_file = $safename_reads_file + '_1' + '.fastq' + '.gz'
                gzip -c '$pair.forward' > $safename_fwd_reads_file &&
            #else
                ## already compressed forward reads: symlink under a name carrying the matching suffix
                #if $pair.forward.is_of_type('fastq.gz', 'fastqsanger.gz'):
                    #set $compression = '.gz'
                #elif $pair.forward.is_of_type('fastqsanger.bz2', 'fastq.bz2'):
                    #set $compression = '.bz2'
                #end if
                #set $safename_fwd_reads_file = $safename_reads_file + '_1' + '.fastq' + $compression
                ln -s '$pair.forward' $safename_fwd_reads_file &&
            #end if

            #if $pair.reverse.is_of_type('fastq', 'fastqsanger'):
                ## uncompressed reverse reads: gzip them into the _2 name, using the same variable
                ## that the else branch sets and that is registered for upload below
                #set $safename_rev_reads_file = $safename_reads_file + '_2' + '.fastq' + '.gz'
                gzip -c '$pair.reverse' > $safename_rev_reads_file &&
            #else
                ## already compressed reverse reads: symlink under a name carrying the matching suffix
                #if $pair.reverse.is_of_type('fastqsanger.bz2', 'fastq.bz2'):
                    #set $compression = '.bz2'
                #elif $pair.reverse.is_of_type('fastqsanger.gz', 'fastq.gz'):
                    #set $compression = '.gz'
                #end if
                #set $safename_rev_reads_file = $safename_reads_file + '_2' + '.fastq' + $compression
                ln -s '$pair.reverse' $safename_rev_reads_file &&
            #end if
            ## register both staged names; the list is expanded after the data option of ena-upload-cli
            $files_to_upload.append(str($safename_fwd_reads_file))
            $files_to_upload.append(str($safename_rev_reads_file))
        #end for
    #end if
    ## Flat list of datasets: stage every selected file under a sanitised name derived from its element identifier.
    #if $action_options.input_format_conditional.run_input_format_conditional.run_input_format == 'multiple_selection_list':
        #for $file in $action_options.input_format_conditional.run_input_format_conditional.data:
            #set $safename_reads_file = re.sub('[^\w\-_\.]', '_', $file.element_identifier)
            #if $file.is_of_type('fastq', 'fastqsanger'):
                ## always compress add the gz extension
                #if $action_options.input_format_conditional.run_input_format_conditional.add_extension == "true":
                    #set $safename_reads_file = $safename_reads_file + '.fastq.gz'
                #else
                    #set $safename_reads_file = $safename_reads_file + '.gz'
                #end if   
                gzip -c '$file' > $safename_reads_file &&
            #else
                ## any other datatype is symlinked as is; a suffix is appended only when add_extension is true
                ## NOTE(review): extension is only assigned for the gz and bz2 datatypes below; a different
                ## datatype reaching this branch would reuse an earlier value or hit an undefined name - confirm
                ## that the accepted input formats rule this out
                #if $action_options.input_format_conditional.run_input_format_conditional.add_extension == "true":
                    #if $file.is_of_type('fastq.gz', 'fastqsanger.gz'):
                        #set $extension = '.fastq.gz'
                    #elif $file.is_of_type('fastqsanger.bz2', 'fastq.bz2'):
                        #set $extension = '.fastq.bz2'
                    #end if
                    #set $safename_reads_file = $safename_reads_file + $extension
                #end if
                ln -s '$file' $safename_reads_file &&
            #end if      
            ## remember the staged name; the list is expanded after the data option of ena-upload-cli
            $files_to_upload.append(str($safename_reads_file))
        #end for
    #end if
#end if

## Submission call for the "add" action; its standard output is appended to the receipt output.
## NOTE(review): the action option is emitted twice in this call (once from the parameter, once as a
## literal add further down) - confirm whether the second occurrence is still needed.
#if $action_options.action == "add":
ena-upload-cli
    --tool 'ena-upload-cli v@VERSION@ @ Galaxy'
    --action '$action_options.action'
    --center '$action_options.center'
    --secret \${credentials_path}
    --data
    #for $dataset in $files_to_upload:
        '$dataset'
    #end for
#if $action_options.test_submit_parameters.dry_run == "true":
    --draft
#end if
## metadata source: the four tables, or the spreadsheet for the excel input format
#if $action_options.input_format_conditional.input_format != "excel_tables":
    --experiment '$experiments_table_path'
    --study '$studies_table_path'
    --run '$runs_table_path'
    --sample '$samples_table_path'
#else
    --xlsx ./xlsx_input.xlsx 
#end if
--action add
## checklist: taken from checklist_id for user tables and excel input; for tables built from the form
## only the viral case sets one (ERC000033)
#if $action_options.input_format_conditional.input_format == "user_generated_tables":
        --checklist $action_options.input_format_conditional.checklist_id
#else:
    #if $action_options.input_format_conditional.input_format == "build_tables":
        #if $action_options.input_format_conditional.conditional_viral_metadata.viral_sample == "true":
          --checklist ERC000033
        #end if
    #else:
          --checklist $action_options.input_format_conditional.checklist_id
    #end if
#end if
#if $action_options.idempotent == "true":
    --auto_action
#end if
#if $action_options.test_submit_parameters.submit_dev == "true":
    -d
#end if
    >> '$output';
#end if



## Submission call for the "modify" action; its standard output is appended to the receipt output.
## Unlike the "add" call there is no idempotent or submit_dev handling here, and the auto_action flag
## is always passed together with the spreadsheet for the excel input format.
## NOTE(review): the action option is emitted twice in this call (once from the parameter, once as a
## literal 'modify' further down) - confirm whether the second occurrence is still needed.
#if $action_options.action == "modify":
    ena-upload-cli
    --tool 'ena-upload-cli v@VERSION@ @ Galaxy'
    --action '$action_options.action'
    --center '$action_options.center'
    --secret \${credentials_path}
    --data
    #for $dataset in $files_to_upload:
        '$dataset'
    #end for
#if $action_options.test_submit_parameters.dry_run == "true":
    --draft
#end if
#if $action_options.input_format_conditional.input_format != "excel_tables":
    --experiment '$experiments_table_path'
    --study '$studies_table_path'
    --run '$runs_table_path'
    --sample '$samples_table_path'
#else
    --xlsx ./xlsx_input.xlsx 
    --auto_action
#end if
--action 'modify'
## checklist selection follows the same rules as in the "add" call
#if $action_options.input_format_conditional.input_format == "user_generated_tables":
        --checklist $action_options.input_format_conditional.checklist_id
#else:
    #if $action_options.input_format_conditional.input_format == "build_tables":
        #if $action_options.input_format_conditional.conditional_viral_metadata.viral_sample == "true":
          --checklist ERC000033
        #end if
    #else:
          --checklist $action_options.input_format_conditional.checklist_id
    #end if
#end if
    >> '$output';
#end if

## For submissions that are not a dry run, append the center name and the executed action
## to the receipt as tab-separated key/value lines.
#if $action_options.test_submit_parameters.dry_run == "false":
    echo -e 'center_name\t$action_options.center' >> '$output';
    echo -e 'action_option\t$action_options.action' >> '$output';
#end if
        
## copy updated files
## Output dataset paths are single-quoted here, matching how the receipt path is quoted elsewhere in this command.
#if $action_options.input_format_conditional.input_format == "excel_tables":
    ## for the excel input case, copy the upload-cli generated tables to the output files
    ## this applies for both draft and real submissions
    cp './ENA_template_experiment_updated.tsv' '$experiments_table_out';
    cp './ENA_template_sample_updated.tsv' '$samples_table_out';
    cp './ENA_template_study_updated.tsv' '$studies_table_out';
    cp './ENA_template_run_updated.tsv' '$runs_table_out';
#else
    ## best effort: a missing "_updated" table is not an error, so stderr is discarded
    cp $studies_table_path_updated '$studies_table_out' 2>/dev/null;
    cp $samples_table_path_updated '$samples_table_out' 2>/dev/null;
    cp $experiments_table_path_updated '$experiments_table_out' 2>/dev/null;
    cp $runs_table_path_updated '$runs_table_out' 2>/dev/null;
#end if

## append the output of dump_yaml.py, run on the four table outputs, to the receipt
python '$__tool_directory__/dump_yaml.py' '$studies_table_out' '$samples_table_out' '$experiments_table_out' '$runs_table_out' >> '$output';
]]></command>
    <configfiles>
        <configfile name="credentials"><![CDATA[
## Credentials file built from the ENA Webin account stored in the user's preferences.
## Nothing is written when no Webin id is set; the command section detects that case by
## searching this file for "username".
#import json
#set $webin_id = $__user__.extra_preferences.get('ena_webin_account|webin_id', "").strip()
#set $password = $__user__.extra_preferences.get('ena_webin_account|password', "").strip()
#if $webin_id != "":
    ## json.dumps produces a double-quoted scalar in which quotes, backslashes and control
    ## characters are escaped, so such characters in the password can no longer corrupt the
    ## file; ensure_ascii=False keeps all other characters exactly as they were written before
    #set $webin_id_quoted = json.dumps($webin_id, ensure_ascii=False)
    #set $password_quoted = json.dumps($password, ensure_ascii=False)
    username: $webin_id_quoted
    password: $password_quoted
#end if
        ]]></configfile>
        <configfile name="studies_json">
## Serialises the metadata entered in the tool form as a JSON list of studies, each holding its
## samples, which hold their experiments, which hold their runs as (run base name, file names) pairs.
## Only filled for the build_tables input format; the path of this file is passed to extract_tables.py.
#import json
#import re
#if $action_options.input_format_conditional.input_format == "build_tables":
  ## NOTE(review): files_to_upload is initialised here but not used anywhere else in this config file
  #set $files_to_upload = list()
  #set $studies = list()
  #for $study in $action_options.input_format_conditional.conditional_viral_metadata.rep_study:
    #set samples = list()
    #for $sample in $study.rep_sample:
      #set experiments = list()
      #for $experiment in $sample.rep_experiment:
        #set runs = list()
        #for $run in $experiment.rep_runs:
            #set run_files = list()
            #for $file in $run.upload_files:
              ## same name sanitisation as in the command section; no extension or compression suffix is added here
              #set $safename_reads_file = re.sub('[^\w\-_\.]', '_', $file.element_identifier)
              $run_files.append(str($safename_reads_file))
            #end for
            $runs.append((str($run.run_base_name),$run_files))
        #end for
    
$experiments.append({'title':str($experiment.experiment_title),'experiment_design':str($experiment.experiment_design),'library_strategy':str($experiment.library_strategy),'library_source':str($experiment.library_source),'library_selection':str($experiment.library_selection),'library_layout':str($experiment.library_layout),'insert_size':str($experiment.insert_size),'library_construction_protocol':str($experiment.library_construction_protocol),'platform':str($experiment.platform),'instrument_model':str($experiment.instrument_model),'runs':$runs})
      #end for
      ## viral samples carry the additional collection and host fields
      #if $action_options.input_format_conditional.conditional_viral_metadata.viral_sample == "true":

$samples.append({'title':str($sample.sample_title),'description':str($sample.sample_description),'tax_name':str($sample.scientific_name),'tax_id':str($sample.tax_id),'collection_date':str($sample.collection_date),'geo_location':str($sample.geo_location_country),'host_common_name':str($sample.host_common_name),'host_subject_id':str($sample.host_subject_id),'host_health_state':str($sample.host_health_state),'host_sex':str($sample.host_sex),'host_scientific_name':str($sample.host_scientific_name),'collector_name':str($sample.collector_name),'collecting_institution':str($sample.collecting_institution),'isolate':str($sample.isolate),'experiments':$experiments})
      #else:

$samples.append({'title':str($sample.sample_title),'description':str($sample.sample_description),'tax_name':str($sample.scientific_name),'tax_id':str($sample.tax_id),'experiments':$experiments})
      #end if
    #end for
    $studies.append({'title':str($study.study_title),'type':str($study.study_type),'abstract':str($study.study_abstract),'pubmed_id':str($study.study_pubmed_id),'samples':$samples})
  #end for
  ## emit the collected structure as JSON
  #echo $json.dumps($studies)
#end if
        </configfile>
    </configfiles>
    <!-- All parameters live under the action_options conditional. Both branches expand the same two macros; only the "add" branch offers the idempotent option. -->
    <inputs>
        <conditional name="action_options">
            <param name="action" type="select" label="Action to execute">
                <option value="add" selected="True">Add new data</option>
                <option value="modify">Modify metadata</option>
            </param>
            <when value="add">
                <param type="boolean" name="idempotent" checked="False" label="Idempotent submissions: only add the elements that were not submitted before" help="NOTE: this feature is based on a beta parameter of the CLI that checks the remote ENA repository for entries using the alias. This remote detection can have false positives, i.e assumes that it's present in the repository but it's not"/>
                <expand macro="test_submit_section"/>    
                <!-- Hidden flag: the command section only looks up real credentials when this equals "False" and dry_run is "false". -->
                <param name="test_submit" type="hidden" value="False" />
                <expand macro="table_inputs_macro" />
            </when>
            <when value="modify">
                <expand macro="test_submit_section"/>    
                <!-- Same hidden flag as in the "add" branch. -->
                <param name="test_submit" type="hidden" value="False" />
                <expand macro="table_inputs_macro" />
            </when>
        </conditional>
    </inputs>
    <!-- The receipt is always produced. The four table outputs are only created when the input format is build_tables or excel_tables. -->
    <outputs>
        <data name="output" format="txt" label="ENA submission receipt"/>
        <data name="studies_table_out" format="tabular" label="Studies table">
            <filter> action_options['input_format_conditional']['input_format'] == "build_tables" or action_options['input_format_conditional']['input_format'] == "excel_tables"</filter>
        </data>
        <data name="samples_table_out" format="tabular" label="Samples table">
            <filter> action_options['input_format_conditional']['input_format'] == "build_tables" or action_options['input_format_conditional']['input_format'] == "excel_tables"</filter>
        </data>
        <data name="experiments_table_out" format="tabular" label="Experiments table">
            <filter> action_options['input_format_conditional']['input_format'] == "build_tables" or action_options['input_format_conditional']['input_format'] == "excel_tables"</filter>
        </data>
        <data name="runs_table_out" format="tabular" label="Runs table">
            <filter> action_options['input_format_conditional']['input_format'] == "build_tables" or action_options['input_format_conditional']['input_format'] == "excel_tables"</filter>
        </data>
    </outputs>
    <tests>
       <!--Test 1:  excel input of VIRAL samples -->
        <test>
            <conditional name="action_options">
                <param name="action" value="add"/>
                <section name="test_submit_parameters">
                    <param name="submit_dev" value="false" />
                    <param name="dry_run" value="true" />
                </section>
                <conditional name="input_format_conditional">
                    <param name="input_format" value="excel_tables"/>
                    <param name="checklist_id" value="ERC000033"/>
                    <param name="xlsx_file" value="metadata_test_viral.xlsx"/>
                    <conditional name="run_input_format_conditional">
                        <param name="add_extension" value="false"/>
                        <param name="run_input_format" value="multiple_selection_list"/>
                        <param name="data" value="C030_exp5_clean.fastq,C053_exp5_clean.fastq,C026_exp5_clean.fastq,C067_exp5_clean.fastq"/>
                    </conditional>
                </conditional>
            </conditional>
            <param name="center" value="Some research center"/>
            <output name="experiments_table_out">
                <assert_contents>
                    <has_n_lines n="5"/>
                    <has_n_columns n="17"/>
                    <has_line_matching expression="alias\ttitle\tstudy_alias\tsample_alias\tdesign_description\tlibrary_name\tlibrary_strategy\tlibrary_source\tlibrary_selection\tlibrary_layout\tinsert_size\tlibrary_construction_protocol\tplatform\tinstrument_model\taccession\tsubmission_date\tstatus"/>
                    <has_line_matching expression="e_(.*)_026\tNanopore sequencing\tSARS-CoV-2_genomes_01\ts_(.*)"/>
                </assert_contents>
            </output>
            <output name="studies_table_out">
                <assert_contents>
                    <has_n_lines n="2"/>
                    <has_n_columns n="7"/>
                    <has_line_matching expression="SARS-CoV-2_genomes_01\tWhole-genome sequencing of SARS-CoV-2 from Covid-19 patients\tWhole Genome Sequencing\tWhole-genome sequences of SARS-CoV-2 from oro-pharyngeal swabs obtained from Covid-19 patients(.*)"/>
                </assert_contents>
            </output>
            <output name="samples_table_out">
                <assert_contents>
                    <has_n_lines n="5"/>
                    <has_n_columns n="18"/>
                </assert_contents>
            </output>
            <output name="runs_table_out">
                <assert_contents>
                    <has_n_lines n="5"/>
                    <has_n_columns n="8"/>
                    <has_line_matching expression="alias\texperiment_alias\tfile_name\tfile_format\taccession\tsubmission_date\tstatus\tfile_checksum"/>
                    <has_line_matching expression="r_20201007_026\te_20201007_026\tC026_exp5_clean.fastq.gz\tFASTQ(.*)"/>
                </assert_contents>
            </output>
        </test>
        <!--Test 2: excel input of VIRAL samples with extended columns-->
        <test>
            <conditional name="action_options">
                <param name="action" value="add"/>
                <section name="test_submit_parameters">
                    <param name="submit_dev" value="false" />
                    <param name="dry_run" value="true" />
                </section>
                <conditional name="input_format_conditional">
                    <param name="input_format" value="excel_tables"/>
                    <param name="checklist_id" value="ERC000033"/>
                    <param name="xlsx_file" value="metadata_test_viral_optional_columns.xlsx"/>
                    <conditional name="run_input_format_conditional">
                        <param name="add_extension" value="false"/>
                        <param name="run_input_format" value="multiple_selection_list"/>
                        <param name="data" value="C030_exp5_clean.fastq,C053_exp5_clean.fastq,C026_exp5_clean.fastq,C067_exp5_clean.fastq"/>
                    </conditional>
                </conditional>
            </conditional>
            <param name="center" value="Some research center"/>
            <output name="experiments_table_out">
                <assert_contents>
                    <has_n_lines n="5"/>
                    <has_n_columns n="17"/>
                    <has_line_matching expression="alias\ttitle\tstudy_alias\tsample_alias\tdesign_description\tlibrary_name\tlibrary_strategy\tlibrary_source\tlibrary_selection\tlibrary_layout\tinsert_size\tlibrary_construction_protocol\tplatform\tinstrument_model\taccession\tsubmission_date\tstatus"/>
                    <has_line_matching expression="e_(.*)_026\tNanopore sequencing\tSARS-CoV-2_genomes_01\ts_(.*)"/>
                </assert_contents>
            </output>
            <output name="studies_table_out">
                <assert_contents>
                    <has_n_lines n="2"/>
                    <has_n_columns n="7"/>
                    <has_line_matching expression="alias\ttitle\tstudy_type\tstudy_abstract\taccession\tsubmission_date\tstatus"/>
                    <has_line_matching expression="SARS-CoV-2_genomes_01\tWhole-genome sequencing of SARS-CoV-2 from Covid-19 patients\tWhole Genome Sequencing\tWhole-genome sequences of SARS-CoV-2 from oro-pharyngeal swabs obtained from Covid-19 patients(.*)"/>
                </assert_contents>
            </output>
            <output name="samples_table_out">
                <assert_contents>
                    <has_n_lines n="5"/>
                    <has_n_columns n="43"/>
                </assert_contents>
            </output>
            <output name="runs_table_out">
                <assert_contents>
                    <has_n_lines n="5"/>
                    <has_n_columns n="8"/>
                    <has_line_matching expression="alias\texperiment_alias\tfile_name\tfile_format\taccession\tsubmission_date\tstatus\tfile_checksum"/>
                    <has_line_matching expression="r_20201007_026\te_20201007_026\tC026_exp5_clean.fastq.gz\tFASTQ(.*)"/>
                </assert_contents>
            </output>
            <output name="output">
                <assert_contents>
                    <has_line_matching expression="YAML -------------"/>
                    <has_line_matching expression="ENA_experiment:"/>
                    <has_line_matching expression="ENA_sample:"/>
                    <has_line_matching expression="ENA_study:"/>
                    <has_line_matching expression="ENA_run:"/>
                </assert_contents>
            </output>
        </test>
        <!--Test 3: excel input of NON-VIRAL samples-->
        <test>
            <conditional name="action_options">
                <param name="action" value="add"/>
                <section name="test_submit_parameters">
                    <param name="submit_dev" value="false" />
                    <param name="dry_run" value="true" />
                </section>
                <conditional name="input_format_conditional">
                    <param name="input_format" value="excel_tables"/>
                    <param name="checklist_id" value="ERC000011"/>
                    <param name="xlsx_file" value="metadata_test_nonviral_1_run.xlsx"/>
                    <conditional name="run_input_format_conditional">
                        <param name="add_extension" value="true"/>
                        <param name="run_input_format" value="multiple_selection_list"/>
                        <param name="data" value="sample_no_extension"/>
                    </conditional>
                </conditional>
            </conditional>
            <param name="center" value="Some research center"/>
            <output name="experiments_table_out">
                <assert_contents>
                    <has_n_lines n="2"/>
                    <has_n_columns n="17"/>
                    <has_line_matching expression="alias\ttitle\tstudy_alias\tsample_alias\tdesign_description\tlibrary_name\tlibrary_strategy\tlibrary_source\tlibrary_selection\tlibrary_layout\tinsert_size\tlibrary_construction_protocol\tplatform\tinstrument_model\taccession\tsubmission_date\tstatus"/>
                </assert_contents>
            </output>
            <output name="studies_table_out">
                <assert_contents>
                    <has_n_lines n="2"/>
                    <has_n_columns n="7"/>
                    <has_line_matching expression="alias\ttitle\tstudy_type\tstudy_abstract\taccession\tsubmission_date\tstatus"/>
                </assert_contents>
            </output>
            <output name="samples_table_out">
                <assert_contents>
                    <has_n_lines n="2"/>
                    <has_n_columns n="8"/>
                    <has_line_matching expression="alias\ttitle\tscientific_name\tsample_description\taccession\tsubmission_date\tstatus\ttaxon_id"/>
                </assert_contents>
            </output>
            <output name="runs_table_out">
                <assert_contents>
                    <has_n_lines n="2"/>
                    <has_n_columns n="8"/>
                    <has_line_matching expression="alias\texperiment_alias\tfile_name\tfile_format\taccession\tsubmission_date\tstatus\tfile_checksum"/>
                    <has_line_matching expression="r_20201007_026\te_20201007_026\tsample_no_extension.fastq.gz\tFASTQ(.*)"/>
                </assert_contents>
            </output>
        </test>
        <!--Test 4: failure on excel input of NON-VIRAL samples with runs PAIRED collection -->
        <test expect_failure="true">
            <conditional name="action_options">
                <param name="action" value="add"/>
                <section name="test_submit_parameters">
                    <param name="submit_dev" value="false" />
                    <param name="dry_run" value="false" />
                </section>
                <conditional name="input_format_conditional">
                    <param name="add_extension" value="true"/>
                    <param name="input_format" value="excel_tables"/>
                    <param name="checklist_id" value="ERC000011"/>
                    <param name="xlsx_file" value="metadata_test_nonviral.xlsx"/>
                    <conditional name="run_input_format_conditional">
                        <param name="run_input_format" value="paired_list"/>
                        <param name="paired_end_collection">
                            <collection type="list:paired">
                                <element name="paired_run_name">
                                    <collection type="paired">
                                        <element name="forward" value="1.fastqsanger.gz" ftype="fastqsanger.gz" />
                                        <element name="reverse" value="2.fastqsanger.gz" ftype="fastqsanger.gz" />
                                    </collection>
                                </element>
                            </collection>
                        </param>
                    </conditional>
                </conditional>
            </conditional>
            <param name="center" value="Some research center"/>
            <assert_command>
                <has_text_matching expression="ena-upload-cli"/>
                <has_text_matching expression="--data 'paired_run_name_1.fastq.gz' 'paired_run_name_2.fastq.gz'"/>
                <has_text_matching expression="--action 'add' --center 'Some research center'"/>
            </assert_command>
        </test>
        <!--Test 4b: failure on excel input of NON-VIRAL samples with runs PAIRED collection + Idempotent submission.
            The job is expected to fail (expect_failure="true"); only the generated command line is asserted. -->
        <test expect_failure="true">
            <conditional name="action_options">
                <param name="action" value="add"/>
                <!-- submit_dev and dry_run are both set to false for this test. -->
                <section name="test_submit_parameters">
                    <param name="submit_dev" value="false" />
                    <param name="dry_run" value="false" />
                </section>
                <!-- idempotent is enabled here; the command assertions below expect the auto_action flag
                     (presumably added because of this option; confirm against the command template). -->
                <param name="idempotent" value="true"/>
                <conditional name="input_format_conditional">
                    <param name="add_extension" value="true"/>
                    <param name="input_format" value="excel_tables"/>
                    <param name="checklist_id" value="ERC000011"/>
                    <param name="xlsx_file" value="metadata_test_nonviral.xlsx"/>
                    <conditional name="run_input_format_conditional">
                        <param name="run_input_format" value="paired_list"/>
                        <!-- A single list:paired element; its name (paired_run_name) reappears in the
                             asserted data file names below with _1 / _2 suffixes. -->
                        <param name="paired_end_collection">
                            <collection type="list:paired">
                                <element name="paired_run_name">
                                    <collection type="paired">
                                        <element name="forward" value="1.fastqsanger.gz" ftype="fastqsanger.gz" />
                                        <element name="reverse" value="2.fastqsanger.gz" ftype="fastqsanger.gz" />
                                    </collection>
                                </element>
                            </collection>
                        </param>
                    </conditional>
                </conditional>
            </conditional>
            <param name="center" value="Some research center"/>
            <!-- Assertions run against the rendered command line, not against tool outputs. -->
            <assert_command>
                <has_text_matching expression="ena-upload-cli"/>
                <has_text_matching expression="--data 'paired_run_name_1.fastq.gz' 'paired_run_name_2.fastq.gz'"/>
                <has_text_matching expression="--action 'add' --center 'Some research center'"/>
                <has_text_matching expression="--auto_action"/>
            </assert_command>
        </test>
        <!--Test 5: build tables from user input fields NON-VIRAL samples.
            dry_run is enabled; the test checks the shape of the four generated tables
            (experiments, studies, samples, runs). -->
        <test>
            <conditional name="action_options">
                <param name="action" value="add"/>
                <section name="test_submit_parameters">
                    <param name="submit_dev" value="false" />
                    <param name="dry_run" value="true" />
                </section>
                <conditional name="input_format_conditional">
                    <param name="input_format" value="build_tables"/>
                    <param name="add_extension" value="false"/>
                    <conditional name="conditional_viral_metadata">
                        <param name="viral_sample" value="False"/>
                        <!-- Nested repeats: one study, one sample, one experiment, one run entry. -->
                        <repeat name="rep_study">
                            <param name="study_title" value="Test study title"/>
                            <param name="study_abstract" value="Test study abstract"/>
                            <param name="study_type" value="Epigenetics"/>
                            <param name="study_pubmed_id" value="Test study pubmedID"/>
                            <repeat name="rep_sample">
                                <param name="sample_title" value="Test Sample title"/>
                                <param name="sample_description" value="Test Sample description"/>
                                <param name="scientific_name" value="Test Sample scientific name"/>
                                <param name="tax_id" value="2697049"/>
                                <repeat name="rep_experiment">
                                    <param name="experiment_title" value="Test experiment title"/>
                                    <param name="experiment_design" value="Test experiment design description"/>
                                    <param name="library_strategy" value="CTS"/>
                                    <param name="library_source" value="GENOMIC"/>
                                    <param name="library_selection" value="PCR"/>
                                    <param name="library_layout" value="SINGLE"/>
                                    <param name="insert_size" value="150"/>
                                    <param name="library_construction_protocol" value="Test library construction"/>
                                    <param name="platform" value="ILLUMINA"/>
                                    <param name="instrument_model" value="Illumina HiSeq 4000"/>
                                    <!-- Two files are supplied in a single run entry. -->
                                    <repeat name="rep_runs">
                                        <param name="upload_files" value="1.fastqsanger.gz,sample.fq" ftype="fastqsanger.gz"/>
                                    </repeat>
                                </repeat>
                            </repeat>
                        </repeat>
                    </conditional>
                </conditional>
            </conditional>
            <param name="center" value="Some research center"/>
            <output name="experiments_table_out">
                <assert_contents>
                    <has_n_lines n="2"/>
                    <has_n_columns n="17"/>
                </assert_contents>
            </output>
            <!-- The studies table header row is checked literally. -->
            <output name="studies_table_out">
                <assert_contents>
                    <has_n_lines n="2"/>
                    <has_n_columns n="8"/>
                    <has_line_matching expression="alias\tstatus\ttitle\tstudy_type\tstudy_abstract\tpubmed_id\taccession\tsubmission_date"/>
                </assert_contents>
            </output>
            <output name="samples_table_out">
                <assert_contents>
                    <has_n_lines n="2"/>
                    <has_n_columns n="8"/>
                </assert_contents>
            </output>
            <!-- Three lines expected here; presumably the header plus one row per uploaded file (confirm). -->
            <output name="runs_table_out">
                <assert_contents>
                    <has_n_lines n="3"/>
                    <has_n_columns n="8"/>
                    <has_line_matching expression="alias\tstatus\texperiment_alias\tfile_name\tfile_format\taccession\tsubmission_date\tfile_checksum"/>
                </assert_contents>
            </output>
        </test>
        <!--Test 6: RUN failing build tables from user input fields NON-VIRAL samples.
            dry_run is false and test_submit is not set in this test; the job is expected to fail
            and stdout must contain the missing-credentials message asserted below. -->
        <test expect_failure="true">
            <conditional name="action_options">
                <param name="action" value="add"/>
                <section name="test_submit_parameters">
                    <param name="submit_dev" value="true" />
                    <param name="dry_run" value="false" />
                </section>
                <conditional name="input_format_conditional">
                    <param name="input_format" value="build_tables"/>
                    <param name="add_extension" value="true"/>
                    <conditional name="conditional_viral_metadata">
                        <param name="viral_sample" value="false"/>
                        <!-- Nested repeats: one study, one sample, one experiment, one run entry. -->
                        <repeat name="rep_study">
                            <param name="study_title" value="Test study title"/>
                            <param name="study_abstract" value="Test study abstract"/>
                            <param name="study_type" value="Epigenetics"/>
                            <param name="study_pubmed_id" value="Test study pubmedID"/>
                            <repeat name="rep_sample">
                                <param name="sample_title" value="Test Sample title"/>
                                <param name="sample_description" value="Test Sample description"/>
                                <param name="scientific_name" value="Test Sample scientific name"/>
                                <param name="tax_id" value="2697049"/>
                                <repeat name="rep_experiment">
                                    <param name="experiment_title" value="Test experiment title"/>
                                    <param name="experiment_design" value="Test experiment design description"/>
                                    <param name="library_strategy" value="CTS"/>
                                    <param name="library_source" value="GENOMIC"/>
                                    <param name="library_selection" value="PCR"/>
                                    <param name="library_layout" value="SINGLE"/>
                                    <param name="insert_size" value="150"/>
                                    <param name="library_construction_protocol" value="Test library construction"/>
                                    <param name="platform" value="ILLUMINA"/>
                                    <param name="instrument_model" value="Illumina HiSeq 4000"/>
                                    <!-- NOTE(review): the files are declared with ftype fastqsanger although the first
                                         file name ends in .gz; confirm this is intentional for the failing case. -->
                                    <repeat name="rep_runs">
                                        <param name="upload_files" value="1.fastqsanger.gz,sample.fq" ftype="fastqsanger"/>
                                    </repeat>
                                </repeat>
                            </repeat>
                        </repeat>
                    </conditional>
                </conditional>
            </conditional>
            <param name="center" value="Some research center"/>
            <!-- The expected message is looked for on stdout. -->
            <assert_stdout>
                <has_text_matching expression="No ENA credentials defined"/>
            </assert_stdout>
        </test>
        <!--Test 7: build tables from user input fields NON-VIRAL samples, with test_submit enabled
            (per the original note, to skip the credentials checks). The job is expected to succeed;
            only the generated command line is asserted.
            NOTE(review): the original note also said this test covers compression of uncompressed inputs
            and adding the .gz suffix, but both inputs here are already .gz and add_extension is false;
            confirm whether that scenario is still covered. -->
        <test expect_failure="false">
            <conditional name="action_options">
                <param name="action" value="add"/>
                <section name="test_submit_parameters">
                    <param name="submit_dev" value="true" />
                    <param name="dry_run" value="true" />
                </section>
                <param name="test_submit" value="True"/>
                <conditional name="input_format_conditional">
                    <param name="add_extension" value="false"/>
                    <param name="input_format" value="build_tables"/>
                    <conditional name="conditional_viral_metadata">
                        <param name="viral_sample" value="false"/>
                        <!-- Nested repeats: one study, one sample, one experiment, one run entry. -->
                        <repeat name="rep_study">
                            <param name="study_title" value="Test study title"/>
                            <param name="study_abstract" value="Test study abstract"/>
                            <param name="study_type" value="Epigenetics"/>
                            <param name="study_pubmed_id" value="Test study pubmedID"/>
                            <repeat name="rep_sample">
                                <param name="sample_title" value="Test Sample title"/>
                                <param name="sample_description" value="Test Sample description"/>
                                <param name="scientific_name" value="Test Sample scientific name"/>
                                <param name="tax_id" value="2697049"/>
                                <repeat name="rep_experiment">
                                    <param name="experiment_title" value="Test experiment title"/>
                                    <param name="experiment_design" value="Test experiment design description"/>
                                    <param name="library_strategy" value="CTS"/>
                                    <param name="library_source" value="GENOMIC"/>
                                    <param name="library_selection" value="PCR"/>
                                    <param name="library_layout" value="SINGLE"/>
                                    <param name="insert_size" value="150"/>
                                    <param name="library_construction_protocol" value="Test library construction"/>
                                    <param name="platform" value="ILLUMINA"/>
                                    <param name="instrument_model" value="Illumina HiSeq 4000"/>
                                    <!-- The two input file names are expected verbatim in the data argument asserted below. -->
                                    <repeat name="rep_runs">
                                        <param name="run_base_name" value="run_from_hospital_X"/>
                                        <param name="upload_files" value="1.fastqsanger.gz,2.fastqsanger.gz" ftype="fastqsanger.gz"/>
                                    </repeat>
                                </repeat>
                            </repeat>
                        </repeat>
                    </conditional>
                </conditional>
            </conditional>
            <param name="center" value="Some research center"/>
            <!-- The command must use the add action and must not contain the text "modify" anywhere. -->
            <assert_command>
                <has_text_matching expression="ena-upload-cli"/>
                <has_text_matching expression="--data '1.fastqsanger.gz' '2.fastqsanger.gz'"/>
                <has_text_matching expression="--action 'add' --center 'Some research center'"/>
                <not_has_text text="modify" />
            </assert_command>
        </test>
        <!--Test 8: viral submission - User input metadata - Add extension = False.
            The job is expected to succeed; the command line is asserted to carry checklist ERC000033. -->
        <test expect_failure="false">
            <conditional name="action_options">
                <param name="action" value="add"/>
                <section name="test_submit_parameters">
                    <param name="submit_dev" value="false" />
                    <param name="dry_run" value="true" />
                </section>
                <param name="test_submit" value="True"/>
                <conditional name="input_format_conditional">
                    <param name="add_extension" value="false"/>
                    <param name="input_format" value="build_tables"/>
                    <conditional name="conditional_viral_metadata">
                        <param name="viral_sample" value="true"/>
                        <repeat name="rep_study">
                            <param name="study_title" value="Test study title"/>
                            <param name="study_abstract" value="Test study abstract"/>
                            <param name="study_type" value="Epigenetics"/>
                            <param name="study_pubmed_id" value="Test study pubmedID"/>
                            <repeat name="rep_sample">
                                <param name="sample_title" value="Test Sample title"/>
                                <param name="sample_description" value="Test Sample description"/>
                                <param name="scientific_name" value="Test Sample scientific name"/>
                                <param name="tax_id" value="2697049"/>
                                <!-- The sample fields from collection_date to isolate are only set in the tests with
                                     viral_sample enabled; presumably they belong to the viral branch of the conditional. -->
                                <param name="collection_date" value="2020"/>
                                <param name="geo_location_country" value="Belgium"/>
                                <param name="host_common_name" value="Human"/>
                                <param name="host_subject_id" value="Patient_001"/>
                                <param name="host_health_state" value="healthy"/>
                                <param name="host_sex" value="female"/>
                                <param name="host_scientific_name" value="homo sapiens"/>
                                <param name="collector_name" value="John The Collector"/>
                                <param name="collecting_institution" value="Hospital 01"/>
                                <param name="isolate" value="sample_001"/>
                                <repeat name="rep_experiment">
                                    <param name="experiment_title" value="Test experiment title"/>
                                    <param name="experiment_design" value="Test experiment design description"/>
                                    <param name="library_strategy" value="CTS"/>
                                    <param name="library_source" value="GENOMIC"/>
                                    <param name="library_selection" value="PCR"/>
                                    <param name="library_layout" value="SINGLE"/>
                                    <param name="insert_size" value="150"/>
                                    <param name="library_construction_protocol" value="Test library construction"/>
                                    <param name="platform" value="ILLUMINA"/>
                                    <param name="instrument_model" value="Illumina HiSeq 4000"/>
                                    <!-- The two input file names are expected verbatim in the data argument asserted below. -->
                                    <repeat name="rep_runs">
                                        <param name="run_base_name" value="run_from_hospital_X"/>
                                        <param name="upload_files" value="1.fastqsanger.gz,2.fastqsanger.gz" ftype="fastqsanger.gz"/>
                                    </repeat>
                                </repeat>
                            </repeat>
                        </repeat>
                    </conditional>
                </conditional>
            </conditional>
            <param name="center" value="Some research center"/>
            <!-- Assertions run against the rendered command line, not against tool outputs. -->
            <assert_command>
                <has_text_matching expression="ena-upload-cli"/>
                <has_text_matching expression="--data '1.fastqsanger.gz' '2.fastqsanger.gz'"/>
                <has_text_matching expression="--action 'add' --center 'Some research center'"/>
                <has_text_matching expression="--checklist ERC000033"/>
            </assert_command>
        </test>
        <!--Test 9: modify option and auto compression - viral submission - User input metadata.
            The job is expected to succeed; the command line is asserted to use the modify action with
            checklist ERC000033 and to not contain the text "add" anywhere. -->
        <test expect_failure="false">
            <conditional name="action_options">
                <param name="action" value="modify"/>
                <section name="test_submit_parameters">
                    <param name="submit_dev" value="false" />
                    <param name="dry_run" value="true" />
                </section>
                <param name="test_submit" value="True"/>
                <conditional name="input_format_conditional">
                    <!-- add_extension is disabled, so the input file name is expected unchanged in the data argument below. -->
                    <param name="add_extension" value="false"/>
                    <param name="input_format" value="build_tables"/>
                    <conditional name="conditional_viral_metadata">
                        <param name="viral_sample" value="True"/>
                        <repeat name="rep_study">
                            <param name="study_title" value="Test study title"/>
                            <param name="study_abstract" value="Test study abstract"/>
                            <param name="study_type" value="Epigenetics"/>
                            <param name="study_pubmed_id" value="Test study pubmedID"/>
                            <repeat name="rep_sample">
                                <param name="sample_title" value="Test Sample title"/>
                                <param name="sample_description" value="Test Sample description"/>
                                <param name="scientific_name" value="Test Sample scientific name"/>
                                <param name="tax_id" value="2697049"/>
                                <!-- The sample fields from collection_date to isolate are only set in the tests with
                                     viral_sample enabled; presumably they belong to the viral branch of the conditional. -->
                                <param name="collection_date" value="2020"/>
                                <param name="geo_location_country" value="Belgium"/>
                                <param name="host_common_name" value="Human"/>
                                <param name="host_subject_id" value="Patient_001"/>
                                <param name="host_health_state" value="healthy"/>
                                <param name="host_sex" value="female"/>
                                <param name="host_scientific_name" value="homo sapiens"/>
                                <param name="collector_name" value="John The Collector"/>
                                <param name="collecting_institution" value="Hospital 01"/>
                                <param name="isolate" value="sample_001"/>
                                <repeat name="rep_experiment">
                                    <param name="experiment_title" value="Test experiment title"/>
                                    <param name="experiment_design" value="Test experiment design description"/>
                                    <param name="library_strategy" value="CTS"/>
                                    <param name="library_source" value="GENOMIC"/>
                                    <param name="library_selection" value="PCR"/>
                                    <param name="library_layout" value="SINGLE"/>
                                    <param name="insert_size" value="150"/>
                                    <param name="library_construction_protocol" value="Test library construction"/>
                                    <param name="platform" value="ILLUMINA"/>
                                    <param name="instrument_model" value="Illumina HiSeq 4000"/>
                                    <!-- A single file is supplied in this run entry. -->
                                    <repeat name="rep_runs">
                                        <param name="run_base_name" value="run_from_hospital_X"/>
                                        <param name="upload_files" value="1.fastqsanger.gz" ftype="fastqsanger.gz"/>
                                    </repeat>
                                </repeat>
                            </repeat>
                        </repeat>
                    </conditional>
                </conditional>
            </conditional>
            <param name="center" value="Some research center"/>
            <!-- Assertions run against the rendered command line, not against tool outputs. -->
            <assert_command>
                <has_text_matching expression="ena-upload-cli"/>
                <has_text_matching expression="--data '1.fastqsanger.gz'"/>
                <has_text_matching expression="--action 'modify' --center 'Some research center'"/>
                <has_text_matching expression="--checklist ERC000033"/>
                <not_has_text text="add" />
            </assert_command>
        </test>
    </tests>
    <help><![CDATA[
        This is a wrapper for the ENA upload tool available at https://github.com/usegalaxy-eu/ena-upload-cli
        The input metadata can be submitted following the tabular format of the templates in https://github.com/usegalaxy-eu/ena-upload-cli/tree/master/example_tables
        It is also possible to submit an Excel file by following the template in https://github.com/ELIXIR-Belgium/ENA-metadata-templates
    ]]></help>
    <!-- Publication describing ena-upload-cli, the command-line tool wrapped here. -->
    <citations>
        <citation type="doi">10.1093/bioinformatics/btab421</citation>
    </citations>
</tool>