Mercurial > repos > ieguinoa > ena_upload

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.shed.yml	Mon Feb 21 14:22:53 2022 +0000
@@ -0,0 +1,12 @@
+categories:
+    - Data Export
+description: |
+    Submits experimental data and respective metadata to the European Nucleotide Archive (ENA).
+long_description: |
+    The program submits experimental data and respective metadata to the European Nucleotide Archive (ENA).
+    The metadata should be provided in separate tables corresponding to the ENA objects STUDY, SAMPLE, EXPERIMENT and RUN.
+name: ena_upload
+owner: iuc
+remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload
+homepage_url: https://github.com/usegalaxy-eu/ena-upload-cli
+type: unrestricted
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dump_yaml.py	Mon Feb 21 14:22:53 2022 +0000
@@ -0,0 +1,37 @@
+import sys
+import yaml
+
+def fetch_table_data(table_path):
+    """Load a tab-separated table as ``{row_index: {column_name: cell}}``.
+
+    The first line supplies the column names. Blank lines are skipped and
+    rows shorter than the header are padded with '' instead of raising.
+    """
+    data_dict = {}
+    with open(table_path) as table_to_load:
+        headers = table_to_load.readline().strip('\n').split('\t')
+        for line in table_to_load:
+            line_data = line.strip('\n').split('\t')
+            if line_data == ['']:
+                continue  # blank (e.g. trailing) line, not a data row
+            line_data += [''] * (len(headers) - len(line_data))
+            data_dict[len(data_dict)] = dict(zip(headers, line_data))
+    return data_dict
+
+# Command-line arguments, in order: studies, samples, experiments and runs
+# tables. Each one is stored under the name of the ENA object it describes.
+table_keys = ('ENA_study', 'ENA_sample', 'ENA_experiment', 'ENA_run')
+
+all_data_dict = {}
+# The same marker line is printed before and after the YAML document.
+print('YAML -------------')
+for arg_pos, table_key in enumerate(table_keys, start=1):
+    # Paths are read one at a time, so a missing argument raises IndexError
+    # only after the preceding tables have been loaded.
+    table_path = sys.argv[arg_pos]
+    all_data_dict[table_key] = fetch_table_data(table_path)
+
+# No stream is passed to yaml.dump, so it returns the document as a string
+# and print() writes it to stdout.
+print(yaml.dump(all_data_dict))
+print('YAML -------------')
--- a/ena_upload.xml	Wed Feb 02 17:30:12 2022 +0000
+++ b/ena_upload.xml	Mon Feb 21 14:22:53 2022 +0000
@@ -26,6 +26,10 @@
 #if $action_options.input_format_conditional.input_format == "build_tables":
   python '$__tool_directory__/extract_tables.py' --action $action_options.action --out_dir ./submission_files --studies $studies_json;
 #end if
+
+credentials_path='test_fake_path';
+echo "username: test_user" > \$credentials_path;
+echo "password: test_password" >> \$credentials_path;


 #if $action_options.input_format_conditional.input_format == "build_tables":
@@ -65,13 +69,6 @@
     fi;
 #end if

-#if $action_options.test_submit == "True" or $action_options.test_submit_parameters.dry_run == "true":
-    credentials_path='test_fake_path';
-    #if $action_options.test_submit_parameters.dry_run == "true" and $action_options.input_format_conditional.input_format == "excel_tables":
-        echo "username: test_user" > \$credentials_path;
-        echo "password: test_password" >> \$credentials_path;
-    #end if
-#end if

 ## create the list of files to upload and make the symlinks
 #import re
@@ -173,8 +170,6 @@
     #end if
 #end if

-
-#if $action_options.test_submit_parameters.dry_run == "false" or $action_options.input_format_conditional.input_format == "excel_tables":
 #if $action_options.action == "add":
 ena-upload-cli
     --tool 'ena-upload-cli v@VERSION@ @ Galaxy'
@@ -215,6 +210,8 @@
     >> '$output';
 #end if

+
+
 #if $action_options.action == "modify":
     ena-upload-cli
     --tool 'ena-upload-cli v@VERSION@ @ Galaxy'
@@ -251,27 +248,28 @@
 #end if
     >> '$output';
 #end if
-    #if $action_options.test_submit_parameters.dry_run == "false":
-        echo -e 'center_name\t$action_options.center' >> '$output';
-        echo -e 'action_option\t$action_options.action' >> '$output';
-        #if $action_options.input_format_conditional.input_format == "build_tables":
-            cp $studies_table_path_updated $studies_table_out 2>/dev/null;
-            cp $samples_table_path_updated $samples_table_out 2>/dev/null;
-            cp $experiments_table_path_updated $experiments_table_out 2>/dev/null;
-            cp $runs_table_path_updated $runs_table_out 2>/dev/null;
-        #end if
-    #else
-        ## for the excel input case, copy the upload-cli generated tables to the output files
-        ## this applies for both draft and real submissions
-        cp './ENA_template_experiment_updated.tsv' $experiments_table_out;
-        cp './ENA_template_sample_updated.tsv' $samples_table_out;
-        cp './ENA_template_study_updated.tsv' $studies_table_out;
-        cp './ENA_template_run_updated.tsv' $runs_table_out;
-    #end if
-#else:
-    exit 0;
+
+#if $action_options.test_submit_parameters.dry_run == "false":
+    echo -e 'center_name\t$action_options.center' >> '$output';
+    echo -e 'action_option\t$action_options.action' >> '$output';
+#end if
+
+## copy updated files
+#if $action_options.input_format_conditional.input_format == "excel_tables":
+    ## for the excel input case, copy the upload-cli generated tables to the output files
+    ## this applies for both draft and real submissions
+    cp './ENA_template_experiment_updated.tsv' $experiments_table_out;
+    cp './ENA_template_sample_updated.tsv' $samples_table_out;
+    cp './ENA_template_study_updated.tsv' $studies_table_out;
+    cp './ENA_template_run_updated.tsv' $runs_table_out;
+#else
+    cp $studies_table_path_updated $studies_table_out 2>/dev/null;
+    cp $samples_table_path_updated $samples_table_out 2>/dev/null;
+    cp $experiments_table_path_updated $experiments_table_out 2>/dev/null;
+    cp $runs_table_path_updated $runs_table_out 2>/dev/null;
 #end if

+python '$__tool_directory__/dump_yaml.py' '$studies_table_out' '$samples_table_out' '$experiments_table_out' '$runs_table_out' >> '$output';
 ]]></command>
     <configfiles>
         <configfile name="credentials"><![CDATA[
@@ -453,6 +451,15 @@
                     <has_line_matching expression="r_20201007_026\te_20201007_026\tC026_exp5_clean.fastq.gz\tFASTQ(.*)"/>
                 </assert_contents>
             </output>
+            <output name="output">
+                <assert_contents>
+                    <has_line_matching expression="YAML -------------"/>
+                    <has_line_matching expression="ENA_experiment:"/>
+                    <has_line_matching expression="ENA_sample:"/>
+                    <has_line_matching expression="ENA_study:"/>
+                    <has_line_matching expression="ENA_run:"/>
+                </assert_contents>
+            </output>
         </test>
         <!--Test 3: excel input of NON-VIRAL samples-->
         <test>
@@ -549,7 +556,7 @@
                 </section>
                 <conditional name="input_format_conditional">
                     <param name="input_format" value="build_tables"/>
-                    <param name="add_extension" value="true"/>
+                    <param name="add_extension" value="false"/>
                     <conditional name="conditional_viral_metadata">
                         <param name="viral_sample" value="False"/>
                         <repeat name="rep_study">
@@ -561,7 +568,7 @@
                                 <param name="sample_title" value="Test Sample title"/>
                                 <param name="sample_description" value="Test Sample description"/>
                                 <param name="scientific_name" value="Test Sample scientific name"/>
-                                <param name="tax_id" value="Test Sample tax_id"/>
+                                <param name="tax_id" value="2697049"/>
                                 <repeat name="rep_experiment">
                                     <param name="experiment_title" value="Test experiment title"/>
                                     <param name="experiment_design" value="Test experiment design description"/>
@@ -574,7 +581,7 @@
                                     <param name="platform" value="ILLUMINA"/>
                                     <param name="instrument_model" value="Illumina HiSeq 4000"/>
                                     <repeat name="rep_runs">
-                                        <param name="upload_files" value="1.fastqsanger.gz,sample.fq" ftype="fastqsanger"/>
+                                        <param name="upload_files" value="1.fastqsanger.gz,sample.fq" ftype="fastqsanger.gz"/>
                                     </repeat>
                                 </repeat>
                             </repeat>
@@ -632,7 +639,7 @@
                                 <param name="sample_title" value="Test Sample title"/>
                                 <param name="sample_description" value="Test Sample description"/>
                                 <param name="scientific_name" value="Test Sample scientific name"/>
-                                <param name="tax_id" value="Test Sample tax_id"/>
+                                <param name="tax_id" value="2697049"/>
                                 <repeat name="rep_experiment">
                                     <param name="experiment_title" value="Test experiment title"/>
                                     <param name="experiment_design" value="Test experiment design description"/>
@@ -660,16 +667,16 @@
         </test>
         <!--Test 7: with submit_test to skip credentials checksRUN failing build tables from user input fields NON-VIRAL samples
             also tests compression of uncompressed inputs and adding the .gz suffix -->
-        <test expect_failure="true">
+        <test expect_failure="false">
             <conditional name="action_options">
                 <param name="action" value="add"/>
                 <section name="test_submit_parameters">
                     <param name="submit_dev" value="true" />
-                    <param name="dry_run" value="false" />
+                    <param name="dry_run" value="true" />
                 </section>
                 <param name="test_submit" value="True"/>
                 <conditional name="input_format_conditional">
-                    <param name="add_extension" value="true"/>
+                    <param name="add_extension" value="false"/>
                     <param name="input_format" value="build_tables"/>
                     <conditional name="conditional_viral_metadata">
                         <param name="viral_sample" value="false"/>
@@ -682,7 +689,7 @@
                                 <param name="sample_title" value="Test Sample title"/>
                                 <param name="sample_description" value="Test Sample description"/>
                                 <param name="scientific_name" value="Test Sample scientific name"/>
-                                <param name="tax_id" value="Test Sample tax_id"/>
+                                <param name="tax_id" value="2697049"/>
                                 <repeat name="rep_experiment">
                                     <param name="experiment_title" value="Test experiment title"/>
                                     <param name="experiment_design" value="Test experiment design description"/>
@@ -696,7 +703,7 @@
                                     <param name="instrument_model" value="Illumina HiSeq 4000"/>
                                     <repeat name="rep_runs">
                                         <param name="run_base_name" value="run_from_hospital_X"/>
-                                        <param name="upload_files" value="sample.fq,sample.fq" ftype="fastqsanger"/>
+                                        <param name="upload_files" value="1.fastqsanger.gz,2.fastqsanger.gz" ftype="fastqsanger.gz"/>
                                     </repeat>
                                 </repeat>
                             </repeat>
@@ -707,25 +714,22 @@
             <param name="center" value="Some research center"/>
             <assert_command>
                 <has_text_matching expression="ena-upload-cli"/>
-                <has_text_matching expression="--data 'sample.fq.fastq.gz' 'sample.fq.fastq.gz'"/>
+                <has_text_matching expression="--data '1.fastqsanger.gz' '2.fastqsanger.gz'"/>
                 <has_text_matching expression="--action 'add' --center 'Some research center'"/>
                 <not_has_text text="modify" />
             </assert_command>
-            <assert_stderr>
-                <has_text_matching expression="Oops, the file test_fake_path does not exist"/>
-            </assert_stderr>
         </test>
         <!--Test 8: viral submission - User input metadata - Add extension = False-->
-        <test expect_failure="true">
+        <test expect_failure="false">
             <conditional name="action_options">
                 <param name="action" value="add"/>
                 <section name="test_submit_parameters">
                     <param name="submit_dev" value="false" />
-                    <param name="dry_run" value="false" />
+                    <param name="dry_run" value="true" />
                 </section>
                 <param name="test_submit" value="True"/>
                 <conditional name="input_format_conditional">
-                    <param name="add_extension" value="False"/>
+                    <param name="add_extension" value="false"/>
                     <param name="input_format" value="build_tables"/>
                     <conditional name="conditional_viral_metadata">
                         <param name="viral_sample" value="true"/>
@@ -738,7 +742,7 @@
                                 <param name="sample_title" value="Test Sample title"/>
                                 <param name="sample_description" value="Test Sample description"/>
                                 <param name="scientific_name" value="Test Sample scientific name"/>
-                                <param name="tax_id" value="Test Sample tax_id"/>
+                                <param name="tax_id" value="2697049"/>
                                 <param name="collection_date" value="2020"/>
                                 <param name="geo_location_country" value="Belgium"/>
                                 <param name="host_common_name" value="Human"/>
@@ -777,21 +781,18 @@
                 <has_text_matching expression="--action 'add' --center 'Some research center'"/>
                 <has_text_matching expression="--checklist ERC000033"/>
             </assert_command>
-            <assert_stderr>
-                <has_text_matching expression="Oops, the file test_fake_path does not exist"/>
-            </assert_stderr>
         </test>
         <!--Test 9: modify option and auto compression - viral submission - User input metadata-->
-        <test expect_failure="true">
+        <test expect_failure="false">
             <conditional name="action_options">
                 <param name="action" value="modify"/>
                 <section name="test_submit_parameters">
                     <param name="submit_dev" value="false" />
-                    <param name="dry_run" value="false" />
+                    <param name="dry_run" value="true" />
                 </section>
                 <param name="test_submit" value="True"/>
                 <conditional name="input_format_conditional">
-                    <param name="add_extension" value="False"/>
+                    <param name="add_extension" value="false"/>
                     <param name="input_format" value="build_tables"/>
                     <conditional name="conditional_viral_metadata">
                         <param name="viral_sample" value="True"/>
@@ -804,7 +805,7 @@
                                 <param name="sample_title" value="Test Sample title"/>
                                 <param name="sample_description" value="Test Sample description"/>
                                 <param name="scientific_name" value="Test Sample scientific name"/>
-                                <param name="tax_id" value="Test Sample tax_id"/>
+                                <param name="tax_id" value="2697049"/>
                                 <param name="collection_date" value="2020"/>
                                 <param name="geo_location_country" value="Belgium"/>
                                 <param name="host_common_name" value="Human"/>
@@ -828,7 +829,7 @@
                                     <param name="instrument_model" value="Illumina HiSeq 4000"/>
                                     <repeat name="rep_runs">
                                         <param name="run_base_name" value="run_from_hospital_X"/>
-                                        <param name="upload_files" value="sample.fq" ftype="fastqsanger"/>
+                                        <param name="upload_files" value="1.fastqsanger.gz" ftype="fastqsanger.gz"/>
                                     </repeat>
                                 </repeat>
                             </repeat>
@@ -839,21 +840,17 @@
             <param name="center" value="Some research center"/>
             <assert_command>
                 <has_text_matching expression="ena-upload-cli"/>
-                <has_text_matching expression="--data 'sample.fq.gz'"/>
+                <has_text_matching expression="--data '1.fastqsanger.gz'"/>
                 <has_text_matching expression="--action 'modify' --center 'Some research center'"/>
                 <has_text_matching expression="--checklist ERC000033"/>
                 <not_has_text text="add" />
             </assert_command>
-            <assert_stderr>
-                <has_text_matching expression="Oops, the file test_fake_path does not exist"/>
-            </assert_stderr>
         </test>
     </tests>
     <help><![CDATA[
         This is a wrapper for the ENA upload tool in https://github.com/usegalaxy-eu/ena-upload-cli
         The input metadata can be submitted following the tabular format of the templates in https://github.com/usegalaxy-eu/ena-upload-cli/tree/master/example_tables
-        It is also possible to submit an excel file by following the template in https://drive.google.com/file/d/1ncC22--tW2v-EI-te_r86sAZujIPAjlX/view?usp=sharing
-        For viral submissions a larger set of metadata is required, you can find the template in https://drive.google.com/file/d/1Gx78GKh58PmRjdmJ05DBbpObAL-3oUFX/view?usp=sharing
+        It is also possible to submit an excel file by following the template in https://github.com/ELIXIR-Belgium/ENA-metadata-templates
     ]]></help>
     <citations>
     </citations>
--- a/extract_tables.py	Wed Feb 02 17:30:12 2022 +0000
+++ b/extract_tables.py	Mon Feb 21 14:22:53 2022 +0000
@@ -44,10 +44,10 @@
                                    'ENA_submission_data']))
     if "geo_location" in study['samples'][0].keys():           # sample belongs to a viral sample
         samples_table.write('\t'.join(['alias', 'status', 'accession', 'title', 'scientific_name',
-                                       'taxon_id', 'sample_description', 'collection_date',
-                                       'geographic_location', 'host_common_name', 'host_subject_id',
-                                       'host_health_state', 'host_sex', 'host_scientific_name',
-                                       'collector_name', 'collecting_institution', 'isolate',
+                                       'taxon_id', 'sample_description', 'collection date',
+                                       'geographic location (country and/or sea)', 'host common name', 'host subject id',
+                                       'host health state', 'host sex', 'host scientific name',
+                                       'collector name', 'collecting institution', 'isolate',
                                        'submission_date']) + '\n')
     else:
         samples_table.write('\t'.join(['alias', 'status', 'accession', 'title', 'scientific_name',
--- a/samples_macros.xml	Wed Feb 02 17:30:12 2022 +0000
+++ b/samples_macros.xml	Mon Feb 21 14:22:53 2022 +0000
@@ -67,14 +67,14 @@
     <xml name="table_inputs_macro">
         <conditional name="input_format_conditional">
             <param name="input_format" type="select" label="Would you like to submit pregenerated table files or interactively define the input structures?">
-                <option value="excel_tables" selected="True">User generated metadata tables based on Excel templates</option>
+                <option value="excel_tables" selected="True">User generated metadata tables based on Excel template</option>
                 <option value="build_tables" selected="False">Interactive generation of the study structure (recommended for small studies)</option>
                 <option value="user_generated_tables" selected="False">User generated tabular files (studies - samples - experiments - runs) </option>
             </param>
             <when value="excel_tables">
                 <!--<param name="viral_submission" type="boolean" label="Does your submission data belong to a viral sample?" help="If you select yes then your data will be submitted using the ENA virus pathogen reporting standard checklist (see: https://ena-browser-docs.readthedocs.io/en/latest/help_and_guides/sars-cov-2-submissions.html)" />-->
                 <expand macro="checklist_input_macro"/>
-                <param name="xlsx_file" type="data" format="xlsx" label="Select Excel (xlsx) file based on templates" />
+                <param name="xlsx_file" type="data" format="xlsx" label="Select Excel (xlsx) file based on template" />
                 <expand macro="run_inputs_macro" />
             </when>
             <when value="user_generated_tables">
@@ -207,7 +207,7 @@
             <param name="sample_title" type="text" label="Sample title"/>
             <param name="sample_description" type="text" help="e.g: liver cells" label="Describe the type of sample"/>
             <param name="scientific_name" type="text" label="Enter the species of the sample" help="e.g Severe acute respiratory syndrome coronavirus 2"/>
-            <param name="tax_id" type="text" label="Enter the taxonomic ID corresponding to the sample species" />
+            <param name="tax_id" type="integer" value="0" label="Enter the taxonomic ID corresponding to the sample species" />
             <repeat name="rep_experiment" title="Sequencing experiments performed with this sample" min="1" >
                 <param name="experiment_title" type="text" label="Specify an experiment title" />
                 <param name="experiment_design" type="text" label="Describe the experiment design" />