changeset 3:8bbe09a52904 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/pyega3 commit c472a64dc5e68fb058b71e7404f180847ba9f6d4
author iuc
date Thu, 27 Oct 2022 15:12:30 +0000
parents e82175804eb1
children 9564758e8638
files pyega3.xml test-data/filelist3.tabular test-data/filelist_EGAD00001003338.tabular
diffstat 3 files changed, 75 insertions(+), 12 deletions(-) [+]
line wrap: on
line diff
--- a/pyega3.xml	Tue Jun 14 17:06:07 2022 +0000
+++ b/pyega3.xml	Thu Oct 27 15:12:30 2022 +0000
@@ -1,11 +1,15 @@
-<tool id="pyega3" name="EGA Download Client" version="@TOOL_VERSION@+galaxy1" profile="21.01" >
+<tool id="pyega3" name="EGA Download Client" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.01" >
     <macros>
-        <token name="@TOOL_VERSION@">4.0.0</token>
+        <token name="@TOOL_VERSION@">4.0.5</token>
+        <token name="@VERSION_SUFFIX@">1</token>
     </macros>
     <requirements>
         <requirement type="package" version="@TOOL_VERSION@">pyega3</requirement>
     </requirements>
-        <command detect_errors="exit_code"><![CDATA[
+    <version_command><![CDATA[
+        pyega3 -v |& grep version | cut -d" " -f 10
+    ]]></version_command>
+    <command detect_errors="exit_code"><![CDATA[
 #set $username = $__user__.extra_preferences.get('ega_account|username', "")
 #if $username == "":
     #set $username = "ega-test-data@ebi.ac.uk (default user)"
@@ -43,9 +47,17 @@
     && rm -f downloads/*.md5  ## checksum validation already performed by pyEGA, cleanup downloads folder
 
 #elif $action.action_type == "download_files"
-    #set file_ids=[x.split('\t')[int(str($action.id_column))-1] for x in open(str($id_table)).readlines() if x.split('\t')[int(str($action.id_column))-1].startswith('EGAF') ]
+    #import re
+    #set file_ids=[x.rstrip('\r\n').split('\t')[int(str($action.id_column))-1] for x in open(str($id_table)).readlines() if x.strip()]
+    #set outfiles=[x.rstrip('\r\n').split('\t')[int(str($action.file_column))-1] for x in open(str($id_table)).readlines() if x.strip()]
     mkdir downloads
-    #for f in $file_ids
+    #for f, o in zip($file_ids, $outfiles)
+      #if not f.startswith("EGAF")
+        && >&2 echo "Ignoring \"$f\": no EGA file ID"
+        #continue
+      #end if
+      && 
+      echo 'Downloading $f: $o'
       &&
       pyega3 -c \${PYEGA_CONNECTIONS:-30} -cf '$credentials'
         fetch '$f'
@@ -60,6 +72,16 @@
           #end if
         #end if
         --output-dir downloads
+      #if re.match(".*vcf(_genomic_range_.*|).gz$", o)
+        #if $action.range.reference_name or ($action.range.start or $action.range.end)
+          && mv 'downloads/$f/'$o[:-3]'_genomic_range_'\$([ -z "$action.range.reference_name" ] && printf "None" || printf "$action.range.reference_name")'_'\$([ -z "$action.range.start" ] && printf "None" || printf "$action.range.start")'_'\$([ -z "$action.range.end" ] && printf "None" || printf "$action.range.end")'.gz' 'downloads/$f/'$o'.vcf_bgzip'  ## source name is built from ref, start, end; an unset field is written as None
+        #else
+          && mv 'downloads/$f/$o' 'downloads/$f/'$o'.vcf_bgzip'
+        #end if
+      #end if
+      #if re.match(".*ped$", o)
+        && mv 'downloads/$f/$o' 'downloads/$f/'$o'.tabular'
+      #end if
     #end for
     && rm -f downloads/**/*.md5  ## checksum validation already performed by pyEGA, clean up downloads folder
 
@@ -95,7 +117,7 @@
             <when value="list_datasets"/>
             <when value="download_file">
                 <param name="file_id" type="text" optional="false" label="EGA File Accession Identifier" help="Identifier starting with 'EGAF'. For example: EGAF00001753735">
-                     <validator type="regex" message="EGA Accession ID must be a string of numbers prefixed by 'EGAD' (datasets) or 'EGAF' (files)">EGAF[0-9]+</validator>
+                     <validator type="regex" message="EGA Accession ID must be a string of numbers prefixed by 'EGAD' (datasets) or 'EGAF' (files)">EGA[DF][0-9]+</validator>
                 </param>
                 <section name="range" title="Request a specific Genomic range?" expanded="false">
                     <param argument="--reference-name" type="text" optional="true" label="Reference Sequence Name" help="For example 'chr1', '1', or 'chrX'. If unspecified, all data is returned." />
@@ -106,6 +128,7 @@
             <when value="download_files">
                 <param name="id_table" type="data" format="tabular" label="Table with IDs to download" help="A tabular file where one column contains the set of file IDs. This will output a collection. Please select files that are all the same format (e.g. all BAM or all VCF)."/>
                 <param name="id_column" type="data_column" data_ref="id_table" label="Column containing the file IDs" help="File Identifiers starting with 'EGAF'. For example: EGAF00001753735" />
+                <param name="file_column" type="data_column" data_ref="id_table" label="Column containing the file names" />
                 <section name="range" title="Request a specific Genomic range? (will be applied to ALL requested files)" expanded="false">
                     <param argument="--reference-name" type="text" optional="true" label="Reference Sequence Name" help="For example 'chr1', '1', or 'chrX'. If unspecified, all data is returned." />
                     <param argument="--start" type="integer" optional="true" min="0" label="Start Position" help="0-based, inclusive. Only used if a reference sequence name was specified"/>
@@ -134,7 +157,8 @@
         </collection>
     </outputs>
     <tests>
-        <test expect_num_outputs="1"><!-- list datasets with default credentials -->
+        <!-- list datasets with default credentials -->
+        <test expect_num_outputs="1">
             <param name="action_type" value="list_datasets"/>
             <output name="authorized_datasets" ftype="txt">
                 <assert_contents>
@@ -143,7 +167,8 @@
                 </assert_contents>
             </output>
         </test>
-        <test expect_num_outputs="2"><!-- list dataset files with default credentials, and request a log output file -->
+        <!-- list dataset files with default credentials, and request a log output file -->
+        <test expect_num_outputs="2">
             <param name="action_type" value="list_dataset_files"/>
             <param name="dataset_id" value="EGAD00001003338"/>
             <param name="output_log" value="true"/>
@@ -156,12 +181,14 @@
                 </assert_contents>
             </output>
         </test>
-        <test expect_num_outputs="1"> <!-- download a single file -->
+        <!-- download a single file -->
+        <test expect_num_outputs="1"> 
             <param name="action_type" value="download_file"/>
             <param name="file_id" value="EGAF00001775036"/>
             <output name="downloaded_file" md5="3b89b96387db5199fef6ba613f70e27c"/>
         </test>
-        <test expect_num_outputs="1"> <!-- download a single file, with genomic range specified -->
+        <!-- download a single file, with genomic range specified -->
+        <test expect_num_outputs="1">
             <param name="action_type" value="download_file"/>
             <param name="file_id" value="EGAF00001753756"/>
             <param name="reference_name" value="1"/>
@@ -169,19 +196,26 @@
             <param name="end" value="10000"/>
             <output name="downloaded_file" ftype="bam" md5="e576a38748feec45aa45191f6e902ce2"/>
         </test>
-        <test expect_num_outputs="1"> <!-- download multiple files -->
+        <!-- download multiple files -->
+        <test expect_num_outputs="1">
             <param name="action_type" value="download_files"/>
             <param name="id_table" value="filelist.tabular"/>
             <param name="id_column" value="1"/>
+            <param name="file_column" value="5"/>
             <output_collection name="downloaded_file_collection" type="list" count="2">
                 <element name="ENCFF000VWO.bam" md5="b8ae14d5d1f717ab17d45e8fc36946a0" />
                 <element name="ENCFF284YOU.bam" md5="3b89b96387db5199fef6ba613f70e27c" />
             </output_collection>
+            <assert_stderr>
+                <has_text text="Ignoring &quot;File ID&quot;: no EGA file ID"/>
+            </assert_stderr>
         </test>
-        <test expect_num_outputs="1"> <!-- download multiple files, in combination with a genomic range -->
+        <!-- download multiple files, in combination with a genomic range -->
+        <test expect_num_outputs="1">
             <param name="action_type" value="download_files"/>
             <param name="id_table" value="filelist2.tabular"/>
             <param name="id_column" value="1"/>
+            <param name="file_column" value="5"/>
             <param name="reference_name" value="1"/>
             <param name="start" value="0"/>
             <param name="end" value="10000"/>
@@ -189,6 +223,20 @@
                 <element name="NA19239_genomic_range_1_0_10000" md5="bcdcf18846233cbe5cc8afd95168552c" />
                 <element name="NA19240_genomic_range_1_0_10000" md5="e576a38748feec45aa45191f6e902ce2" />
             </output_collection>
+            <assert_stderr>
+                <has_text text="Ignoring &quot;File ID&quot;: no EGA file ID"/>
+            </assert_stderr>
+        </test>
+        <!-- download multiple vcf.gz files -->
+        <test expect_num_outputs="1">
+            <param name="action_type" value="download_files"/>
+            <param name="id_table" value="filelist3.tabular"/>
+            <param name="id_column" value="1"/>
+            <param name="file_column" value="5"/>
+            <output_collection name="downloaded_file_collection" type="list" count="2">
+                <element name="HG00408.novoBreak__256r__4.100100-10100100__7.200100-9000100.vcf.gz" md5="51cfb69bf3b9416ff425381a58c18a2b" />
+                <element name="HG01890.HGSVC__145r__1.900100-10001000__18.2001000-90001000.vcf.gz" md5="ebad4425191a89d3e970c02190a87175" />
+            </output_collection>
         </test>
     </tests>
     <help><![CDATA[
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filelist3.tabular	Thu Oct 27 15:12:30 2022 +0000
@@ -0,0 +1,3 @@
+File ID	Status	Bytes	Check sum	File name
+EGAF00007243779	1	15340	ebad4425191a89d3e970c02190a87175	HG01890.HGSVC__145r__1.900100-10001000__18.2001000-90001000.vcf.gz
+EGAF00007243775	1	23033	51cfb69bf3b9416ff425381a58c18a2b	HG00408.novoBreak__256r__4.100100-10100100__7.200100-9000100.vcf.gz
\ No newline at end of file
--- a/test-data/filelist_EGAD00001003338.tabular	Tue Jun 14 17:06:07 2022 +0000
+++ b/test-data/filelist_EGAD00001003338.tabular	Thu Oct 27 15:12:30 2022 +0000
@@ -19,6 +19,18 @@
 EGAF00005007330	1	4722	110b493c17210ff3484ed2561a2fe21f	HG01775.chrY.bcf.csi
 EGAF00005007331	1	137465	fcf1cc38cd404ea1cdba3975d26f4a8b	HG01775.GRCh38DH.exome.cram.crai
 EGAF00005007332	1	229305	56e8de04466aba23ab5acbaf1c087045	NA18534.GRCh38DH.exome.cram.crai
+EGAF00007243773	1	194837	ed365c71461eac21a64d2c29e7216e50	HG00096.GRCh38DH__1097r__10.10000-10100__21.5000000-5050000.bam
+EGAF00007243774	1	135144	687996620a153a8dc451ab71701bb290	HG00096.GRCh38DH__1097r__10.10000-10100__21.5000000-5050000.cram
+EGAF00007243775	1	23033	51cfb69bf3b9416ff425381a58c18a2b	HG00408.novoBreak__256r__4.100100-10100100__7.200100-9000100.vcf.gz
+EGAF00007243776	1	1985	245fd6f73095ce4c80202d5815c84528	HG00408.novoBreak__256r__4.100100-10100100__7.200100-9000100.vcf.gz.tbi
+EGAF00007243777	1	122474	eee2883c95d3cd707961c80b6e44ee68	HG01500.GRCh38DH__90r__3.10000-10500__4.10000-10500.bam
+EGAF00007243778	1	112250	98798731624e6e402c800c276556bb95	HG01500.GRCh38DH__90r__3.10000-10500__4.10000-10500.cram
+EGAF00007243779	1	15340	ebad4425191a89d3e970c02190a87175	HG01890.HGSVC__145r__1.900100-10001000__18.2001000-90001000.vcf.gz
+EGAF00007243780	1	1978	73ab82ce05b4f6259256ae0d4eb33e3b	HG01890.HGSVC__145r__1.900100-10001000__18.2001000-90001000.vcf.gz.tbi
+EGAF00007243781	1	27104	10bc4d1dc9ca944339da830e4c868857	HG01500.GRCh38DH__90r__3.10000-10500__4.10000-10500.bam.bai
+EGAF00007243782	1	29568	7d6347593fb4ad59cb62cec9c7f97a7a	HG00096.GRCh38DH__1097r__10.10000-10100__21.5000000-5050000.bam.bai
+EGAF00007243783	1	83	c43fcb885eba78b62efb4bf5d1cc51bf	HG00096.GRCh38DH__1097r__10.10000-10100__21.5000000-5050000.cram.crai
+EGAF00007243784	1	80	3537be5d71b4e6f0a7f9127cf5bda9aa	HG01500.GRCh38DH__90r__3.10000-10500__4.10000-10500.cram.crai
 EGAF00001753734	1	45030910198	040ef7533533a3db67a35b9f454b9269	NA12878.cram
 EGAF00001753735	1	1575103	41fd8741e91924eae19c6baa7893eeb8	NA12878.crai
 EGAF00001753736	1	38215425935	bbc03793c9534a22f77e751d2723cb10	NA12891.cram