changeset 18:f005b6efd096 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_kraken2_database/ commit a6877055190331683367394d1d1ca6ff47cf4fa7-dirty
author dfornika
date Fri, 24 May 2019 13:39:08 -0400
parents 4c9f9d6098eb
children ffeb852407d6
files data_manager/kraken2_build_database.py data_manager/kraken2_build_database.xml data_manager_conf.xml test-data/adapter.fa test-data/adapter.fastq test-data/kraken2_custom_data_manager.json
diffstat 6 files changed, 95 insertions(+), 55 deletions(-) [+]
line wrap: on
line diff
--- a/data_manager/kraken2_build_database.py	Mon May 06 19:42:14 2019 -0400
+++ b/data_manager/kraken2_build_database.py	Fri May 24 13:39:08 2019 -0400
@@ -50,7 +50,7 @@
         return self.value
 
 
-def kraken2_build_standard(data_manager_dict, kraken2_args, target_directory, data_table_name=DATA_TABLE_NAME):
+def kraken2_build_standard(kraken2_args, target_directory, data_table_name=DATA_TABLE_NAME):
     now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ")
 
     database_value = "_".join([
@@ -92,15 +92,21 @@
     subprocess.check_call(['kraken2-build'] + args, cwd=target_directory)
 
     data_table_entry = {
-        "value": database_value,
-        "name": database_name,
-        "path": database_path,
+        'data_tables': {
+            data_table_name: [
+                {
+                    "value": database_value,
+                    "name": database_name,
+                    "path": database_path,
+                }
+            ]
+        }
     }
 
-    _add_data_table_entry(data_manager_dict, data_table_entry)
+    return data_table_entry
 
 
-def kraken2_build_minikraken(data_manager_dict, minikraken2_version, target_directory, data_table_name=DATA_TABLE_NAME):
+def kraken2_build_minikraken(minikraken2_version, target_directory, data_table_name=DATA_TABLE_NAME):
 
     now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ")
 
@@ -118,6 +124,8 @@
         now + ")"
     ])
 
+    database_path = database_value
+
     # download the minikraken2 data
     src = urlopen(
         'ftp://ftp.ccb.jhu.edu/pub/data/kraken2_dbs/minikraken2_%s_8GB_201904_UPDATE.tgz'
@@ -127,18 +135,27 @@
         shutil.copyfileobj(src, dst)
     # unpack the downloaded archive to the target directory
     with tarfile.open('tmp_data.tar.gz', 'r:gz') as fh:
-        fh.extractall(target_directory)
+        for member in fh.getmembers():
+            if member.isreg():
+                member.name = os.path.basename(member.name)
+                fh.extract(member, os.path.join(target_directory, database_path))
 
     data_table_entry = {
-        "value": database_value,
-        "name": database_name,
-        "path": database_value,
+        'data_tables': {
+            data_table_name: [
+                {
+                    "value": database_value,
+                    "name": database_name,
+                    "path": database_path,
+                }
+            ]
+        }
     }
 
-    _add_data_table_entry(data_manager_dict, data_table_entry)
+    return data_table_entry
 
 
-def kraken2_build_special(data_manager_dict, kraken2_args, target_directory, data_table_name=DATA_TABLE_NAME):
+def kraken2_build_special(kraken2_args, target_directory, data_table_name=DATA_TABLE_NAME):
 
     now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ")
 
@@ -187,22 +204,31 @@
     subprocess.check_call(['kraken2-build'] + args, cwd=target_directory)
 
     data_table_entry = {
-        "value": database_value,
-        "name": database_name,
-        "path": database_path,
+        'data_tables': {
+            data_table_name: [
+                {
+                    "value": database_value,
+                    "name": database_name,
+                    "path": database_path,
+                }
+            ]
+        }
     }
 
-    _add_data_table_entry(data_manager_dict, data_table_entry)
+    return data_table_entry
 
 
-def kraken2_build_custom(data_manager_dict, kraken2_args, custom_database_name, target_directory, data_table_name=DATA_TABLE_NAME):
+def kraken2_build_custom(kraken2_args, custom_database_name, target_directory, data_table_name=DATA_TABLE_NAME):
 
     args = [
         '--threads', str(kraken2_args["threads"]),
         '--download-taxonomy',
-        '--db', custom_database_name
+        '--db', custom_database_name,
     ]
 
+    if kraken2_args['skip_maps']:
+        args.append('--skip-maps')
+
     subprocess.check_call(['kraken2-build'] + args, cwd=target_directory)
 
     args = [
@@ -230,22 +256,21 @@
         '--db', custom_database_name
     ]
 
-    subprocess.check_call(['kraken2-build'] + args, target_directory)
+    subprocess.check_call(['kraken2-build'] + args, cwd=target_directory)
 
     data_table_entry = {
-        "value": custom_database_name,
-        "name": custom_database_name,
-        "path": custom_database_name
+        'data_tables': {
+            data_table_name: [
+                {
+                    "value": custom_database_name,
+                    "name": custom_database_name,
+                    "path": custom_database_name
+                }
+            ]
+        }
     }
 
-    _add_data_table_entry(data_manager_dict, data_table_name, data_table_entry)
-
-
-def _add_data_table_entry(data_manager_dict, data_table_entry, data_table_name=DATA_TABLE_NAME):
-    data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} )
-    data_manager_dict['data_tables'][data_table_name] = data_manager_dict['data_tables'].get( data_table_name, [] )
-    data_manager_dict['data_tables'][data_table_name].append( data_table_entry )
-    return data_manager_dict
+    return data_table_entry
 
 
 def main():
@@ -259,7 +284,8 @@
     parser.add_argument('--minikraken2-version', dest='minikraken2_version', type=Minikraken2Versions, choices=list(Minikraken2Versions), help='MiniKraken2 version (only applies to --database-type minikraken)')
     parser.add_argument('--special-database-type', dest='special_database_type', type=SpecialDatabaseTypes, choices=list(SpecialDatabaseTypes), help='type of special database to build (only applies to --database-type special)')
     parser.add_argument('--custom-fasta', dest='custom_fasta', help='fasta file for custom database (only applies to --database-type custom)')
-    parser.add_argument( '--custom-database-name', dest='custom_database_name', help='Name for custom database (only applies to --database-type custom)' )
+    parser.add_argument('--custom-database-name', dest='custom_database_name', help='Name for custom database (only applies to --database-type custom)')
+    parser.add_argument('--skip-maps', dest='skip_maps', action='store_true', help='')
     args = parser.parse_args()
 
     data_manager_input = json.loads(open(args.data_manager_json).read())
@@ -276,7 +302,6 @@
 
     data_manager_output = {}
 
-    print(args.database_type)
     if str(args.database_type) == 'standard':
         kraken2_args = {
             "kmer_len": args.kmer_len,
@@ -284,14 +309,12 @@
             "minimizer_spaces": args.minimizer_spaces,
             "threads": args.threads,
         }
-        kraken2_build_standard(
-            data_manager_output,
+        data_manager_output = kraken2_build_standard(
             kraken2_args,
             target_directory,
         )
     elif str(args.database_type) == 'minikraken':
-        kraken2_build_minikraken(
-            data_manager_output,
+        data_manager_output = kraken2_build_minikraken(
             str(args.minikraken2_version),
             target_directory
         )
@@ -303,21 +326,20 @@
             "minimizer_spaces": args.minimizer_spaces,
             "threads": args.threads,
         }
-        kraken2_build_special(
-            data_manager_output,
+        data_manager_output = kraken2_build_special(
             kraken2_args,
             target_directory,
         )
     elif str(args.database_type) == 'custom':
         kraken2_args = {
             "custom_fasta": args.custom_fasta,
+            "skip_maps": args.skip_maps,
             "kmer_len": args.kmer_len,
             "minimizer_len": args.minimizer_len,
             "minimizer_spaces": args.minimizer_spaces,
             "threads": args.threads,
         }
-        kraken2_build_custom(
-            data_manager_output,
+        data_manager_output = kraken2_build_custom(
             kraken2_args,
             args.custom_database_name,
             target_directory,
--- a/data_manager/kraken2_build_database.xml	Mon May 06 19:42:14 2019 -0400
+++ b/data_manager/kraken2_build_database.xml	Fri May 24 13:39:08 2019 -0400
@@ -1,5 +1,5 @@
 <?xml version="1.0"?>
-<tool id="kraken2_build_database" name="Kraken2 Database Builder" tool_type="manage_data" version="2.0.8_beta">
+<tool id="kraken2_build_database" name="Kraken2" tool_type="manage_data" version="2.0.8_beta">
     <macros>
         <xml name="common_params">
             <param name="kmer_len" type="integer" value="35" label="K-mer length in BP" />
@@ -32,6 +32,7 @@
             --minimizer-spaces ${database_type.minimizer_spaces}
           #else if $database_type.database_type == "custom"
             --threads \${GALAXY_SLOTS:-1}
+            ${database_type.skip_maps}
             --custom-fasta ${database_type.custom_fasta}
             --custom-database-name ${database_type.custom_database_name}
             --kmer-len ${database_type.kmer_len}
@@ -68,6 +69,7 @@
             <when value="custom">
                 <param name="custom_fasta" type="data" format="fasta" multiple="False" optional="true" label="Select history item" />
                 <param name="custom_database_name" type="text" label="Name for this database" />
+                <param name="skip_maps" type="boolean" truevalue="--skip-maps" falsevalue="" label="Skip downloading accession number to taxid maps during taxonomy download." /> 
                 <expand macro="common_params" />
             </when>
             
@@ -76,6 +78,15 @@
     <outputs>
         <data name="out_file" format="data_manager_json" />
     </outputs>
+    <tests>
+        <test>
+            <param name="database_type" value="custom" />
+            <param name="custom_fasta" value="adapter.fa" />
+            <param name="custom_database_name" value="database" />
+            <param name="skip_maps" value="true" />
+            <output name="out_file" value="kraken2_custom_data_manager.json" />
+        </test>
+    </tests>
     <help>
     </help>
     <citations>
--- a/data_manager_conf.xml	Mon May 06 19:42:14 2019 -0400
+++ b/data_manager_conf.xml	Fri May 24 13:39:08 2019 -0400
@@ -1,18 +1,18 @@
 <data_managers>
-    <data_manager tool_file="data_manager/kraken2_build_database.xml" id="kraken2_build_database" version="1.0">
-	<data_table name="kraken2_databases">
-	    <output>
-		<column name="value"/>
-		<column name="name"/>
-		<column name="path" output_ref="out_file">
-		    <move type="directory">
-		        <source>${path}</source>
-			<target base="${GALAXY_DATA_MANAGER_DATA_PATH}">kraken2_databases/${path}</target>
-		    </move>
+    <data_manager tool_file="data_manager/kraken2_build_database.xml" id="kraken2_build_database" version="2.0.8_beta">
+        <data_table name="kraken2_databases">
+            <output>
+                <column name="value"/>
+                <column name="name"/>
+                <column name="path" output_ref="out_file">
+                    <move type="directory">
+                        <source>${path}</source>
+                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">kraken2_databases/${path}</target>
+                    </move>
                     <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/kraken2_databases/${path}</value_translation>
-		    <value_translation type="function">abspath</value_translation>
-		</column>
-	    </output>
-	</data_table>
+                    <value_translation type="function">abspath</value_translation>
+                </column>
+            </output>
+        </data_table>
     </data_manager>
 </data_managers>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/adapter.fa	Fri May 24 13:39:08 2019 -0400
@@ -0,0 +1,2 @@
+>sequence16|kraken:taxid|32630  Adapter sequence
+CAAGCAGAAGACGGCATACGAGATCTTCGAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/adapter.fastq	Fri May 24 13:39:08 2019 -0400
@@ -0,0 +1,4 @@
+@sequence16
+CAAGCAGAAGACGGCATACGAGATCTTCGAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/kraken2_custom_data_manager.json	Fri May 24 13:39:08 2019 -0400
@@ -0,0 +1,1 @@
+{"data_tables": {"kraken2_databases": [{"path": "database", "name": "database", "value": "database"}]}}
\ No newline at end of file