# HG changeset patch
# User iuc
# Date 1563967020 14400
# Node ID 7d4fd734b078eac94027a573eed65727201b8700
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_salmon_index_builder commit 473215c1fea036954d18a7ce339f914f7cf0655b
diff -r 000000000000 -r 7d4fd734b078 data_manager/salmon_index_builder.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/salmon_index_builder.py Wed Jul 24 07:17:00 2019 -0400
@@ -0,0 +1,108 @@
+#!/usr/bin/env python
+# Based heavily on the kallisto data manager wrapper script by iuc
+"""Galaxy data manager script that builds a Salmon index and registers it.
+
+Reads the JSON parameter file named by --output, runs ``salmon index`` into
+the output dataset's extra_files_path directory, and then overwrites that same
+file with the data table entry describing the new index.
+"""
+from __future__ import print_function
+
+import argparse
+import json
+import os
+import subprocess
+import sys
+
+DEFAULT_DATA_TABLE_NAME = "salmon_indexes_versioned"
+
+
+def get_id_name(params, dbkey, fasta_description=None):
+    """Return the ``(sequence_id, sequence_name)`` pair for the new index.
+
+    Both values are read from ``params['param_dict']``. An empty sequence_id
+    falls back to ``dbkey``; an empty sequence_name falls back to
+    ``fasta_description`` and then to ``dbkey``.
+    """
+    # TODO: ensure sequence_id is unique and does not already appear in location file
+    param_dict = params['param_dict']
+    sequence_id = param_dict['sequence_id'] or dbkey
+    sequence_name = param_dict['sequence_name'] or fasta_description or dbkey
+    return sequence_id, sequence_name
+
+
+def build_salmon_index(data_manager_dict, options, params, sequence_id, sequence_name):
+    """Run ``salmon index`` and record the new index in data_manager_dict.
+
+    The index is written to the ``extra_files_path`` of the first entry in
+    ``params['output_data']``. If salmon exits with a non-zero code, this
+    function calls ``sys.exit`` with that code and no data table entry is added.
+    """
+    data_table_name = options.data_table_name or DEFAULT_DATA_TABLE_NAME
+    target_directory = params['output_data'][0]['extra_files_path']
+    if not os.path.exists(target_directory):
+        # makedirs rather than mkdir so that missing parent directories are created too.
+        os.makedirs(target_directory)
+    args = ['salmon', 'index']
+    # kmer_size is None when -k is omitted and may be '' when passed empty;
+    # in both cases leave -k off rather than handing a non-string or empty
+    # value to the salmon command line.
+    if options.kmer_size:
+        args.extend(['-k', options.kmer_size])
+    args.extend(['-t', options.fasta_filename, '-i', target_directory])
+    return_code = subprocess.call(args=args, shell=False)
+    if return_code:
+        print("Error building index.", file=sys.stderr)
+        sys.exit(return_code)
+    data_table_entry = dict(value=sequence_id, dbkey=options.fasta_dbkey, name=sequence_name, path=sequence_id, version=options.index_version)
+    _add_data_table_entry(data_manager_dict, data_table_name, data_table_entry)
+
+
+def _add_data_table_entry(data_manager_dict, data_table_name, data_table_entry):
+    """Append data_table_entry to the named table, creating it if needed.
+
+    Mutates data_manager_dict in place and also returns it.
+    """
+    data_tables = data_manager_dict.setdefault('data_tables', {})
+    data_tables.setdefault(data_table_name, []).append(data_table_entry)
+    return data_manager_dict
+
+
+def main():
+    """Parse the command line, build the index, and write the result JSON."""
+    # Parse Command Line
+    parser = argparse.ArgumentParser()
+    # --output is read for parameters and then overwritten with the result, and
+    # --fasta_filename is passed straight to salmon, so neither can be omitted.
+    parser.add_argument('--output', dest='output', action='store', type=str, required=True)
+    parser.add_argument('--fasta_filename', dest='fasta_filename', action='store', type=str, required=True)
+    parser.add_argument('--fasta_dbkey', dest='fasta_dbkey', action='store', type=str, default=None)
+    parser.add_argument('--fasta_description', dest='fasta_description', action='store', type=str, default=None)
+    # NOTE(review): this default differs from DEFAULT_DATA_TABLE_NAME; because it
+    # is non-empty, the fallback in build_salmon_index only applies when an
+    # empty value is passed explicitly. Confirm which default is intended.
+    parser.add_argument('--data_table_name', dest='data_table_name', action='store', type=str, default='salmon_indexes')
+    parser.add_argument('-v', '--index_version', dest='index_version', action='store', type=str, help='Use IndexVersion attribute from header.json')
+    parser.add_argument('-k', '--kmer_size', dest='kmer_size', action='store', type=str, help='kmer_size')
+    options = parser.parse_args()
+
+    filename = options.output
+
+    with open(filename) as fh:
+        params = json.load(fh)
+    data_manager_dict = {}
+
+    if options.fasta_dbkey in [None, '', '?']:
+        raise Exception('"%s" is not a valid dbkey. You must specify a valid dbkey.' % options.fasta_dbkey)
+
+    sequence_id, sequence_name = get_id_name(params, dbkey=options.fasta_dbkey, fasta_description=options.fasta_description)
+    # build the index
+    build_salmon_index(data_manager_dict, options, params, sequence_id, sequence_name)
+
+    # save info to json file
+    with open(filename, 'w') as out:
+        json.dump(data_manager_dict, out, sort_keys=True)
+
+
+if __name__ == "__main__":
+    main()
diff -r 000000000000 -r 7d4fd734b078 data_manager/salmon_index_builder.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/salmon_index_builder.xml Wed Jul 24 07:17:00 2019 -0400
@@ -0,0 +1,50 @@
+
+ index builder
+
+ salmon
+ python
+
+
+ q5
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ https://doi.org/10.1038/nmeth.4197
+
+
diff -r 000000000000 -r 7d4fd734b078 data_manager_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_conf.xml Wed Jul 24 07:17:00 2019 -0400
@@ -0,0 +1,21 @@
+
+
+
+
+
+
+
+
diff -r 000000000000 -r 7d4fd734b078 test-data/all_fasta.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/all_fasta.loc Wed Jul 24 07:17:00 2019 -0400
@@ -0,0 +1,1 @@
+phiX174 phiX174 phiX174 ${__HERE__}/phiX174.fasta
diff -r 000000000000 -r 7d4fd734b078 test-data/phiX174.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/phiX174.fasta Wed Jul 24 07:17:00 2019 -0400
@@ -0,0 +1,79 @@
+>phiX174
+GAGTTTTATCGCTTCCATGACGCAGAAGTTAACACTTTCGGATATTTCTGATGAGTCGAAAAATTATCTT
+GATAAAGCAGGAATTACTACTGCTTGTTTACGAATTAAATCGAAGTGGACTGCTGGCGGAAAATGAGAAA
+ATTCGACCTATCCTTGCGCAGCTCGAGAAGCTCTTACTTTGCGACCTTTCGCCATCAACTAACGATTCTG
+TCAAAAACTGACGCGTTGGATGAGGAGAAGTGGCTTAATATGCTTGGCACGTTCGTCAAGGACTGGTTTA
+GATATGAGTCACATTTTGTTCATGGTAGAGATTCTCTTGTTGACATTTTAAAAGAGCGTGGATTACTATC
+TGAGTCCGATGCTGTTCAACCACTAATAGGTAAGAAATCATGAGTCAAGTTACTGAACAATCCGTACGTT
+TCCAGACCGCTTTGGCCTCTATTAAGCTCATTCAGGCTTCTGCCGTTTTGGATTTAACCGAAGATGATTT
+CGATTTTCTGACGAGTAACAAAGTTTGGATTGCTACTGACCGCTCTCGTGCTCGTCGCTGCGTTGAGGCT
+TGCGTTTATGGTACGCTGGACTTTGTGGGATACCCTCGCTTTCCTGCTCCTGTTGAGTTTATTGCTGCCG
+TCATTGCTTATTATGTTCATCCCGTCAACATTCAAACGGCCTGTCTCATCATGGAAGGCGCTGAATTTAC
+GGAAAACATTATTAATGGCGTCGAGCGTCCGGTTAAAGCCGCTGAATTGTTCGCGTTTACCTTGCGTGTA
+CGCGCAGGAAACACTGACGTTCTTACTGACGCAGAAGAAAACGTGCGTCAAAAATTACGTGCAGAAGGAG
+TGATGTAATGTCTAAAGGTAAAAAACGTTCTGGCGCTCGCCCTGGTCGTCCGCAGCCGTTGCGAGGTACT
+AAAGGCAAGCGTAAAGGCGCTCGTCTTTGGTATGTAGGTGGTCAACAATTTTAATTGCAGGGGCTTCGGC
+CCCTTACTTGAGGATAAATTATGTCTAATATTCAAACTGGCGCCGAGCGTATGCCGCATGACCTTTCCCA
+TCTTGGCTTCCTTGCTGGTCAGATTGGTCGTCTTATTACCATTTCAACTACTCCGGTTATCGCTGGCGAC
+TCCTTCGAGATGGACGCCGTTGGCGCTCTCCGTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTA
+CTGTAGACATTTTTACTTTTTATGTCCCTCATCGTCACGTTTATGGTGAACAGTGGATTAAGTTCATGAA
+GGATGGTGTTAATGCCACTCCTCTCCCGACTGTTAACACTACTGGTTATATTGACCATGCCGCTTTTCTT
+GGCACGATTAACCCTGATACCAATAAAATCCCTAAGCATTTGTTTCAGGGTTATTTGAATATCTATAACA
+ACTATTTTAAAGCGCCGTGGATGCCTGACCGTACCGAGGCTAACCCTAATGAGCTTAATCAAGATGATGC
+TCGTTATGGTTTCCGTTGCTGCCATCTCAAAAACATTTGGACTGCTCCGCTTCCTCCTGAGACTGAGCTT
+TCTCGCCAAATGACGACTTCTACCACATCTATTGACATTATGGGTCTGCAAGCTGCTTATGCTAATTTGC
+ATACTGACCAAGAACGTGATTACTTCATGCAGCGTTACCGTGATGTTATTTCTTCATTTGGAGGTAAAAC
+CTCTTATGACGCTGACAACCGTCCTTTACTTGTCATGCGCTCTAATCTCTGGGCATCTGGCTATGATGTT
+GATGGAACTGACCAAACGTCGTTAGGCCAGTTTTCTGGTCGTGTTCAACAGACCTATAAACATTCTGTGC
+CGCGTTTCTTTGTTCCTGAGCATGGCACTATGTTTACTCTTGCGCTTGTTCGTTTTCCGCCTACTGCGAC
+TAAAGAGATTCAGTACCTTAACGCTAAAGGTGCTTTGACTTATACCGATATTGCTGGCGACCCTGTTTTG
+TATGGCAACTTGCCGCCGCGTGAAATTTCTATGAAGGATGTTTTCCGTTCTGGTGATTCGTCTAAGAAGT
+TTAAGATTGCTGAGGGTCAGTGGTATCGTTATGCGCCTTCGTATGTTTCTCCTGCTTATCACCTTCTTGA
+AGGCTTCCCATTCATTCAGGAACCGCCTTCTGGTGATTTGCAAGAACGCGTACTTATTCGCCACCATGAT
+TATGACCAGTGTTTCCAGTCCGTTCAGTTGTTGCAGTGGAATAGTCAGGTTAAATTTAATGTGACCGTTT
+ATCGCAATCTGCCGACCACTCGCGATTCAATCATGACTTCGTGATAAAAGATTGAGTGTGAGGTTATAAC
+GCCGAAGCGGTAAAAATTTTAATTTTTGCCGCTGAGGGGTTGACCAAGCGAAGCGCGGTAGGTTTTCTGC
+TTAGGAGTTTAATCATGTTTCAGACTTTTATTTCTCGCCATAATTCAAACTTTTTTTCTGATAAGCTGGT
+TCTCACTTCTGTTACTCCAGCTTCTTCGGCACCTGTTTTACAGACACCTAAAGCTACATCGTCAACGTTA
+TATTTTGATAGTTTGACGGTTAATGCTGGTAATGGTGGTTTTCTTCATTGCATTCAGATGGATACATCTG
+TCAACGCCGCTAATCAGGTTGTTTCTGTTGGTGCTGATATTGCTTTTGATGCCGACCCTAAATTTTTTGC
+CTGTTTGGTTCGCTTTGAGTCTTCTTCGGTTCCGACTACCCTCCCGACTGCCTATGATGTTTATCCTTTG
+AATGGTCGCCATGATGGTGGTTATTATACCGTCAAGGACTGTGTGACTATTGACGTCCTTCCCCGTACGC
+CGGGCAATAATGTTTATGTTGGTTTCATGGTTTGGTCTAACTTTACCGCTACTAAATGCCGCGGATTGGT
+TTCGCTGAATCAGGTTATTAAAGAGATTATTTGTCTCCAGCCACTTAAGTGAGGTGATTTATGTTTGGTG
+CTATTGCTGGCGGTATTGCTTCTGCTCTTGCTGGTGGCGCCATGTCTAAATTGTTTGGAGGCGGTCAAAA
+AGCCGCCTCCGGTGGCATTCAAGGTGATGTGCTTGCTACCGATAACAATACTGTAGGCATGGGTGATGCT
+GGTATTAAATCTGCCATTCAAGGCTCTAATGTTCCTAACCCTGATGAGGCCGCCCCTAGTTTTGTTTCTG
+GTGCTATGGCTAAAGCTGGTAAAGGACTTCTTGAAGGTACGTTGCAGGCTGGCACTTCTGCCGTTTCTGA
+TAAGTTGCTTGATTTGGTTGGACTTGGTGGCAAGTCTGCCGCTGATAAAGGAAAGGATACTCGTGATTAT
+CTTGCTGCTGCATTTCCTGAGCTTAATGCTTGGGAGCGTGCTGGTGCTGATGCTTCCTCTGCTGGTATGG
+TTGACGCCGGATTTGAGAATCAAAAAGAGCTTACTAAAATGCAACTGGACAATCAGAAAGAGATTGCCGA
+GATGCAAAATGAGACTCAAAAAGAGATTGCTGGCATTCAGTCGGCGACTTCACGCCAGAATACGAAAGAC
+CAGGTATATGCACAAAATGAGATGCTTGCTTATCAACAGAAGGAGTCTACTGCTCGCGTTGCGTCTATTA
+TGGAAAACACCAATCTTTCCAAGCAACAGCAGGTTTCCGAGATTATGCGCCAAATGCTTACTCAAGCTCA
+AACGGCTGGTCAGTATTTTACCAATGACCAAATCAAAGAAATGACTCGCAAGGTTAGTGCTGAGGTTGAC
+TTAGTTCATCAGCAAACGCAGAATCAGCGGTATGGCTCTTCTCATATTGGCGCTACTGCAAAGGATATTT
+CTAATGTCGTCACTGATGCTGCTTCTGGTGTGGTTGATATTTTTCATGGTATTGATAAAGCTGTTGCCGA
+TACTTGGAACAATTTCTGGAAAGACGGTAAAGCTGATGGTATTGGCTCTAATTTGTCTAGGAAATAACCG
+TCAGGATTGACACCCTCCCAATTGTATGTTTTCATGCCTCCAAATCTTGGAGGCTTTTTTATGGTTCGTT
+CTTATTACCCTTCTGAATGTCACGCTGATTATTTTGACTTTGAGCGTATCGAGGCTCTTAAACCTGCTAT
+TGAGGCTTGTGGCATTTCTACTCTTTCTCAATCCCCAATGCTTGGCTTCCATAAGCAGATGGATAACCGC
+ATCAAGCTCTTGGAAGAGATTCTGTCTTTTCGTATGCAGGGCGTTGAGTTCGATAATGGTGATATGTATG
+TTGACGGCCATAAGGCTGCTTCTGACGTTCGTGATGAGTTTGTATCTGTTACTGAGAAGTTAATGGATGA
+ATTGGCACAATGCTACAATGTGCTCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGG
+GACGAAAAATGGTTTTTAGAGAACGAGAAGACGGTTACGCAGTTTTGCCGCAAGCTGGCTGCTGAACGCC
+CTCTTAAGGATATTCGCGATGAGTATAATTACCCCAAAAAGAAAGGTATTAAGGATGAGTGTTCAAGATT
+GCTGGAGGCCTCCACTATGAAATCGCGTAGAGGCTTTACTATTCAGCGTTTGATGAATGCAATGCGACAG
+GCTCATGCTGATGGTTGGTTTATCGTTTTTGACACTCTCACGTTGGCTGACGACCGATTAGAGGCGTTTT
+ATGATAATCCCAATGCTTTGCGTGACTATTTTCGTGATATTGGTCGTATGGTTCTTGCTGCCGAGGGTCG
+CAAGGCTAATGATTCACACGCCGACTGCTATCAGTATTTTTGTGTGCCTGAGTATGGTACAGCTAATGGC
+CGTCTTCATTTCCATGCGGTGCATTTTATGCGGACACTTCCTACAGGTAGCGTTGACCCTAATTTTGGTC
+GTCGGGTACGCAATCGCCGCCAGTTAAATAGCTTGCAAAATACGTGGCCTTATGGTTACAGTATGCCCAT
+CGCAGTTCGCTACACGCAGGACGCTTTTTCACGTTCTGGTTGGTTGTGGCCTGTTGATGCTAAAGGTGAG
+CCGCTTAAAGCTACCAGTTATATGGCTGTTGGTTTCTATGTGGCTAAATACGTTAACAAAAAGTCAGATA
+TGGACCTTGCTGCTAAAGGTCTAGGAGCTAAAGAATGGAACAACTCACTAAAAACCAAGCTGTCGCTACT
+TCCCAAGAAGCTGTTCAGAATCAGAATGAGCCGCAACTTCGGGATGAAAATGCTCACAATGACAAATCTG
+TCCACGGAGTGCTTAATCCAACTTACCAAGCTGGGTTACGACGCGACGCCGTTCAACCAGATATTGAAGC
+AGAACGCAAAAAGAGAGATGAGATTGAGGCTGGGAAAAGTTACTGTAGCCGACGTTTTGGCGGCGCAACC
+TGTGACGACAAATCTGCTCAAATTTATGCGCGCTTCGATAAAAATGATTGGCGTATCCAACCTGCA
+
diff -r 000000000000 -r 7d4fd734b078 test-data/salmon_indexes_versioned.loc
diff -r 000000000000 -r 7d4fd734b078 test-data/salmon_phiX174.data_manager_json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/salmon_phiX174.data_manager_json Wed Jul 24 07:17:00 2019 -0400
@@ -0,0 +1,1 @@
+{"data_tables": {"salmon_indexes_versioned": [{"dbkey": "phiX174", "name": "sequence_name", "path": "sequence_id", "value": "sequence_id", "version": "q5"}]}}
\ No newline at end of file
diff -r 000000000000 -r 7d4fd734b078 tool-data/all_fasta.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/all_fasta.loc.sample Wed Jul 24 07:17:00 2019 -0400
@@ -0,0 +1,18 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id>	<dbkey>	<display_name>	<file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa
+#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa
+#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa
+#
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file. So there will be multiple fasta files for each build,
+#such as with hg19 above.
+#
diff -r 000000000000 -r 7d4fd734b078 tool-data/salmon_indexes_versioned.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/salmon_indexes_versioned.loc.sample Wed Jul 24 07:17:00 2019 -0400
@@ -0,0 +1,32 @@
+# salmon_indexes_versioned.loc.sample
+# This is a *.loc.sample file distributed with Galaxy that enables tools
+# to use a directory of indexed data files. This one is for Salmon.
+# See the wiki: http://wiki.galaxyproject.org/Admin/NGS%20Local%20Setup
+# First create these data files and save them in your own data directory structure.
+# Then, create a salmon_indexes_versioned.loc file to use those indexes with tools.
+# Copy this file, save it with the same name (minus the .sample),
+# follow the format examples, and store the result in this directory.
+# The file should include a one-line entry for each index set.
+# The path points to the "basename" for the set, not a specific file.
+# It has five text columns separated by TABS:
+#
+#<unique_build_id>	<dbkey>	<display_name>	<file_base_path>	<version>
+#
+# The version column can be retrieved from the header.json file in the index folder, e.g. "IndexVersion": "q5"
+
+
+
+# So, for example, if you had sacCer3 indexes stored in:
+#
+# /depot/data2/galaxy/sacCer3/salmon_indexes/
+#
+# then the salmon_indexes_versioned.loc entry could look like this:
+#
+#sacCer3 sacCer3 S. cerevisiae Apr. 2011 (SacCer_Apr2011/sacCer3) (sacCer3) /depot/data2/galaxy/sacCer3/salmon_indexes/version_3/ q6
+#
+#More examples:
+#
+#mm10 mm10 Mouse (mm10) /depot/data2/galaxy/salmon_indexes/mm10/version_2 q4
+#dm3 dm3 D. melanogaster (dm3) /depot/data2/galaxy/salmon_indexes/dm3/version_1 q5
+#
+#
diff -r 000000000000 -r 7d4fd734b078 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Wed Jul 24 07:17:00 2019 -0400
@@ -0,0 +1,11 @@
+
+
+
+ value, dbkey, name, path, version
+
+
+
+ value, dbkey, name, path
+
+
+
diff -r 000000000000 -r 7d4fd734b078 tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Wed Jul 24 07:17:00 2019 -0400
@@ -0,0 +1,11 @@
+
+
+
+ value, dbkey, name, path, version
+
+
+
+ value, dbkey, name, path
+
+
+