Mercurial > repos > rhpvorderman > data_manager_select_index_by_path

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/data_manager_select_index_by_path.xml	Mon Sep 25 03:35:26 2017 -0400
@@ -0,0 +1,52 @@
+<tool id="data_manager_select_index_by_path" name="Select index by path manager" tool_type="manage_data" version="0.0.2">
+    <description>path inputer</description>
+    <command interpreter="python">
+        path_name_value_key_manager.py
+        --value "${value}"
+        --dbkey "${dbkey}"
+        --name "${name}"
+        --path "${path}"
+        --data_table_name "${data_table}"
+        --json_output_file "${json_output_file}"
+    </command>
+    <inputs>
+        <param name="value" type="text" value="" label="value field for the entry.  Defaults to name if left blank." />
+        <param name="dbkey" type="text" value="" label="dbkey field for the entry.  Defaults to value if left blank." />
+        <param name="name" type="text" value="" label="name field for the entry. Defaults to the file name from path if left blank." />
+        <param name="path" type="text" value="" label="path field for the entry" />
+        <param name="data_table" type="select" value="" label="data table for the index">
+          <option value='all_fasta'>all_fasta</option>
+          <option value='bowtie2_indexes'>bowtie2_indexes</option>
+          <option value='bowtie_indexes'>bowtie_indexes</option>
+          <option value='bowtie_indexes_color'>bowtie_indexes_color</option>
+          <option value='bwa_mem_indexes'>bwa_mem_indexes</option>
+          <option value='bwameth_indexes'>bwameth_indexes</option>
+          <option value='fasta_indexes'>fasta_indexes</option>
+          <option value='gatk_picard_indexes'>gatk_picard_indexes</option>
+          <option value='gene_transfer'>gene_transfer</option>
+          <option value='hisat2_indexes'>hisat2_indexes</option>
+          <option value='kallisto_indexes'>kallisto_indexes</option>
+          <option value='picard_indexes'>picard_indexes</option>
+          <option value='tophat2_indexes'>tophat2_indexes</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="json_output_file" format="data_manager_json"/>
+    </outputs>
+
+    <help>
+Adds a server path to the selected data table.
+
+The tool will check the path exists but NOT check that it holds the expected data type.
+
+If name is not provided the filename from path less the extension is used.
+
+If value is not provided, the name will be used (or its default)
+
+If dbkey is not provided, the value will be used (or its default)
+
+    </help>
+    <citations>
+    </citations>
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/indexes.yml	Mon Sep 25 03:35:26 2017 -0400
@@ -0,0 +1,50 @@
+all_fasta:
+  name: fasta file
+  extensions:
+    - .fa
+  no_prefix: True
+bowtie2_indexes:
+  name: bowtie2 index
+  extensions:
+    - .bt2
+bowtie_indexes:
+  name: bowtie index
+  extensions:
+    - .ebwt
+bowtie_indexes_color:
+  name: bowtie color index
+  extensions:
+    - .ebwt
+bwa_mem_indexes:
+  name: bwa mem index
+  extensions:
+    - .amb
+    - .ann
+    - .bwt
+    - .pac
+    - .sa
+bwameth_indexes:
+  name: bwa_meth_index
+fasta_indexes:
+  name: fasta index
+  extensions:
+    - .fai
+gatk_picard_indexes:
+  name: picard index for GATK
+gene_transfer:
+  name: Gene Transfer File
+  extensions:
+    - .gtf
+hisat2_indexes:
+  name: hisat2 index
+  extensions:
+    - .ht2
+kallisto_indexes:
+  name: kallisto index
+  no_prefix: True
+picard_indexes:
+  name: picard index
+tophat2_indexes:
+  name: tophat2 index
+  extensions:
+    - .bt2
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/path_name_value_key_manager.py	Mon Sep 25 03:35:26 2017 -0400
@@ -0,0 +1,104 @@
+#!/usr/bin/env python
+
+import json
+import argparse
+import os
+import yaml
+
+def _add_data_table_entry( data_manager_dict, data_table_name, data_table_entry ):
+    data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} )
+    data_manager_dict['data_tables'][ data_table_name ] = data_manager_dict['data_tables'].get( data_table_name, [] )
+    data_manager_dict['data_tables'][ data_table_name ].append( data_table_entry )
+    return data_manager_dict
+
+
+def check_param(name, value, default=None,  check_tab=True):
+    if value in [ None, '', '?' ]:
+        if default:
+            print "Using {0} for {1} as no value provided".format( default, name )
+            value = default
+        else:
+            raise Exception( '{0} is not a valid {1}. You must specify a valid {1}.'.format( value, name ) )
+    if check_tab and "\t" in value:
+        raise Exception( '{0} is not a valid {1}. It may not contain a tab because these are used as seperators by galaxy .'.format( value, name ) )
+    return value
+
+def prefix_exists(directory, prefix):
+    '''checks if files exist with prefix in a directory. Returns Boolean'''
+    matched_files = []
+    directory_files = os.listdir(directory)
+    for directory_file in directory_files:
+        if directory_file.startswith(prefix):
+            matched_files.append(directory_file)
+    # Empty list should return False
+    return bool(matched_files)
+
+def prefix_plus_extension_exists(directory, prefix, extension):
+    '''checks if files exist with prefix in a directory. Returns Boolean'''
+    matched_files = []
+    directory_files = os.listdir(directory)
+    for directory_file in directory_files:
+        if directory_file.startswith(prefix) and directory_file.endswith(extension):
+            matched_files.append(directory_file)
+    # Empty list should return False
+    return bool(matched_files)
+
+def main():
+
+    #value = "test_value"
+    #name = "test_name"
+    #print '{0} other {1} more{0}'.format(value, name )
+    #print '{0} is not a valid {1}. It may not contain a tab.'.format( value, name )
+
+    #Parse Command Line
+    parser = argparse.ArgumentParser()
+    parser.add_argument( '--value', action='store', type=str, default=None, help='value' )
+    parser.add_argument( '--dbkey', action='store', type=str, default=None, help='dbkey' )
+    parser.add_argument( '--name',  action='store', type=str, default=None, help='name' )
+    parser.add_argument( '--path', action='store', type=str, default=None, help='path' )
+    parser.add_argument( '--data_table_name', action='store', type=str, default=None, help='path' )
+    parser.add_argument( '--json_output_file', action='store', type=str, default=None, help='path' )
+    options = parser.parse_args()
+
+    path = check_param("path", options.path)
+    basename = os.path.basename(path)
+    filename = os.path.splitext(basename)[0]
+    name = check_param("name", options.name, default=filename)
+    value = check_param("value", options.value, default=name)
+    dbkey = check_param("dbkey", options.dbkey, default=value)
+    data_table_name = check_param("data_table_name", options.data_table_name)
+    json_output_file = check_param("json_output_file", options.json_output_file, check_tab=False)
+
+    # Check if file or prefix exists
+    indexes = yaml.load(file(os.path.join(os.path.dirname(__file__), 'indexes.yml')))
+    index_dict = indexes.get(data_table_name,{})
+    index_name = index_dict.get('name','index')
+    index_extensions = index_dict.get('extensions', [''])
+    no_prefix = index_dict.get('no_prefix', False)
+    if not no_prefix:
+        dirname = os.path.dirname(path)
+        prefix = basename
+        for extension in index_extensions:
+            if not prefix_plus_extension_exists(dirname,prefix,extension):
+                raise Exception( 'Unable to find files with prefix "{0}" and extension "{1}" in {2}. Is this a valid {3}?'.format( prefix, extension, dirname, index_name ) )
+    else:
+        if not os.path.exists(path):
+            raise Exception( 'Unable to find path {0}.'.format( path ) )
+
+    if os.path.exists(json_output_file):
+        params = json.loads( open( json_output_file ).read() )
+        print "params", params
+    else:
+        params = {}
+
+    data_manager_dict = {}
+    data_table_entry = dict( value=value, dbkey=dbkey, name=name, path=path )
+    _add_data_table_entry( data_manager_dict, data_table_name, data_table_entry )
+
+    #save info to json file
+    with open( json_output_file, 'wb' ) as output_file:
+        output_file.write( json.dumps( data_manager_dict ) )
+        output_file.write( "\n" )
+
+# Run main() only when this file is executed as a script, not when imported.
+if __name__ == "__main__":
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_conf.xml	Mon Sep 25 03:35:26 2017 -0400
@@ -0,0 +1,110 @@
+<?xml version="1.0"?>
+<data_managers>
+    <data_manager tool_file="data_manager/data_manager_select_index_by_path.xml" id="data_manager_select_index_by_path" version="0.0.2">
+        <data_table name="all_fasta">
+            <output>
+                <column name="value" />
+                <column name="dbkey" />
+                <column name="name" />
+                <column name="path" />
+            </output>
+        </data_table>
+        <data_table name="bowtie2_indexes">
+            <output>
+                <column name="value" />
+                <column name="dbkey" />
+                <column name="name" />
+                <column name="path" />
+            </output>
+        </data_table>
+        <data_table name="bowtie_indexes">
+            <output>
+                <column name="value" />
+                <column name="dbkey" />
+                <column name="name" />
+                <column name="path" />
+            </output>
+        </data_table>
+        <data_table name="bowtie_indexes_color">
+            <output>
+                <column name="value" />
+                <column name="dbkey" />
+                <column name="name" />
+                <column name="path" />
+            </output>
+        </data_table>
+        <data_table name="bwa_mem_indexes">
+            <output>
+                <column name="value" />
+                <column name="dbkey" />
+                <column name="name" />
+                <column name="path" />
+            </output>
+        </data_table>
+        <data_table name="bwameth_indexes">
+            <output>
+                <column name="value" />
+                <column name="dbkey" />
+                <column name="name" />
+                <column name="path" />
+            </output>
+        </data_table>
+        <data_table name="fasta_indexes">
+            <output>
+                <column name="value" />
+                <column name="dbkey" />
+                <column name="name" />
+                <column name="path" />
+            </output>
+        </data_table>
+        <data_table name="gatk_picard_indexes">
+            <output>
+                <column name="value" />
+                <column name="dbkey" />
+                <column name="name" />
+                <column name="path" />
+            </output>
+        </data_table>
+        <data_table name="gene_transfer">
+            <output>
+                <column name="value" />
+                <column name="dbkey" />
+                <column name="name" />
+                <column name="path" />
+            </output>
+        </data_table>
+        <data_table name="hisat2_indexes">
+            <output>
+                <column name="value" />
+                <column name="dbkey" />
+                <column name="name" />
+                <column name="path" />
+            </output>
+        </data_table>
+        <data_table name="kallisto_indexes">
+            <output>
+                <column name="value" />
+                <column name="dbkey" />
+                <column name="name" />
+                <column name="path" />
+            </output>
+        </data_table>
+        <data_table name="picard_indexes">
+            <output>
+                <column name="value" />
+                <column name="dbkey" />
+                <column name="name" />
+                <column name="path" />
+            </output>
+        </data_table>
+        <data_table name="tophat2_indexes">
+            <output>
+                <column name="value" />
+                <column name="dbkey" />
+                <column name="name" />
+                <column name="path" />
+            </output>
+        </data_table>
+    </data_manager>
+
+</data_managers>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test.json	Mon Sep 25 03:35:26 2017 -0400
@@ -0,0 +1,1 @@
+{"data_tables": {"all_fasta": [{"path": "test-data/EboVir3.fa", "dbkey": "EboVir3", "name": "EboVir3", "value": "EboVir3"}]}}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/all_fasta.loc.sample	Mon Sep 25 03:35:26 2017 -0400
@@ -0,0 +1,18 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id>	<dbkey>		<display_name>	<file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel3	apiMel3	Honeybee (Apis mellifera): apiMel3		/path/to/genome/apiMel3/apiMel3.fa
+#hg19canon	hg19		Human (Homo sapiens): hg19 Canonical		/path/to/genome/hg19/hg19canon.fa
+#hg19full	hg19		Human (Homo sapiens): hg19 Full			/path/to/genome/hg19/hg19full.fa
+#
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file. So there will be multiple fasta files for each build,
+#such as with hg19 above.
+#
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/bowtie2_indices.loc.sample	Mon Sep 25 03:35:26 2017 -0400
@@ -0,0 +1,37 @@
+# bowtie2_indices.loc.sample
+# This is a *.loc.sample file distributed with Galaxy that enables tools
+# to use a directory of indexed data files. This one is for Bowtie2 and Tophat2.
+# See the wiki: http://wiki.galaxyproject.org/Admin/NGS%20Local%20Setup
+# First create these data files and save them in your own data directory structure.
+# Then, create a bowtie2_indices.loc file to use those indexes with tools.
+# Copy this file, save it with the same name (minus the .sample),
+# follow the format examples, and store the result in this directory.
+# The file should include an one line entry for each index set.
+# The path points to the "basename" for the set, not a specific file.
+# It has four text columns separated by TABS.
+#
+# <unique_build_id>	<dbkey>	<display_name>	<file_base_path>
+#
+# So, for example, if you had hg19 indexes stored in:
+#
+#    /depot/data2/galaxy/hg19/bowtie2/
+#
+# containing hg19 genome and hg19.*.bt2 files, such as:
+#    -rw-rw-r-- 1 james   james   914M Feb 10 18:56 hg19canon.fa
+#    -rw-rw-r-- 1 james   james   914M Feb 10 18:56 hg19canon.1.bt2
+#    -rw-rw-r-- 1 james   james   683M Feb 10 18:56 hg19canon.2.bt2
+#    -rw-rw-r-- 1 james   james   3.3K Feb 10 16:54 hg19canon.3.bt2
+#    -rw-rw-r-- 1 james   james   683M Feb 10 16:54 hg19canon.4.bt2
+#    -rw-rw-r-- 1 james   james   914M Feb 10 20:45 hg19canon.rev.1.bt2
+#    -rw-rw-r-- 1 james   james   683M Feb 10 20:45 hg19canon.rev.2.bt2
+#
+# then the bowtie2_indices.loc entry could look like this:
+#
+#hg19	hg19	Human (hg19)	/depot/data2/galaxy/hg19/bowtie2/hg19canon
+#
+#More examples:
+#
+#mm10	mm10	Mouse (mm10)	/depot/data2/galaxy/mm10/bowtie2/mm10
+#dm3	dm3		D. melanogaster (dm3)	/depot/data2/galaxy/mm10/bowtie2/dm3
+#
+#
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/bowtie_indices.loc.sample	Mon Sep 25 03:35:26 2017 -0400
@@ -0,0 +1,37 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of Bowtie indexed sequences data files. You will
+#need to create these data files and then create a bowtie_indices.loc
+#file similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The bowtie_indices.loc
+#file has this format (longer white space characters are TAB characters):
+#
+#<unique_build_id>   <dbkey>   <display_name>   <file_base_path>
+#
+#So, for example, if you had hg18 indexed stored in
+#/depot/data2/galaxy/bowtie/hg18/,
+#then the bowtie_indices.loc entry would look like this:
+#
+#hg18	hg18	hg18	/depot/data2/galaxy/bowtie/hg18/hg18
+#
+#and your /depot/data2/galaxy/bowtie/hg18/ directory
+#would contain hg18.*.ebwt files:
+#
+#-rw-r--r--  1 james    universe 830134 2005-09-13 10:12 hg18.1.ebwt
+#-rw-r--r--  1 james    universe 527388 2005-09-13 10:12 hg18.2.ebwt
+#-rw-r--r--  1 james    universe 269808 2005-09-13 10:12 hg18.3.ebwt
+#...etc...
+#
+#Your bowtie_indices.loc file should include an entry per line for each
+#index set you have stored. The "file" in the path does not actually
+#exist, but it is the prefix for the actual index files. For example:
+#
+#hg18canon			hg18	hg18 Canonical	/depot/data2/galaxy/bowtie/hg18/hg18canon
+#hg18full			hg18	hg18 Full		/depot/data2/galaxy/bowtie/hg18/hg18full
+#/orig/path/hg19	hg19	hg19			/depot/data2/galaxy/bowtie/hg19/hg19
+#...etc...
+#
+#Note that for backwards compatibility with workflows, the unique ID of
+#an entry must be the path that was in the original loc file, because that
+#is the value stored in the workflow for that parameter. That is why the
+#hg19 entry above looks odd. New genomes can be better-looking.
+#
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/bowtie_indices_color.loc.sample	Mon Sep 25 03:35:26 2017 -0400
@@ -0,0 +1,37 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of Bowtie indexed sequences data files. You will
+#need to create these data files and then create a bowtie_indices.loc
+#file similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The bowtie_indices.loc
+#file has this format (longer white space characters are TAB characters):
+#
+#<unique_build_id>		<dbkey>		<display_name>		<file_base_path>
+#
+#So, for example, if you had hg18 indexed stored in
+#/depot/data2/galaxy/bowtie/hg18/,
+#then the bowtie_indices.loc entry would look like this:
+#
+#hg18   hg18   hg18   /depot/data2/galaxy/bowtie/hg18/hg18
+#
+#and your /depot/data2/galaxy/bowtie/hg18/ directory
+#would contain hg18.*.ebwt files:
+#
+#-rw-r--r--  1 james    universe 830134 2005-09-13 10:12 hg18.1.ebwt
+#-rw-r--r--  1 james    universe 527388 2005-09-13 10:12 hg18.2.ebwt
+#-rw-r--r--  1 james    universe 269808 2005-09-13 10:12 hg18.3.ebwt
+#...etc...
+#
+#Your bowtie_indices.loc file should include an entry per line for each
+#index set you have stored. The "file" in the path does not actually
+#exist, but it is the prefix for the actual index files. For example:
+#
+#hg18canon			hg18	hg18 Canonical	/depot/data2/galaxy/bowtie/hg18/hg18canon
+#hg18full			hg18	hg18 Full		/depot/data2/galaxy/bowtie/hg18/hg18full
+#/orig/path/hg19	hg19	hg19			/depot/data2/galaxy/bowtie/hg19/hg19
+#...etc...
+#
+#Note that for backwards compatibility with workflows, the unique ID of
+#an entry must be the path that was in the original loc file, because that
+#is the value stored in the workflow for that parameter. That is why the
+#hg19 entry above looks odd. New genomes can be better-looking.
+#
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/bwa_mem_index.loc.sample	Mon Sep 25 03:35:26 2017 -0400
@@ -0,0 +1,38 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of BWA indexed sequences data files. You will need
+#to create these data files and then create a bwa_index.loc file
+#similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The bwa_index.loc
+#file has this format (longer white space characters are TAB characters):
+#
+#<unique_build_id>   <dbkey>   <display_name>   <file_path>
+#
+#So, for example, if you had phiX indexed stored in
+#/depot/data2/galaxy/phiX/base/,
+#then the bwa_index.loc entry would look like this:
+#
+#phiX174   phiX   phiX Pretty   /depot/data2/galaxy/phiX/base/phiX.fa
+#
+#and your /depot/data2/galaxy/phiX/base/ directory
+#would contain phiX.fa.* files:
+#
+#-rw-r--r--  1 james    universe 830134 2005-09-13 10:12 phiX.fa.amb
+#-rw-r--r--  1 james    universe 527388 2005-09-13 10:12 phiX.fa.ann
+#-rw-r--r--  1 james    universe 269808 2005-09-13 10:12 phiX.fa.bwt
+#...etc...
+#
+#Your bwa_index.loc file should include an entry per line for each
+#index set you have stored. The "file" in the path does not actually
+#exist, but it is the prefix for the actual index files.  For example:
+#
+#phiX174				phiX	phiX174			/depot/data2/galaxy/phiX/base/phiX.fa
+#hg18canon				hg18	hg18 Canonical	/depot/data2/galaxy/hg18/base/hg18canon.fa
+#hg18full				hg18	hg18 Full		/depot/data2/galaxy/hg18/base/hg18full.fa
+#/orig/path/hg19.fa		hg19	hg19			/depot/data2/galaxy/hg19/base/hg19.fa
+#...etc...
+#
+#Note that for backwards compatibility with workflows, the unique ID of
+#an entry must be the path that was in the original loc file, because that
+#is the value stored in the workflow for that parameter. That is why the
+#hg19 entry above looks odd. New genomes can be better-looking.
+#
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/bwameth_indexes.loc.sample	Mon Sep 25 03:35:26 2017 -0400
@@ -0,0 +1,15 @@
+# This is a sample file distributed with Galaxy that is used to define a
+# list of bwa-meth indices, using four tab-separated columns:
+#
+# <unique_build_id>	<dbkey>	<display_name>	<genome_fasta_file_path>
+#
+# An index can be created with the following command:
+#
+# bwameth.py index /some/path/genome.fa
+#
+# "/some/path/genome.fa" would then be the last column in the line
+# If this were for the mm9 mouse genome, the resulting entry would look like:
+#
+#mm9	mm9	Mouse (mm9)	/some/path/genome.fa
+#
+#
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/fasta_indexes.loc.sample	Mon Sep 25 03:35:26 2017 -0400
@@ -0,0 +1,29 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of Samtools indexed sequences data files.  You will need
+#to create these data files and then create a fasta_indexes.loc file
+#similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The fasta_indexes.loc
+#file has this format (white space characters are TAB characters):
+#
+# <unique_build_id>	<dbkey>	<display_name>	<file_base_path>
+#
+#So, for example, if you had hg19 Canonical indexed stored in
+#
+# /depot/data2/galaxy/hg19/sam/,
+#
+#then the fasta_indexes.loc entry would look like this:
+#
+#hg19canon	hg19	Human (Homo sapiens): hg19 Canonical	/depot/data2/galaxy/hg19/sam/hg19canon.fa
+#
+#and your /depot/data2/galaxy/hg19/sam/ directory
+#would contain hg19canon.fa and hg19canon.fa.fai files.
+#
+#Your fasta_indexes.loc file should include an entry per line for
+#each index set you have stored.  The file in the path does actually
+#exist, but it should never be directly used. Instead, the name serves
+#as a prefix for the index file.  For example:
+#
+#hg18canon	hg18	Human (Homo sapiens): hg18 Canonical	/depot/data2/galaxy/hg18/sam/hg18canon.fa
+#hg18full	hg18	Human (Homo sapiens): hg18 Full	/depot/data2/galaxy/hg18/sam/hg18full.fa
+#hg19canon	hg19	Human (Homo sapiens): hg19 Canonical	/depot/data2/galaxy/hg19/sam/hg19canon.fa
+#hg19full	hg19	Human (Homo sapiens): hg19 Full	/depot/data2/galaxy/hg19/sam/hg19full.fa
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/gatk_sorted_picard_index.loc.sample	Mon Sep 25 03:35:26 2017 -0400
@@ -0,0 +1,26 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of Picard dict and associated files. You will need
+#to create these data files and then create a gatk_sorted_picard_index.loc file
+#similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The gatk_sorted_picard_index.loc
+#file has this format (longer white space is the TAB character):
+#
+#<unique_build_id>	<dbkey>		<display_name>		<fasta_file_path>
+#
+#So, for example, if you had hg18 indexed and stored in
+#/depot/data2/galaxy/srma/hg18/,
+#then the gatk_sorted_picard_index.loc entry would look like this:
+#
+#hg18	hg18	hg18 Pretty		/depot/data2/galaxy/picard/hg18/hg18.fa
+#
+#and your /depot/data2/galaxy/srma/hg18/ directory
+#would contain the following three files:
+#hg18.fa
+#hg18.dict
+#hg18.fa.fai
+#
+#The dictionary file for each reference (ex. hg18.dict) must be
+#created via Picard (http://picard.sourceforge.net). Note that
+#the dict file does not have the .fa extension although the
+#path list in the loc file does include it.
+#
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/gene_transfer.loc.sample	Mon Sep 25 03:35:26 2017 -0400
@@ -0,0 +1,14 @@
+#This file lists the locations and dbkeys of all the gene transfer files
+
+#This file has the format (white space characters are TAB characters):
+#
+#<unique_build_id>	<dbkey>		<display_name>	<file_path>
+#
+#So, gene_transfer.loc could look something like this:
+#
+#vm5	vm5	vM5 annotation	/path/to/vM5.annotation.gtf
+#
+#Your gene_transfer.loc file should contain an entry for each individual
+#gtf file.
+#
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/hisat2_indexes.loc.sample	Mon Sep 25 03:35:26 2017 -0400
@@ -0,0 +1,39 @@
+# hisat2_indexes.loc.sample
+# This is a *.loc.sample file distributed with Galaxy that enables tools
+# to use a directory of indexed data files. This one is for HISAT2.
+# See the wiki: http://wiki.galaxyproject.org/Admin/NGS%20Local%20Setup
+# First create these data files and save them in your own data directory structure.
+# Then, create a hisat2_indexes.loc file to use those indexes with tools.
+# Copy this file, save it with the same name (minus the .sample),
+# follow the format examples, and store the result in this directory.
+# The file should include an one line entry for each index set.
+# The path points to the "basename" for the set, not a specific file.
+# It has four text columns separated by TABS.
+#
+# <unique_build_id>	<dbkey>	<display_name>	<file_base_path>
+#
+# So, for example, if you had sacCer3 indexes stored in:
+#
+#    /depot/data2/galaxy/sacCer3/hisat2_indexes/
+#
+# containing sacCer3 genome and sacCer3.*.ht2 files, such as:
+#
+#   -rw-rw-r-- 1 dave dave  12M Sep 23 13:57 sacCer3.1.ht2
+#   -rw-rw-r-- 1 dave dave 2.9M Sep 23 13:57 sacCer3.2.ht2
+#   -rw-rw-r-- 1 dave dave  161 Sep 23 13:57 sacCer3.3.ht2
+#   -rw-rw-r-- 1 dave dave 2.9M Sep 23 13:57 sacCer3.4.ht2
+#   -rw-rw-r-- 1 dave dave 7.3M Sep 23 13:57 sacCer3.5.ht2
+#   -rw-rw-r-- 1 dave dave 3.0M Sep 23 13:57 sacCer3.6.ht2
+#   -rw-rw-r-- 1 dave dave 128K Sep 23 13:57 sacCer3.7.ht2
+#   -rw-rw-r-- 1 dave dave  32K Sep 23 13:57 sacCer3.8.ht2
+#
+# then the hisat2_indexes.loc entry could look like this:
+#
+#sacCer3	sacCer3	S. cerevisiae Apr. 2011 (SacCer_Apr2011/sacCer3) (sacCer3)	/depot/data2/galaxy/hisat2_indexes/sacCer3
+#
+#More examples:
+#
+#mm10	mm10	Mouse (mm10)	/depot/data2/galaxy/hisat2_indexes/mm10
+#dm3	dm3		D. melanogaster (dm3)	/depot/data2/galaxy/hisat2_indexes/dm3
+#
+#
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/kallisto_indexes.loc.sample	Mon Sep 25 03:35:26 2017 -0400
@@ -0,0 +1,33 @@
+# kallisto_indexes.loc.sample
+# This is a *.loc.sample file distributed with Galaxy that enables tools
+# to use a directory of indexed data files. This one is for kallisto.
+# See the wiki: http://wiki.galaxyproject.org/Admin/NGS%20Local%20Setup
+# First create these data files and save them in your own data directory structure.
+# Then, create a kallisto_indexes.loc file to use those indexes with tools.
+# Copy this file, save it with the same name (minus the .sample),
+# follow the format examples, and store the result in this directory.
+# The file should include an one line entry for each index set.
+# The path points to the "basename" for the set, not a specific file.
+# It has four text columns separated by TABS.
+#
+# <unique_build_id>	<dbkey>	<display_name>	<file_base_path>
+#
+# So, for example, if you had sacCer3 indexes stored in:
+#
+#    /depot/data2/galaxy/sacCer3/kallisto_indexes/
+#
+# containing the sacCer3 genome and its kallisto index file, such as:
+#
+#   -rw-rw-r-- 1 dave dave  12M Sep 23 13:57 sacCer3.fa
+#   -rw-rw-r-- 1 dave dave 2.9M Sep 23 13:57 sacCer3.kallisto
+#
+# then the kallisto_indexes.loc entry could look like this:
+#
+#sacCer3	sacCer3	S. cerevisiae Apr. 2011 (SacCer_Apr2011/sacCer3) (sacCer3)	/depot/data2/galaxy/sacCer3/kallisto_indexes/sacCer3.kallisto
+#
+#More examples:
+#
+#mm10	mm10	Mouse (mm10)	/depot/data2/galaxy/kallisto_indexes/mm10
+#dm3	dm3		D. melanogaster (dm3)	/depot/data2/galaxy/kallisto_indexes/dm3
+#
+#
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/picard_index.loc.sample	Mon Sep 25 03:35:26 2017 -0400
@@ -0,0 +1,26 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of Picard dict and associated files. You will need
+#to create these data files and then create a picard_index.loc file
+#similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The picard_index.loc
+#file has this format (longer white space is the TAB character):
+#
+#<unique_build_id>		<dbkey>		<display_name>		<fasta_file_path>
+#
+#So, for example, if you had hg18 indexed and stored in
+#/depot/data2/galaxy/srma/hg18/,
+#then the picard_index.loc entry would look like this:
+#
+#hg18	hg18	hg18 Pretty		/depot/data2/galaxy/picard/hg18/hg18.fa
+#
+#and your /depot/data2/galaxy/srma/hg18/ directory
+#would contain the following three files:
+#hg18.fa
+#hg18.dict
+#hg18.fa.fai
+#
+#The dictionary file for each reference (ex. hg18.dict) must be
+#created via Picard (http://picard.sourceforge.net). Note that
+#the dict file does not have the .fa extension although the
+#path list in the loc file does include it.
+#
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/tophat2_indices.loc.sample	Mon Sep 25 03:35:26 2017 -0400
@@ -0,0 +1,37 @@
+# tophat2_indices.loc.sample
+# This is a *.loc.sample file distributed with Galaxy that enables tools
+# to use a directory of indexed data files. This one is for Bowtie2 and Tophat2.
+# See the wiki: http://wiki.galaxyproject.org/Admin/NGS%20Local%20Setup
+# First create these data files and save them in your own data directory structure.
+# Then, create a tophat2_indices.loc file to use those indexes with tools.
+# Copy this file, save it with the same name (minus the .sample),
+# follow the format examples, and store the result in this directory.
+# The file should include an one line entry for each index set.
+# The path points to the "basename" for the set, not a specific file.
+# It has four text columns separated by TABS.
+#
+# <unique_build_id>	<dbkey>	<display_name>	<file_base_path>
+#
+# So, for example, if you had hg19 indexes stored in:
+#
+#    /depot/data2/galaxy/hg19/bowtie2/
+#
+# containing hg19 genome and hg19.*.bt2 files, such as:
+#    -rw-rw-r-- 1 james   james   914M Feb 10 18:56 hg19canon.fa
+#    -rw-rw-r-- 1 james   james   914M Feb 10 18:56 hg19canon.1.bt2
+#    -rw-rw-r-- 1 james   james   683M Feb 10 18:56 hg19canon.2.bt2
+#    -rw-rw-r-- 1 james   james   3.3K Feb 10 16:54 hg19canon.3.bt2
+#    -rw-rw-r-- 1 james   james   683M Feb 10 16:54 hg19canon.4.bt2
+#    -rw-rw-r-- 1 james   james   914M Feb 10 20:45 hg19canon.rev.1.bt2
+#    -rw-rw-r-- 1 james   james   683M Feb 10 20:45 hg19canon.rev.2.bt2
+#
+# then the tophat2_indices.loc entry could look like this:
+#
+#hg19	hg19	Human (hg19)	/depot/data2/galaxy/hg19/bowtie2/hg19canon
+#
+#More examples:
+#
+#mm10	mm10	Mouse (mm10)	/depot/data2/galaxy/mm10/bowtie2/mm10
+#dm3	dm3		D. melanogaster (dm3)	/depot/data2/galaxy/mm10/bowtie2/dm3
+#
+#
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Mon Sep 25 03:35:26 2017 -0400
@@ -0,0 +1,55 @@
+<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc-->
+<tables>
+    <table name="all_fasta" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/all_fasta.loc" />
+    </table>
+    <table name="bowtie2_indexes" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/bowtie2_indices.loc" />
+    </table>
+    <table name="bowtie_indexes" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/bowtie_indices.loc" />
+    </table>
+    <table name="bowtie_indexes_color" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/bowtie_indices_color.loc" />
+    </table>
+    <table name="bwa_mem_indexes" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/bwa_mem_index.loc" />
+    </table>
+    <table name="bwameth_indexes" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/bwameth_indexes.loc" />
+    </table>
+    <table name="fasta_indexes" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/fasta_indexes.loc" />
+    </table>
+    <table name="gatk_picard_indexes" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/gatk_sorted_picard_index.loc" />
+    </table>
+    <table name="gene_transfer" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/gene_transfer.loc" />
+    </table>
+    <table name="hisat2_indexes" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/hisat2_indexes.loc" />
+    </table>
+    <table name="kallisto_indexes" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/kallisto_indexes.loc" />
+    </table>
+    <table name="picard_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/picard_index.loc" />
+    </table>
+    <table name="tophat2_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/tophat2_indices.loc" />
+    </table>
+ </tables>