# HG changeset patch
# User brenninc
# Date 1462775123 14400
# Node ID 288a172e95aa647db5ccd1ab3a05b1bd0dbd079b
# Parent 1d1b8eb0e6b76191505769ff45fccf0e62a195dd
Uploaded
diff -r 1d1b8eb0e6b7 -r 288a172e95aa data_reader.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_reader.xml Mon May 09 02:25:23 2016 -0400
@@ -0,0 +1,126 @@
+
+ Reads data from preconfigured directories table.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ (results['required'] == 'data')
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 1d1b8eb0e6b7 -r 288a172e95aa directory_copier.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/directory_copier.py Mon May 09 02:25:23 2016 -0400
@@ -0,0 +1,106 @@
+import gzip
+import optparse # using optparse as hydra still python 2.6
+import os.path
+import shutil
+import sys
+
def report_error(*args):
    """Print the arguments to stderr as one line and exit with status 1.

    Each argument is converted with ``str`` and the pieces are joined by
    single spaces. This function never returns.
    """
    message = ' '.join(str(part) for part in args)
    sys.stderr.write(message + '\n')
    sys.stderr.flush()
    sys.exit(1)
+
+
def check_pattern_get_new_name(a_file, ending, options):
    """Return the output name for ``a_file``, or None if it is filtered out.

    The caller is expected to have checked that ``a_file`` ends with
    ``ending``.

    Args:
        a_file: File name (without directory) being considered.
        ending: The accepted ending that ``a_file`` matched.
        options: Parsed options; ``start``, ``last``, ``new_ending`` and
            ``decompress`` are read.

    Returns:
        None when ``options.start`` is set and ``a_file`` does not start
        with it, or when ``options.last`` is set and ``a_file`` does not end
        with ``last`` followed by ``ending``. Otherwise, in priority order:
        the name with ``ending`` replaced by ``options.new_ending``; the
        name without a trailing ".gz" when ``options.decompress`` is set;
        ``a_file`` unchanged.
    """
    if options.start and not a_file.startswith(options.start):
        return None

    if options.last:
        # Insert a "." between ``last`` and the ending only when neither
        # side already supplies one.
        if ending.startswith(".") or options.last.endswith("."):
            required_tail = options.last + ending
        else:
            required_tail = options.last + "." + ending
        if not a_file.endswith(required_tail):
            return None

    if options.new_ending:
        # Slice by explicit length: a_file[:-len(ending)] would give "" for
        # an empty ending because -0 == 0.
        name = a_file[:len(a_file) - len(ending)]
        new_ending = options.new_ending
        if new_ending.startswith("."):
            # The stem may keep the old ending's separator; avoid "a..txt".
            if name.endswith("."):
                name = name[:-1]
        elif name and not name.endswith("."):
            # Neither side carries the separator (ending was given with its
            # leading dot, new_ending without): add it rather than gluing
            # the stem and the new ending together.
            name += "."
        return name + new_ending

    if options.decompress and a_file.endswith(".gz"):
        return a_file[:-3]
    return a_file
+
+
def check_and_get_new_name(a_file, options):
    """Return the output name for ``a_file``, or None if it is not wanted.

    The first entry of ``options.endings`` that ``a_file`` ends with decides
    the result: the file is passed, together with that ending, to
    ``check_pattern_get_new_name`` and its answer is returned. A file that
    matches none of the endings yields None.
    """
    # optparse leaves an "append" option as None when it was never given on
    # the command line; treat that as "no endings accepted" instead of
    # failing with a TypeError.
    for ending in options.endings or ():
        if a_file.endswith(ending):
            return check_pattern_get_new_name(a_file, ending, options)
    return None
+
+
def link(a_file, new_name, path):
    """Make ``path``/``a_file`` available as "output"/``new_name``.

    A hard link is attempted first. If the source and the "output" directory
    (resolved against the current working directory) are on different
    filesystems, a symbolic link to the resolved source path is created
    instead.

    Raises:
        OSError: if the link cannot be created, e.g. the target name already
            exists or the source file is missing.
    """
    import errno  # local import: only needed for the cross-device fallback

    file_path = os.path.join(os.path.realpath(path), a_file)
    link_path = os.path.join(os.path.realpath("output"), new_name)
    try:
        os.link(file_path, link_path)
    except OSError as err:
        # Hard links cannot span filesystems; anything else is a real error.
        if err.errno != errno.EXDEV:
            raise
        os.symlink(file_path, link_path)
+
+
def decompress(a_file, new_name, path):
    """Gunzip ``path``/``a_file`` into "output"/``new_name``.

    The "output" directory is resolved against the current working
    directory; the source file is left untouched.
    """
    source = os.path.join(os.path.realpath(path), a_file)
    destination = os.path.join(os.path.realpath("output"), new_name)
    with gzip.open(source, 'rb') as compressed:
        with open(destination, 'wb') as plain:
            shutil.copyfileobj(compressed, plain)
+
+
def copy_and_link(path, options):
    """List the wanted files of ``path`` and optionally link or gunzip them.

    Creates the "output" directory in the current working directory, then
    walks the entries of ``path`` in sorted order. For every entry that
    ``check_and_get_new_name`` accepts, the new name is written as one line
    to the file ``options.list``. With ``options.decompress`` set, ".gz"
    entries are decompressed into "output" and all other accepted entries
    are linked; otherwise entries are linked only when ``options.link`` is
    set.
    """
    os.mkdir("output")
    with open(options.list, 'w') as listing:
        entries = sorted(os.listdir(path))
        for entry in entries:
            target_name = check_and_get_new_name(entry, options)
            if not target_name:
                continue
            listing.write(target_name)
            listing.write("\n")
            if options.decompress and entry.endswith(".gz"):
                decompress(entry, target_name, path)
            elif options.decompress or options.link:
                link(entry, target_name, path)
+
+
if __name__ == '__main__':
    # Command-line entry point: parse the options, check that the ones the
    # run cannot do without are present, then process the directory.
    parser = optparse.OptionParser()
    parser.add_option("--path", action="store", type="string",
                      help="Path of directory to check. ")
    parser.add_option("--ending", action="append", type="string", dest="endings",
                      help="Ending that can be listed and if requested linked or decompressed. ")
    parser.add_option("--start", action="store", type="string",
                      help="String that must be at the start of the file name ")
    parser.add_option("--last", action="store", type="string",
                      help="String that must be the last bit of the file name before the endings")
    parser.add_option("--new_ending", action="store", type="string",
                      help="New ending to replace any previous ending in list and if required links or decompressions. Note: If not set decompression will auto remove the compressioned part of the ending")
    #parser.add_option("--regex", action="store", type="string",
    #                  help="Regex pattern the file name (less . ending) must match before the endings")
    parser.add_option("--list", action="store", type="string",
                      help="Path to where all files should be listed. ")
    parser.add_option("--link", action="store_true", default=False,
                      help="If set will cause links to be added in output directory. ")
    parser.add_option("--decompress", action="store_true", default=False,
                      help="If set will cause gz files to be decompressed or if not a supported decompression ending linked.")
    (options, args) = parser.parse_args()

    # Unset options are None; fail with a usage message here rather than
    # with an AttributeError/IndexError/TypeError further down.
    if not options.path or not options.path.strip():
        parser.error("--path is required")
    if not options.list:
        parser.error("--list is required")
    if not options.endings:
        parser.error("at least one --ending is required")

    path = options.path.strip()
    if not path.endswith('/'):
        path = path + "/"
    copy_and_link(path, options)
+
diff -r 1d1b8eb0e6b7 -r 288a172e95aa test-data/other.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/other.fastq Mon May 09 02:25:23 2016 -0400
@@ -0,0 +1,12 @@
+@SRR566546.971 HWUSI-EAS1673_11067_FC7070M:4:1:2374:1108 length=50
+GATTTGTATGAAAGTATACAACTAAAACTGCAGGTGGATCAGAGTAAGTC
++SRR566546.971 HWUSI-EAS1673_11067_FC7070M:4:1:2374:1108 length=50
+hhhhgfhhcghghggfcffdhfehhhhcehdchhdhahehffffde`bVd
+@SRR566546.970 HWUSI-EAS1673_11067_FC7070M:4:1:2299:1109 length=50
+TTGCCTGCCTATCATTTTAGTGCCTGTGAGGTGGAGATGTGAGGATCAGT
++SRR566546.970 HWUSI-EAS1673_11067_FC7070M:4:1:2299:1109 length=50
+hhhhhhhhhhghhghhhhhfhhhhhfffffe`ee[`X]b[d[ed`[Y[^Y
+@SRR566546.972 HWUSI-EAS1673_11067_FC7070M:4:1:2438:1109 length=50
+TGCATGATCTTCAGTGCCAGGACCTTATCAAGCGGTTTGGTCCCTTTGTT
++SRR566546.972 HWUSI-EAS1673_11067_FC7070M:4:1:2438:1109 length=50
+dhhhgchhhghhhfhhhhhdhhhhehhghfhhhchfddffcffafhfghe
diff -r 1d1b8eb0e6b7 -r 288a172e95aa test-data/other.fastq.gz
Binary file test-data/other.fastq.gz has changed
diff -r 1d1b8eb0e6b7 -r 288a172e95aa test-data/sample1.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample1.fastq Mon May 09 02:25:23 2016 -0400
@@ -0,0 +1,12 @@
+@SRR566546.970 HWUSI-EAS1673_11067_FC7070M:4:1:2299:1109 length=50
+TTGCCTGCCTATCATTTTAGTGCCTGTGAGGTGGAGATGTGAGGATCAGT
++SRR566546.970 HWUSI-EAS1673_11067_FC7070M:4:1:2299:1109 length=50
+hhhhhhhhhhghhghhhhhfhhhhhfffffe`ee[`X]b[d[ed`[Y[^Y
+@SRR566546.971 HWUSI-EAS1673_11067_FC7070M:4:1:2374:1108 length=50
+GATTTGTATGAAAGTATACAACTAAAACTGCAGGTGGATCAGAGTAAGTC
++SRR566546.971 HWUSI-EAS1673_11067_FC7070M:4:1:2374:1108 length=50
+hhhhgfhhcghghggfcffdhfehhhhcehdchhdhahehffffde`bVd
+@SRR566546.972 HWUSI-EAS1673_11067_FC7070M:4:1:2438:1109 length=50
+TGCATGATCTTCAGTGCCAGGACCTTATCAAGCGGTTTGGTCCCTTTGTT
++SRR566546.972 HWUSI-EAS1673_11067_FC7070M:4:1:2438:1109 length=50
+dhhhgchhhghhhfhhhhhdhhhhehhghfhhhchfddffcffafhfghe
diff -r 1d1b8eb0e6b7 -r 288a172e95aa test-data/sample1.fastq.gz
Binary file test-data/sample1.fastq.gz has changed
diff -r 1d1b8eb0e6b7 -r 288a172e95aa test-data/sample2.txt
--- a/test-data/sample2.txt Mon May 09 02:06:24 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-2
diff -r 1d1b8eb0e6b7 -r 288a172e95aa tool-data/directory_data.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/directory_data.loc.sample Mon May 09 02:25:23 2016 -0400
@@ -0,0 +1,19 @@
+#This file lists the directories that can be read in
+
+#This file has the format (white space characters are TAB characters):
+#
+#<value>	<dbkey>	<name>	<original_extension>	<galaxy_extension>	<decompress>	<path>
+#
+#original_extension should not include the starting .
+#
+#galaxy_extension should be one listed in galaxy/config/datatypes_conf.xml (or xml.sample)
+#
+#decompress should be No or Yes
+#
+#So, directory_data.loc could look something like this: (whitespace is tabs)
+#
+#john_12 john_12 John's fastq files batch 12 fastq.gz fastqsanger Yes /data/john/batch12
+#
+#Your directory_data.loc file should contain an entry for each path and extension pair
+#
+
diff -r 1d1b8eb0e6b7 -r 288a172e95aa tool-data/white-list.ini
--- a/tool-data/white-list.ini Mon May 09 02:06:24 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,15 +0,0 @@
-# This file only works if saved as {tool}/tool-data/white_list.ini
-
-# Start of paths that will be accepted by the directory reader
-# No jokers including * currently supported.
-# Even files listed here will be checked against the black list
-
-# To accept all paths just keep line with a single slash
-/
-
-# Add directories absolulute for example
-/home/joe_blog/galaxy_data
-
-# relative test_data as it only make sense for planemo tests
-test-data/
-
diff -r 1d1b8eb0e6b7 -r 288a172e95aa tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Mon May 09 02:25:23 2016 -0400
@@ -0,0 +1,6 @@
+
+
+ value, dbkey, name, original_extension, galaxy_extension, decompress, path
+
+
+