# HG changeset patch
# User brenninc
# Date 1466494732 14400
# Node ID b737d0ed42be62aeef7b7d7ec266d299dbe2fca3
# Uploaded
#
# diff -r 000000000000 -r b737d0ed42be data_reader.xml
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/data_reader.xml Tue Jun 21 03:38:52 2016 -0400
# @@ -0,0 +1,157 @@
# NOTE(review): the XML markup of this hunk is not present in this copy of the
# patch; the only text fragments that survive are:
# + Reads data from preconfigured directories table.
# + (results['required'] == 'data')
#
# diff -r 000000000000 -r b737d0ed42be directory_copier.py
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/directory_copier.py Tue Jun 21 03:38:52 2016 -0400
# @@ -0,0 +1,106 @@
"""List, hard-link and/or decompress the files of a directory by name pattern.

Files of ``--path`` whose names end with one of the ``--ending`` values (and
pass the optional ``--start`` / ``--last`` / ``--regex`` filters) are written,
one name per line, to the ``--list`` file.  With ``--link`` or
``--decompress`` the matching files are additionally hard-linked or
gunzipped into an ``output`` directory created in the current working
directory.
"""
import contextlib
import gzip
import optparse  # using optparse as hydra still python 2.6
import os.path
import re
import shutil
import sys


def report_error(*args):
    """Write the space-joined ``args`` to stderr and exit with status 1."""
    sys.stderr.write(' '.join(map(str, args)) + '\n')
    sys.stderr.flush()
    sys.exit(1)


def check_pattern_get_new_name(a_file, ending, options):
    """Apply the name filters to ``a_file`` and work out its output name.

    ``a_file`` is expected to end with ``ending``; the base name is what is
    left after removing ``ending`` and one trailing ``"."`` if present.

    Args:
        a_file: File name (no directory part) to check.
        ending: The ending of ``a_file`` that matched.
        options: Object with the attributes ``start``, ``last``, ``regex``,
            ``new_ending`` and ``decompress``.

    Returns:
        ``None`` if the file is rejected by ``start`` (prefix of the full
        name), ``last`` (suffix of the base name) or ``regex`` (searched in
        the base name).  Otherwise the new name: base name plus
        ``new_ending`` (a separating ``"."`` is inserted when ``new_ending``
        does not start with one); else, when ``decompress`` is set and the
        file ends with ``".gz"``, the name without ``".gz"``; else
        ``a_file`` unchanged.
    """
    if options.start and not a_file.startswith(options.start):
        return None

    # Computing the cut position explicitly keeps an empty ending from
    # truncating the whole name (a_file[:-0] would be "").
    name = a_file[:len(a_file) - len(ending)]
    if name.endswith("."):
        name = name[:-1]

    # Fixed: this used the undefined global ``last`` (NameError whenever
    # --last was given) instead of ``options.last``.
    if options.last and not name.endswith(options.last):
        return None

    if options.regex and re.search(options.regex, name) is None:
        return None

    if options.new_ending:
        if options.new_ending[0] == ".":
            return name + options.new_ending
        return name + "." + options.new_ending

    if options.decompress and a_file.endswith(".gz"):
        return a_file[:-3]
    return a_file


def check_and_get_new_name(a_file, options):
    """Return the output name for ``a_file``, or ``None`` if it is skipped.

    Only the first entry of ``options.endings`` that ``a_file`` ends with is
    considered; the remaining filters are then applied by
    ``check_pattern_get_new_name``.  A missing (``None``) or empty
    ``options.endings`` matches nothing.
    """
    for ending in options.endings or ():
        if a_file.endswith(ending):
            return check_pattern_get_new_name(a_file, ending, options)
    return None


def link(a_file, new_name, path):
    """Create ``output/<new_name>`` as a hard link to ``<path>/<a_file>``.

    ``os.link`` is used, so this is a hard link (not a symbolic link); the
    ``output`` directory is resolved against the current working directory
    and must already exist.
    """
    file_path = os.path.join(os.path.realpath(path), a_file)
    link_path = os.path.join(os.path.realpath("output"), new_name)
    os.link(file_path, link_path)


def decompress(a_file, new_name, path):
    """Gunzip ``<path>/<a_file>`` into ``output/<new_name>``.

    The ``output`` directory is resolved against the current working
    directory and must already exist.
    """
    file_path = os.path.join(os.path.realpath(path), a_file)
    target_path = os.path.join(os.path.realpath("output"), new_name)
    # Nested ``with`` blocks plus ``closing`` instead of ``with a, b:`` on a
    # bare GzipFile: both of those need Python 2.7, while the script is meant
    # to stay runnable on 2.6 (see the optparse import).
    with contextlib.closing(gzip.open(file_path, 'rb')) as f_in:
        with open(target_path, 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)


def copy_and_link(path, options):
    """List the matching files of ``path`` and link/decompress them if asked.

    Every accepted file's new name is written, one per line and in sorted
    order of the original names, to the file ``options.list``.  When
    ``options.decompress`` is set, ``.gz`` files are decompressed into
    ``output`` and all other accepted files are hard-linked there; when only
    ``options.link`` is set, every accepted file is hard-linked.

    The ``output`` directory is created with ``os.mkdir`` when either mode is
    active, so it must not exist beforehand.
    """
    if options.decompress or options.link:
        os.mkdir("output")
    with open(options.list, 'w') as list_file:
        for a_file in sorted(os.listdir(path)):
            new_name = check_and_get_new_name(a_file, options)
            if not new_name:
                continue
            list_file.write(new_name)
            list_file.write("\n")
            if options.decompress:
                if a_file.endswith(".gz"):
                    decompress(a_file, new_name, path)
                else:
                    link(a_file, new_name, path)
            elif options.link:
                link(a_file, new_name, path)


def main():
    """Parse the command line and run ``copy_and_link``."""
    parser = optparse.OptionParser()
    parser.add_option("--path", action="store", type="string",
                      help="Path of directory to check. ")
    parser.add_option("--ending", action="append", type="string", dest="endings",
                      help="Ending that can be listed and if requested linked or decompressed. ")
    parser.add_option("--start", action="store", type="string",
                      help="String that must be at the start of the file name ")
    parser.add_option("--last", action="store", type="string",
                      help="String that must be the last bit of the file name before the endings")
    parser.add_option("--regex", action="store", type="string",
                      help="Regex for file names not including the endings")
    parser.add_option("--new_ending", action="store", type="string",
                      help="New ending to replace any previous ending in list and if required links or decompressions. Note: If not set decompression will auto remove the compressioned part of the ending")
    parser.add_option("--list", action="store", type="string",
                      help="Path to where all files should be listed. ")
    parser.add_option("--link", action="store_true", default=False,
                      help="If set will cause links to be added in output directory. ")
    parser.add_option("--decompress", action="store_true", default=False,
                      help="If set will cause gz files to be decompressed or if not a supported decompression ending linked.")
    (options, args) = parser.parse_args()

    # Fail with a usage message instead of an AttributeError / IndexError /
    # TypeError further down when a mandatory option is missing.
    if not options.path or not options.path.strip():
        parser.error("--path is required")
    if not options.list:
        parser.error("--list is required")
    if not options.endings:
        parser.error("at least one --ending is required")

    path = options.path.strip()
    if path[-1] != '/':
        path = path + "/"
    copy_and_link(path, options)


if __name__ == '__main__':
    main()

# diff -r 000000000000 -r b737d0ed42be test-data/other.fastq
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/test-data/other.fastq Tue Jun 21 03:38:52 2016 -0400
# @@ -0,0 +1,12 @@
# +@SRR566546.971 HWUSI-EAS1673_11067_FC7070M:4:1:2374:1108 length=50
# +GATTTGTATGAAAGTATACAACTAAAACTGCAGGTGGATCAGAGTAAGTC
# ++SRR566546.971 HWUSI-EAS1673_11067_FC7070M:4:1:2374:1108 length=50
# +hhhhgfhhcghghggfcffdhfehhhhcehdchhdhahehffffde`bVd
# +@SRR566546.970 HWUSI-EAS1673_11067_FC7070M:4:1:2299:1109 length=50
# +TTGCCTGCCTATCATTTTAGTGCCTGTGAGGTGGAGATGTGAGGATCAGT
# ++SRR566546.970 HWUSI-EAS1673_11067_FC7070M:4:1:2299:1109 length=50
# +hhhhhhhhhhghhghhhhhfhhhhhfffffe`ee[`X]b[d[ed`[Y[^Y
# +@SRR566546.972 HWUSI-EAS1673_11067_FC7070M:4:1:2438:1109 length=50
# +TGCATGATCTTCAGTGCCAGGACCTTATCAAGCGGTTTGGTCCCTTTGTT
++SRR566546.972 HWUSI-EAS1673_11067_FC7070M:4:1:2438:1109 length=50 +dhhhgchhhghhhfhhhhhdhhhhehhghfhhhchfddffcffafhfghe diff -r 000000000000 -r b737d0ed42be test-data/other.fastq.gz Binary file test-data/other.fastq.gz has changed diff -r 000000000000 -r b737d0ed42be test-data/sample1.fastq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sample1.fastq Tue Jun 21 03:38:52 2016 -0400 @@ -0,0 +1,12 @@ +@SRR566546.970 HWUSI-EAS1673_11067_FC7070M:4:1:2299:1109 length=50 +TTGCCTGCCTATCATTTTAGTGCCTGTGAGGTGGAGATGTGAGGATCAGT ++SRR566546.970 HWUSI-EAS1673_11067_FC7070M:4:1:2299:1109 length=50 +hhhhhhhhhhghhghhhhhfhhhhhfffffe`ee[`X]b[d[ed`[Y[^Y +@SRR566546.971 HWUSI-EAS1673_11067_FC7070M:4:1:2374:1108 length=50 +GATTTGTATGAAAGTATACAACTAAAACTGCAGGTGGATCAGAGTAAGTC ++SRR566546.971 HWUSI-EAS1673_11067_FC7070M:4:1:2374:1108 length=50 +hhhhgfhhcghghggfcffdhfehhhhcehdchhdhahehffffde`bVd +@SRR566546.972 HWUSI-EAS1673_11067_FC7070M:4:1:2438:1109 length=50 +TGCATGATCTTCAGTGCCAGGACCTTATCAAGCGGTTTGGTCCCTTTGTT ++SRR566546.972 HWUSI-EAS1673_11067_FC7070M:4:1:2438:1109 length=50 +dhhhgchhhghhhfhhhhhdhhhhehhghfhhhchfddffcffafhfghe diff -r 000000000000 -r b737d0ed42be test-data/sample1.fastq.gz Binary file test-data/sample1.fastq.gz has changed diff -r 000000000000 -r b737d0ed42be tool-data/directory_data.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/directory_data.loc.sample Tue Jun 21 03:38:52 2016 -0400 @@ -0,0 +1,19 @@ +#This file lists the directories that can be read in + +#This file has the format (white space characters are TAB characters): +# +# +# +#original_extension should not include the starting . 
+# +#galaxy_extension should be one listed in galaxy/config/datatypes_conf.xml (or xml.sample) +# +#decompress should be No or Yes +# +#So, directory_data.loc could look something like this: (whitespace is tabs) +# +#john_12 john_12 John's fastq files batch 12 fastq.gz fastqsanger Yes /data/john/batch12 +# +#Your directory_data.loc file should contain an entry for each path and extension pair +# + diff -r 000000000000 -r b737d0ed42be tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Tue Jun 21 03:38:52 2016 -0400 @@ -0,0 +1,6 @@ + + + value, dbkey, name, original_extension, galaxy_extension, decompress, path + +
+