| 
0
 | 
     1 import gzip
 | 
| 
 | 
     2 import optparse  # using optparse as hydra still python 2.6
 | 
| 
 | 
     3 import os.path
 | 
| 
1
 | 
     4 import re
 | 
| 
0
 | 
     5 import shutil
 | 
| 
 | 
     6 import sys
 | 
| 
 | 
     7 
 | 
def report_error(*args):
    """Print the arguments to stderr as one line and terminate the script.

    Each argument is converted with ``str`` and the pieces are joined by a
    single space. After the message has been written and flushed the
    process exits through ``sys.exit(1)``.
    """
    message = ' '.join([str(part) for part in args])
    sys.stderr.write(message + '\n')
    sys.stderr.flush()
    sys.exit(1)
| 
 | 
    12 
 | 
| 
 | 
    13 
 | 
def check_pattern_get_new_name(a_file, ending, options):
    """Apply the name filters to a_file and return the name to list it under.

    Parameters:
        a_file:  file name (no directory part) that already matched ending.
        ending:  the ending that matched; it is stripped, together with one
                 trailing ".", to obtain the base name used by the filters.
        options: object with the attributes start, last, regex, new_ending
                 and decompress (as produced by the option parser).

    Returns:
        None when the file is rejected by the start, last or regex filter.
        Otherwise the new name: the base name plus new_ending when that is
        set, a_file without its ".gz" suffix when decompress is set and the
        file ends in ".gz", or a_file unchanged.
    """
    if options.start and not a_file.startswith(options.start):
        return None

    # Guard the slice: with an empty ending, a_file[:-0] would be "" and the
    # whole name would be thrown away.
    name = a_file[:-len(ending)] if ending else a_file
    if name.endswith("."):
        name = name[:-1]

    # The filter has to read options.last; the bare name `last` used here
    # previously is undefined and raised NameError whenever --last was given.
    if options.last and not name.endswith(options.last):
        return None

    if options.regex and re.search(options.regex, name) is None:
        return None

    if options.new_ending:
        # Accept the new ending with or without its leading dot.
        if options.new_ending.startswith("."):
            return name + options.new_ending
        return name + "." + options.new_ending

    if options.decompress and a_file.endswith(".gz"):
        return a_file[:-3]
    return a_file
| 
 | 
    37 
 | 
| 
 | 
    38 
 | 
def check_and_get_new_name(a_file, options):
    """Return the new name for a_file, or None when it should be skipped.

    The entries of options.endings are tried in order. The first one that
    a_file ends with is handed to check_pattern_get_new_name, whose result
    is returned as is. When no ending matches the result is None.
    """
    first_match = next(
        (suffix for suffix in options.endings if a_file.endswith(suffix)),
        None)
    if first_match is None:
        return None
    return check_pattern_get_new_name(a_file, first_match, options)
| 
 | 
    44 
 | 
| 
 | 
    45 
 | 
def link(a_file, new_name, path):
    """Create a hard link to path/a_file named new_name in "output".

    Both directories are resolved with os.path.realpath before joining, and
    the link itself is made with os.link.
    """
    source_dir = os.path.realpath(path)
    output_dir = os.path.realpath("output")
    os.link(os.path.join(source_dir, a_file),
            os.path.join(output_dir, new_name))
| 
 | 
    50 
 | 
| 
 | 
    51 
 | 
def decompress(a_file, new_name, path):
    """Gunzip path/a_file into the "output" directory as new_name.

    The source is opened through gzip and its decompressed bytes are
    streamed into the target file with shutil.copyfileobj.
    """
    source = os.path.join(os.path.realpath(path), a_file)
    target = os.path.join(os.path.realpath("output"), new_name)
    with gzip.open(source, 'rb') as compressed:
        with open(target, 'wb') as expanded:
            shutil.copyfileobj(compressed, expanded)
| 
 | 
    57 
 | 
| 
 | 
    58 
 | 
def copy_and_link(path, options):
    """List the accepted files found in path and optionally materialise them.

    The "output" directory is created when either options.decompress or
    options.link is set. The directory entries of path are visited in
    sorted order; each one for which check_and_get_new_name returns a
    non-empty name has that name written, one per line, to the file at
    options.list. With options.decompress, ".gz" files are decompressed
    into "output" and all other accepted files are linked there; with only
    options.link, every accepted file is linked.
    """
    wants_output = options.decompress or options.link
    if wants_output:
        os.mkdir("output")
    with open(options.list, 'w') as list_file:
        for a_file in sorted(os.listdir(path)):
            new_name = check_and_get_new_name(a_file, options)
            if not new_name:
                continue
            list_file.write(new_name + "\n")
            if options.decompress and a_file.endswith(".gz"):
                decompress(a_file, new_name, path)
            elif wants_output:
                # Either --link was requested, or --decompress was requested
                # for a file that is not gzip-compressed.
                link(a_file, new_name, path)
| 
 | 
    77 
 | 
| 
 | 
    78 
 | 
if __name__ == '__main__':
    # Command-line entry point: parse the options, check that the ones the
    # script cannot work without were supplied, then list (and optionally
    # link or decompress) the matching files of the requested directory.
    parser = optparse.OptionParser()
    parser.add_option("--path", action="store", type="string",
                      help="Path of directory to check. ")
    parser.add_option("--ending", action="append", type="string", dest="endings",
                      help="Ending that can be listed and if requested linked or decompressed. ")
    parser.add_option("--start", action="store", type="string",
                      help="String that must be at the start of the file name ")
    parser.add_option("--last", action="store", type="string",
                      help="String that must be the last bit of the file name before the endings")
    parser.add_option("--regex", action="store", type="string",
                      help="Regex for file names not including the endings")
    parser.add_option("--new_ending", action="store", type="string",
                      help="New ending to replace any previous ending in list and if required links or decompressions. Note: If not set decompression will auto remove the compressioned part of the ending")
    parser.add_option("--list", action="store", type="string",
                      help="Path to where all files should be listed. ")
    parser.add_option("--link", action="store_true", default=False,
                      help="If set will cause links to be added in output directory. ")
    parser.add_option("--decompress", action="store_true", default=False,
                      help="If set will cause gz files to be decompressed or if not a supported decompression ending linked.")
    (options, args) = parser.parse_args()

    # None of these options has a default, so optparse leaves them as None
    # when they are omitted. Report that clearly instead of failing further
    # down with an AttributeError, IndexError or TypeError.
    if not options.path or not options.path.strip():
        report_error("No --path parameter provided")
    if not options.endings:
        report_error("At least one --ending parameter is required")
    if not options.list:
        report_error("No --list parameter provided")

    path = options.path.strip()
    if not path.endswith('/'):
        path = path + "/"
    copy_and_link(path, options)
| 
 | 
   106 
 |