diff resize_coordinate_window.py @ 1:0164d2edba9f draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/resize_coordinate_window commit 7aa2429d3f53a14be7e44dc6021ed3e11dc2f080
author iuc
date Tue, 16 Feb 2016 04:05:23 -0500
parents 08b6255afde7
children 541f300f322d
line wrap: on
line diff
--- a/resize_coordinate_window.py	Tue Jan 19 09:34:56 2016 -0500
+++ b/resize_coordinate_window.py	Tue Feb 16 04:05:23 2016 -0500
@@ -1,41 +1,88 @@
 import argparse
+import fileinput
 import sys
 
+# Maximum value of a signed 32 bit integer (2**31 - 1).
+MAX_CHROM_LEN = 2147483647
 
-def stop_err( msg ):
-    sys.stderr.write( msg )
+
+def stop_err(msg):
+    sys.stderr.write(msg)
     sys.exit(1)
 
 parser = argparse.ArgumentParser()
 parser.add_argument('--input', dest='input', help="Input dataset")
+parser.add_argument('--start_coordinate', dest='start_coordinate', type=int, help='Chromosome start coordinate, either 0 or 1.')
 parser.add_argument('--subtract_from_start', dest='subtract_from_start', type=int, help='Distance to subtract from start.')
 parser.add_argument('--add_to_end', dest='add_to_end', type=int, help='Distance to add to end.')
-parser.add_argument('--extend_existing', dest='extend_existing', help='Extend existing start/end rather or from computed midpoint.')
+parser.add_argument('--extend_existing', dest='extend_existing', help='Extend existing start/end instead of from computed midpoint.')
+parser.add_argument('--chrom_len_file', dest='chrom_len_file', help="File names of .len files for chromosome lengths")
+parser.add_argument('--region_boundaries', dest='region_boundaries', help="Option for handling region boundaries")
 parser.add_argument('--output', dest='output', help="Output dataset")
 args = parser.parse_args()
 
 extend_existing = args.extend_existing == 'existing'
 out = open(args.output, 'wb')
 
-for line in open(args.input):
-    if line.startswith('#'):
-        continue
-    items = line.split('\t')
-    if len(items) != 9:
-        continue
-    start = int(items[3])
-    end = int(items[4])
-    if extend_existing:
-        start -= args.subtract_from_start
-        end += args.add_to_end
-    else:
-        midpoint = (start + end) // 2
-        start = midpoint - args.subtract_from_start
-        end = midpoint + args.add_to_end
-    if start < 1:
-        out.close()
-        stop_err('Requested expansion places region beyond chromosome bounds.')
-    new_line = '\t'.join([items[0], items[1], items[2], str(start), str(end), items[5], items[6], items[7], items[8]])
-    out.write(new_line)
+chrom_start = int(args.start_coordinate)
+chrom_lens = dict()
+# Determine the length of each chromosome and add it to the chrom_lens dictionary.
+len_file_missing = False
+len_file_error = None
+len_file = fileinput.FileInput(args.chrom_len_file)
+try:
+    for line in len_file:
+        fields = line.split("\t")
+        chrom_lens[fields[0]] = int(fields[1])
+except Exception, e:
+    len_file_error = str(e)
+
+with open(args.input) as fhi:
+    for line in fhi:
+        if line.startswith('#'):
+            # Skip comments.
+            continue
+        items = line.split('\t')
+        if len(items) != 9:
+            # Skip invalid gff data.
+            continue
+        chrom = items[0]
+        start = int(items[3])
+        end = int(items[4])
+        if extend_existing:
+            new_start = start - args.subtract_from_start
+            new_end = end + args.add_to_end
+        else:
+            midpoint = (start + end) // 2
+            new_start = midpoint - args.subtract_from_start
+            new_end = midpoint + args.add_to_end
+        # Check start boundary.
+        if new_start < chrom_start:
+            if args.region_boundaries == 'discard':
+                continue
+            elif args.region_boundaries == 'limit':
+                new_start = chrom_start
+            elif args.region_boundaries == 'error':
+                out.close()
+                stop_err('Requested expansion places region beyond chromosome start boundary of %d.' % chrom_start)
+        # Check end boundary.
+        chrom_len = chrom_lens.get(chrom, None)
+        if chrom_len is None:
+            len_file_missing = True
+            chrom_len = MAX_CHROM_LEN
+        if new_end > chrom_len:
+            if args.region_boundaries == 'discard':
+                continue
+            elif args.region_boundaries == 'limit':
+                new_end = chrom_len
+            elif args.region_boundaries == 'error':
+                out.close()
+                stop_err('Requested expansion places region beyond chromosome end boundary of %d.' % chrom_len)
+        new_line = '\t'.join([chrom, items[1], items[2], str(new_start), str(new_end), items[5], items[6], items[7], items[8]])
+        out.write(new_line)
 out.close()
 
+if len_file_error is not None:
+    print "All chrom lengths set to %d, error in chrom len file: %s" % (MAX_CHROM_LEN, len_file_error)
+if len_file_missing:
+    print "All chrom lengths set to %d, chrom len files are not installed." % MAX_CHROM_LEN