comparison resize_coordinate_window.py @ 1:0164d2edba9f draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/resize_coordinate_window commit 7aa2429d3f53a14be7e44dc6021ed3e11dc2f080
author iuc
date Tue, 16 Feb 2016 04:05:23 -0500
parents 08b6255afde7
children 541f300f322d
comparison
equal deleted inserted replaced
0:08b6255afde7 1:0164d2edba9f
1 import argparse 1 import argparse
2 import fileinput
2 import sys 3 import sys
3 4
5 # Maximum value of a signed 32 bit integer (2**31 - 1).
6 MAX_CHROM_LEN = 2147483647
4 7
5 def stop_err( msg ): 8
6 sys.stderr.write( msg ) 9 def stop_err(msg):
10 sys.stderr.write(msg)
7 sys.exit(1) 11 sys.exit(1)
8 12
9 parser = argparse.ArgumentParser() 13 parser = argparse.ArgumentParser()
10 parser.add_argument('--input', dest='input', help="Input dataset") 14 parser.add_argument('--input', dest='input', help="Input dataset")
15 parser.add_argument('--start_coordinate', dest='start_coordinate', type=int, help='Chromosome start coordinate, either 0 or 1.')
11 parser.add_argument('--subtract_from_start', dest='subtract_from_start', type=int, help='Distance to subtract from start.') 16 parser.add_argument('--subtract_from_start', dest='subtract_from_start', type=int, help='Distance to subtract from start.')
12 parser.add_argument('--add_to_end', dest='add_to_end', type=int, help='Distance to add to end.') 17 parser.add_argument('--add_to_end', dest='add_to_end', type=int, help='Distance to add to end.')
13 parser.add_argument('--extend_existing', dest='extend_existing', help='Extend existing start/end rather or from computed midpoint.') 18 parser.add_argument('--extend_existing', dest='extend_existing', help='Extend existing start/end instead of from computed midpoint.')
19 parser.add_argument('--chrom_len_file', dest='chrom_len_file', help="File names of .len files for chromosome lengths")
20 parser.add_argument('--region_boundaries', dest='region_boundaries', help="Option for handling region boundaries")
14 parser.add_argument('--output', dest='output', help="Output dataset") 21 parser.add_argument('--output', dest='output', help="Output dataset")
15 args = parser.parse_args() 22 args = parser.parse_args()
16 23
17 extend_existing = args.extend_existing == 'existing' 24 extend_existing = args.extend_existing == 'existing'
18 out = open(args.output, 'wb') 25 out = open(args.output, 'wb')
19 26
20 for line in open(args.input): 27 chrom_start = int(args.start_coordinate)
21 if line.startswith('#'): 28 chrom_lens = dict()
22 continue 29 # Determine the length of each chromosome and add it to the chrom_lens dictionary.
23 items = line.split('\t') 30 len_file_missing = False
24 if len(items) != 9: 31 len_file_error = None
25 continue 32 len_file = fileinput.FileInput(args.chrom_len_file)
26 start = int(items[3]) 33 try:
27 end = int(items[4]) 34 for line in len_file:
28 if extend_existing: 35 fields = line.split("\t")
29 start -= args.subtract_from_start 36 chrom_lens[fields[0]] = int(fields[1])
30 end += args.add_to_end 37 except Exception, e:
31 else: 38 len_file_error = str(e)
32 midpoint = (start + end) // 2 39
33 start = midpoint - args.subtract_from_start 40 with open(args.input) as fhi:
34 end = midpoint + args.add_to_end 41 for line in fhi:
35 if start < 1: 42 if line.startswith('#'):
36 out.close() 43 # Skip comments.
37 stop_err('Requested expansion places region beyond chromosome bounds.') 44 continue
38 new_line = '\t'.join([items[0], items[1], items[2], str(start), str(end), items[5], items[6], items[7], items[8]]) 45 items = line.split('\t')
39 out.write(new_line) 46 if len(items) != 9:
47 # Skip invalid gff data.
48 continue
49 chrom = items[0]
50 start = int(items[3])
51 end = int(items[4])
52 if extend_existing:
53 new_start = start - args.subtract_from_start
54 new_end = end + args.add_to_end
55 else:
56 midpoint = (start + end) // 2
57 new_start = midpoint - args.subtract_from_start
58 new_end = midpoint + args.add_to_end
59 # Check start boundary.
60 if new_start < chrom_start:
61 if args.region_boundaries == 'discard':
62 continue
63 elif args.region_boundaries == 'limit':
64 new_start = chrom_start
65 elif args.region_boundaries == 'error':
66 out.close()
67 stop_err('Requested expansion places region beyond chromosome start boundary of %d.' % chrom_start)
68 # Check end boundary.
69 chrom_len = chrom_lens.get(chrom, None)
70 if chrom_len is None:
71 len_file_missing = True
72 chrom_len = MAX_CHROM_LEN
73 if new_end > chrom_len:
74 if args.region_boundaries == 'discard':
75 continue
76 elif args.region_boundaries == 'limit':
77 new_end = chrom_len
78 elif args.region_boundaries == 'error':
79 out.close()
80 stop_err('Requested expansion places region beyond chromosome end boundary of %d.' % chrom_len)
81 new_line = '\t'.join([chrom, items[1], items[2], str(new_start), str(new_end), items[5], items[6], items[7], items[8]])
82 out.write(new_line)
40 out.close() 83 out.close()
41 84
85 if len_file_error is not None:
86 print "All chrom lengths set to %d, error in chrom len file: %s" % (MAX_CHROM_LEN, len_file_error)
87 if len_file_missing:
88 print "All chrom lengths set to %d, chrom len files are not installed." % MAX_CHROM_LEN