Mercurial > repos > iuc > resize_coordinate_window
comparison resize_coordinate_window.py @ 1:0164d2edba9f draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/resize_coordinate_window commit 7aa2429d3f53a14be7e44dc6021ed3e11dc2f080
| author | iuc |
|---|---|
| date | Tue, 16 Feb 2016 04:05:23 -0500 |
| parents | 08b6255afde7 |
| children | 541f300f322d |
comparison
equal
deleted
inserted
replaced
| 0:08b6255afde7 | 1:0164d2edba9f |
|---|---|
| 1 import argparse | 1 import argparse |
| | 2 import fileinput |
| 2 import sys | 3 import sys |
| 3 | 4 |
| | 5 # Maximum value of a signed 32 bit integer (2**31 - 1). |
| | 6 MAX_CHROM_LEN = 2147483647 |
| 4 | 7 |
| 5 def stop_err( msg ): | 8 |
| 6 sys.stderr.write( msg ) | 9 def stop_err(msg): |
| | 10 sys.stderr.write(msg) |
| 7 sys.exit(1) | 11 sys.exit(1) |
| 8 | 12 |
| 9 parser = argparse.ArgumentParser() | 13 parser = argparse.ArgumentParser() |
| 10 parser.add_argument('--input', dest='input', help="Input dataset") | 14 parser.add_argument('--input', dest='input', help="Input dataset") |
| | 15 parser.add_argument('--start_coordinate', dest='start_coordinate', type=int, help='Chromosome start coordinate, either 0 or 1.') |
| 11 parser.add_argument('--subtract_from_start', dest='subtract_from_start', type=int, help='Distance to subtract from start.') | 16 parser.add_argument('--subtract_from_start', dest='subtract_from_start', type=int, help='Distance to subtract from start.') |
| 12 parser.add_argument('--add_to_end', dest='add_to_end', type=int, help='Distance to add to end.') | 17 parser.add_argument('--add_to_end', dest='add_to_end', type=int, help='Distance to add to end.') |
| 13 parser.add_argument('--extend_existing', dest='extend_existing', help='Extend existing start/end rather or from computed midpoint.') | 18 parser.add_argument('--extend_existing', dest='extend_existing', help='Extend existing start/end instead of from computed midpoint.') |
| | 19 parser.add_argument('--chrom_len_file', dest='chrom_len_file', help="File names of .len files for chromosome lengths") |
| | 20 parser.add_argument('--region_boundaries', dest='region_boundaries', help="Option for handling region boundaries") |
| 14 parser.add_argument('--output', dest='output', help="Output dataset") | 21 parser.add_argument('--output', dest='output', help="Output dataset") |
| 15 args = parser.parse_args() | 22 args = parser.parse_args() |
| 16 | 23 |
| 17 extend_existing = args.extend_existing == 'existing' | 24 extend_existing = args.extend_existing == 'existing' |
| 18 out = open(args.output, 'wb') | 25 out = open(args.output, 'wb') |
| 19 | 26 |
| 20 for line in open(args.input): | 27 chrom_start = int(args.start_coordinate) |
| 21 if line.startswith('#'): | 28 chrom_lens = dict() |
| 22 continue | 29 # Determine the length of each chromosome and add it to the chrom_lens dictionary. |
| 23 items = line.split('\t') | 30 len_file_missing = False |
| 24 if len(items) != 9: | 31 len_file_error = None |
| 25 continue | 32 len_file = fileinput.FileInput(args.chrom_len_file) |
| 26 start = int(items[3]) | 33 try: |
| 27 end = int(items[4]) | 34 for line in len_file: |
| 28 if extend_existing: | 35 fields = line.split("\t") |
| 29 start -= args.subtract_from_start | 36 chrom_lens[fields[0]] = int(fields[1]) |
| 30 end += args.add_to_end | 37 except Exception, e: |
| 31 else: | 38 len_file_error = str(e) |
| 32 midpoint = (start + end) // 2 | 39 |
| 33 start = midpoint - args.subtract_from_start | 40 with open(args.input) as fhi: |
| 34 end = midpoint + args.add_to_end | 41 for line in fhi: |
| 35 if start < 1: | 42 if line.startswith('#'): |
| 36 out.close() | 43 # Skip comments. |
| 37 stop_err('Requested expansion places region beyond chromosome bounds.') | 44 continue |
| 38 new_line = '\t'.join([items[0], items[1], items[2], str(start), str(end), items[5], items[6], items[7], items[8]]) | 45 items = line.split('\t') |
| 39 out.write(new_line) | 46 if len(items) != 9: |
| | 47 # Skip invalid gff data. |
| | 48 continue |
| | 49 chrom = items[0] |
| | 50 start = int(items[3]) |
| | 51 end = int(items[4]) |
| | 52 if extend_existing: |
| | 53 new_start = start - args.subtract_from_start |
| | 54 new_end = end + args.add_to_end |
| | 55 else: |
| | 56 midpoint = (start + end) // 2 |
| | 57 new_start = midpoint - args.subtract_from_start |
| | 58 new_end = midpoint + args.add_to_end |
| | 59 # Check start boundary. |
| | 60 if new_start < chrom_start: |
| | 61 if args.region_boundaries == 'discard': |
| | 62 continue |
| | 63 elif args.region_boundaries == 'limit': |
| | 64 new_start = chrom_start |
| | 65 elif args.region_boundaries == 'error': |
| | 66 out.close() |
| | 67 stop_err('Requested expansion places region beyond chromosome start boundary of %d.' % chrom_start) |
| | 68 # Check end boundary. |
| | 69 chrom_len = chrom_lens.get(chrom, None) |
| | 70 if chrom_len is None: |
| | 71 len_file_missing = True |
| | 72 chrom_len = MAX_CHROM_LEN |
| | 73 if new_end > chrom_len: |
| | 74 if args.region_boundaries == 'discard': |
| | 75 continue |
| | 76 elif args.region_boundaries == 'limit': |
| | 77 new_end = chrom_len |
| | 78 elif args.region_boundaries == 'error': |
| | 79 out.close() |
| | 80 stop_err('Requested expansion places region beyond chromosome end boundary of %d.' % chrom_len) |
| | 81 new_line = '\t'.join([chrom, items[1], items[2], str(new_start), str(new_end), items[5], items[6], items[7], items[8]]) |
| | 82 out.write(new_line) |
| 40 out.close() | 83 out.close() |
| 41 | 84 |
| | 85 if len_file_error is not None: |
| | 86 print "All chrom lengths set to %d, error in chrom len file: %s" % (MAX_CHROM_LEN, len_file_error) |
| | 87 if len_file_missing: |
| | 88 print "All chrom lengths set to %d, chrom len files are not installed." % MAX_CHROM_LEN |
