Mercurial > repos > xuebing > sharplabtool
comparison tools/regVariation/windowSplitter.py @ 0:9071e359b9a3
Uploaded
| author | xuebing |
|---|---|
| date | Fri, 09 Mar 2012 19:37:19 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:9071e359b9a3 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 """ | |
| 4 Split into windows. | |
| 5 | |
| 6 usage: %prog input size out_file | |
| 7 -l, --cols=N,N,N,N: Columns for chrom, start, end, strand in file | |
| 8 """ | |
| 9 | |
| 10 import sys, re, os | |
| 11 | |
| 12 from galaxy import eggs | |
| 13 import pkg_resources; pkg_resources.require( "bx-python" ) | |
| 14 from bx.cookbook import doc_optparse | |
| 15 from galaxy.tools.util.galaxyops import * | |
| 16 | |
def stop_err( msg ):
    """Write `msg` to standard error and terminate with a failing exit status.

    The message is written verbatim (no newline is appended).  The process
    exits with status 1 so that callers inspecting the return code, and not
    only the stderr stream, see the run as failed; a bare ``sys.exit()``
    would report success (status 0).

    Raises:
        SystemExit: always, with code 1.
    """
    sys.stderr.write( msg )
    sys.exit( 1 )
| 20 | |
def _iter_windows( start, end, winsize, step ):
    """Yield (win_start, win_end) pairs for windows of `winsize` inside [start, end].

    At most ``(end - start) // step`` windows are produced, each starting
    `step` positions after the previous one.  A window is only yielded when
    it ends at or before `end`, so no window ever overruns the interval.

    Raises:
        ZeroDivisionError: if `step` is 0.
    """
    for _ in range( ( end - start ) // step ):
        if start + winsize > end:
            break
        yield start, start + winsize
        start += step


def main():
    """Split every interval of the input file into fixed-size windows.

    Command-line arguments (positional): input file, window size, output
    file, sliding flag (0/1) and offset; the --cols option gives the
    chrom/start/end/strand columns.  For each non-blank, non-comment input
    line, one output line is written per window, identical to the input
    line except for the start and end columns.  With sliding disabled (or an
    offset of 0) consecutive windows are `winsize` apart; otherwise they are
    `offset` apart.

    Lines whose coordinates cannot be parsed, or that lack a required
    column, are counted and reported on stdout instead of aborting the run.
    """
    # Parsing Command Line here
    options, args = doc_optparse.parse( __doc__ )

    try:
        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols )
        inp_file, winsize, out_file, makesliding, offset = args
        winsize = int( winsize )
        offset = int( offset )
        makesliding = int( makesliding )
    except Exception:
        # Exception (not a bare except) so SystemExit/KeyboardInterrupt pass through.
        stop_err( "Data issue, click the pencil icon in the history item to correct the metadata attributes of the input dataset." )

    # A sliding window with no offset would never advance; treat it as non-sliding.
    if offset == 0:
        makesliding = 0
    step = winsize if makesliding == 0 else offset

    skipped_lines = 0
    first_invalid_line = 0
    invalid_line = None

    # Context managers guarantee both files are closed even if a write fails.
    with open( out_file, 'w' ) as fo:
        with open( inp_file ) as fi:
            for i, line in enumerate( fi ):
                line = line.strip()
                if not line or line.startswith( '#' ):
                    continue
                try:
                    elems = line.split( '\t' )
                    if strand_col_1 != -1:
                        # The strand value itself is not used; the lookup is kept so
                        # that rows lacking the strand column are rejected as invalid.
                        _ = elems[strand_col_1]
                    start = int( elems[start_col_1] )
                    end = int( elems[end_col_1] )
                    for win_start, win_end in _iter_windows( start, end, winsize, step ):
                        elems[start_col_1] = str( win_start )
                        elems[end_col_1] = str( win_end )
                        fo.write( "%s\n" % '\t'.join( elems ) )
                except ( ValueError, IndexError, ZeroDivisionError ):
                    # Only data problems count as invalid lines; I/O errors propagate.
                    skipped_lines += 1
                    if invalid_line is None:
                        first_invalid_line = i + 1
                        invalid_line = line

    # Single-argument print( ... ) behaves the same on Python 2 and Python 3.
    if makesliding == 1:
        print( 'Window size=%d, Sliding=Yes, Offset=%d' % ( winsize, offset ) )
    else:
        print( 'Window size=%d, Sliding=No' % ( winsize ) )
    if skipped_lines > 0:
        print( 'Skipped %d invalid lines starting with #%d: "%s"' % ( skipped_lines, first_invalid_line, invalid_line ) )
| 84 if __name__ == "__main__": | |
| 85 main() |
