Mercurial > repos > xuebing > sharplabtool
diff tools/regVariation/windowSplitter.py @ 0:9071e359b9a3
Uploaded
author | xuebing |
---|---|
date | Fri, 09 Mar 2012 19:37:19 -0500 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/env python

"""
Split into windows.

usage: %prog input size out_file makesliding offset
   -l, --cols=N,N,N,N: Columns for chrom, start, end, strand in file
"""

import sys, re, os

from galaxy import eggs
import pkg_resources; pkg_resources.require( "bx-python" )
from bx.cookbook import doc_optparse
from galaxy.tools.util.galaxyops import *


def stop_err( msg ):
    """Write msg to stderr and terminate the process with a non-zero status."""
    sys.stderr.write( msg )
    sys.exit( 1 )


def _iter_windows( start, end, winsize, offset, makesliding ):
    """Yield ( win_start, win_end ) pairs of width winsize inside [start, end].

    When makesliding is 0 consecutive windows are winsize apart (tiling);
    otherwise they are offset apart (sliding).  The number of windows is
    capped at ( end - start ) // step, and a window is only produced when it
    ends at or before end, so no window overruns the source interval.
    """
    step = winsize if makesliding == 0 else offset
    # Floor division keeps the integer semantics on both Python 2 and 3.
    remaining = ( end - start ) // step
    while remaining > 0 and start + winsize <= end:
        yield start, start + winsize
        start += step
        remaining -= 1


def main():
    """Split every interval of the input file into fixed-size windows.

    Positional arguments (after option parsing): input file, window size,
    output file, sliding flag (0 or 1) and sliding offset.  Each non-blank,
    non-comment input line is tab-split; its start/end columns are rewritten
    once per window and the line is written to the output file.  Lines whose
    coordinates cannot be parsed are counted and the first one is reported
    on stdout together with a summary of the settings used.
    """
    # Parsing Command Line here
    options, args = doc_optparse.parse( __doc__ )

    try:
        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols )
        inp_file, winsize, out_file, makesliding, offset = args
        winsize = int( winsize )
        offset = int( offset )
        makesliding = int( makesliding )
    except Exception:
        stop_err( "Data issue, click the pencil icon in the history item to correct the metadata attributes of the input dataset." )

    # A non-positive window size cannot produce meaningful windows (and a
    # size of zero would divide by zero when tiling), so reject it up front.
    if winsize <= 0:
        stop_err( "Window size must be a positive integer." )

    # A zero offset cannot advance a sliding window; fall back to tiling.
    if offset == 0:
        makesliding = 0

    skipped_lines = 0
    first_invalid_line = 0
    invalid_line = None

    # Nested with-blocks (rather than a combined one) keep Python 2.6 compatibility
    # and guarantee both handles are closed even if processing fails.
    with open( out_file, 'w' ) as fo:
        with open( inp_file ) as fi:
            for i, line in enumerate( fi ):
                line = line.strip()
                if not line or line.startswith( '#' ):
                    continue
                try:
                    elems = line.split( '\t' )
                    if strand_col_1 != -1:
                        # The strand value itself is not used; the lookup only
                        # verifies that the declared strand column exists.
                        elems[strand_col_1]
                    start = int( elems[start_col_1] )
                    end = int( elems[end_col_1] )
                except ( ValueError, IndexError ):
                    skipped_lines += 1
                    if invalid_line is None:
                        first_invalid_line = i + 1
                        invalid_line = line
                    continue
                for win_start, win_end in _iter_windows( start, end, winsize, offset, makesliding ):
                    elems[start_col_1] = str( win_start )
                    elems[end_col_1] = str( win_end )
                    fo.write( "%s\n" % '\t'.join( elems ) )

    # print( single_string ) behaves identically as a Python 2 statement
    # and as a Python 3 function call.
    if makesliding == 1:
        print( 'Window size=%d, Sliding=Yes, Offset=%d' % ( winsize, offset ) )
    else:
        print( 'Window size=%d, Sliding=No' % ( winsize ) )
    if skipped_lines > 0:
        print( 'Skipped %d invalid lines starting with #%d: "%s"' % ( skipped_lines, first_invalid_line, invalid_line ) )


if __name__ == "__main__":
    main()