| 0 | 1 #!/usr/bin/env python | 
|  | 2 | 
|  | 3 """ | 
|  | 4 Split into windows. | 
|  | 5 | 
|  | 6 usage: %prog input size out_file | 
|  | 7    -l, --cols=N,N,N,N: Columns for chrom, start, end, strand in file | 
|  | 8 """ | 
|  | 9 | 
|  | 10 import sys | 
|  | 11 | 
|  | 12 from galaxy import eggs | 
|  | 13 import pkg_resources | 
|  | 14 pkg_resources.require( "bx-python" ) | 
|  | 15 from bx.cookbook import doc_optparse | 
|  | 16 from galaxy.tools.util.galaxyops import * | 
|  | 17 | 
def stop_err(msg):
    """Write *msg* to standard error and terminate the process.

    The process exits with status 1 so that the failure is visible to
    callers that inspect the exit code, not only to those that read stderr.

    Raises:
        SystemExit: always.
    """
    sys.stderr.write(msg)
    sys.exit(1)
|  | 21 | 
|  | 22 | 
def _window_starts(start, end, winsize, step):
    """Yield the start of each window that fits entirely inside [start, end].

    Each window is ``winsize`` long and consecutive starts are ``step``
    apart. Both values must be positive, otherwise the loop cannot end.
    """
    while start + winsize <= end:
        yield start
        start += step


def _write_windows(lines, out, start_col, end_col, winsize, step):
    """Write the windows of every data line in *lines* to *out*.

    Blank lines and lines starting with ``#`` are ignored. Each remaining
    line is split on tabs; for every window, the line is written again with
    its start and end fields replaced by the window bounds.

    Returns:
        A ``(skipped_lines, first_invalid_line, invalid_line)`` tuple: the
        number of lines whose start/end fields were missing or not integers,
        the 1-based number of the first such line (0 if none) and its
        stripped text (``None`` if none).
    """
    skipped_lines = 0
    first_invalid_line = 0
    invalid_line = None

    for i, line in enumerate(lines):
        line = line.strip()
        if not line or line.startswith("#"):
            continue
        elems = line.split('\t')
        # Only parsing problems mark a line as invalid; errors raised while
        # writing the output must propagate instead of being miscounted.
        try:
            start = int(elems[start_col])
            end = int(elems[end_col])
        except (ValueError, IndexError):
            skipped_lines += 1
            if invalid_line is None:
                first_invalid_line = i + 1
                invalid_line = line
            continue
        for win_start in _window_starts(start, end, winsize, step):
            elems[start_col] = str(win_start)
            elems[end_col] = str(win_start + winsize)
            out.write("%s\n" % '\t'.join(elems))

    return skipped_lines, first_invalid_line, invalid_line


def main():
    """Split every interval of a tab-separated file into windows.

    Options are parsed from the module docstring. ``--cols`` is handed to
    ``parse_cols_arg`` to obtain the chrom, start, end and strand column
    indices; the five positional arguments are, in order: input file,
    window size, output file, sliding flag and offset.

    Without sliding (flag 0, or offset 0) consecutive windows are placed
    ``winsize`` apart; with sliding they are placed ``offset`` apart. Only
    windows lying completely inside the interval are written, in both modes.

    A summary of the settings and of any skipped lines is printed to
    standard output. Malformed arguments, a non-positive window size or a
    negative sliding offset end the run through ``stop_err``.
    """
    # Parsing Command Line here
    options, args = doc_optparse.parse(__doc__)

    try:
        # Unpacking into four names also checks that exactly four column
        # indices were supplied, even though only start/end are used here.
        _chr_col, start_col, end_col, _strand_col = parse_cols_arg(options.cols)
        inp_file, winsize, out_file, makesliding, offset = args
        winsize = int(winsize)
        offset = int(offset)
        makesliding = int(makesliding)
    except Exception:
        stop_err("Data issue, click the pencil icon in the history item to correct the metadata attributes of the input dataset.")

    # A zero offset cannot slide, so it falls back to plain windows.
    sliding = makesliding != 0 and offset != 0

    # Reject values that could never produce a window; previously a zero
    # window size made every input line look like invalid data.
    if winsize <= 0:
        stop_err("Window size must be a positive integer.")
    if sliding and offset < 0:
        stop_err("Offset must not be negative.")

    step = offset if sliding else winsize

    # The output is opened first so it exists even if the input is unreadable.
    with open(out_file, 'w') as fo:
        with open(inp_file) as fi:
            skipped_lines, first_invalid_line, invalid_line = _write_windows(
                fi, fo, start_col, end_col, winsize, step)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    if sliding:
        print('Window size=%d, Sliding=Yes, Offset=%d' % (winsize, offset))
    else:
        print('Window size=%d, Sliding=No' % (winsize))
    if skipped_lines > 0:
        print('Skipped %d invalid lines starting with #%d: "%s"' % (skipped_lines, first_invalid_line, invalid_line))
|  | 81 | 
|  | 82 | 
# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()