diff build_ucsc_custom_track.py @ 2:3d87079756e1 draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/main/tools/ucsc_custom_track commit 68ba77da2a1f8d6cd04dd7dc6efc5edcefcfa0c9"
author devteam
date Mon, 28 Feb 2022 20:06:04 +0000
parents 618e56c3109b
children
line wrap: on
line diff
--- a/build_ucsc_custom_track.py	Fri Oct 09 17:18:58 2015 -0400
+++ b/build_ucsc_custom_track.py	Mon Feb 28 20:06:04 2022 +0000
@@ -3,97 +3,86 @@
 Build a UCSC genome browser custom track file
 """
 
-import sys, os
+import sys
 
-assert sys.version_info[:2] >= ( 2, 4 )
+FILE_TYPE_TO_TRACK_TYPE = {'bed': None, 'bedstrict': None, 'bed6': None, 'bed12': None, 'bedgraph': 'bedGraph', 'wig': 'wiggle_0'}
+CHUNK_SIZE = 2**20  # 1 mb
 
-def stop_err( msg ):
-    sys.stderr.write( msg )
-    sys.exit()
 
-FILE_TYPE_TO_TRACK_TYPE = { 'bed': None, 'bedstrict': None, 'bed6': None, 'bed12': None, 'bedgraph':'bedGraph', 'wig':'wiggle_0' }
-CHUNK_SIZE = 2**20 #1mb
-
-def get_track_line_is_interval( file_type, name, description, color, visibility  ):
+def get_track_line_is_interval(file_type, name, description, color, visibility):
     if file_type in FILE_TYPE_TO_TRACK_TYPE:
-        track_type = FILE_TYPE_TO_TRACK_TYPE[ file_type ]
+        track_type = FILE_TYPE_TO_TRACK_TYPE[file_type]
         is_interval = False
     else:
         track_type = None
         is_interval = True
     track_line = 'track '
     if track_type:
-        track_line += 'type=%s ' % ( track_type )
-    track_line += 'name="%s" description="%s" color=%s visibility=%s\n' % ( name, description, color, visibility )
+        track_line += f"type={track_type} "
+    track_line += f'name="{name}" description="{description}" color={color} visibility={visibility}\n'
     return track_line, is_interval
 
-args = sys.argv[1:]
-
-out_fname = args.pop(0)
-out = open( out_fname, "w" )
 
 num_tracks = 0
 skipped_lines = 0
 first_invalid_line = 0
-while args:
-    # Suck in one dataset worth of arguments
-    in_fname = args.pop(0)
-    file_type = args.pop(0)
-    colspec = args.pop(0)
-    name = args.pop(0)
-    description = args.pop(0)
-    color = args.pop(0).replace( '-', ',' )
-    visibility = args.pop(0)
-    track_line, is_interval = get_track_line_is_interval( file_type, name, description, color, visibility  )
-    # Do the work
-    in_file = open( in_fname )
-    out.write( track_line )
-    if not is_interval:
-        while True:
-            chunk = in_file.read( CHUNK_SIZE )
-            if chunk:
-                out.write( chunk )
+args = sys.argv[1:]
+out_fname = args.pop(0)
+with open(out_fname, "w") as out:
+    while args:
+        # Suck in one dataset worth of arguments
+        in_fname = args.pop(0)
+        file_type = args.pop(0)
+        colspec = args.pop(0)
+        name = args.pop(0)
+        description = args.pop(0)
+        color = args.pop(0).replace('-', ',')
+        visibility = args.pop(0)
+        track_line, is_interval = get_track_line_is_interval(file_type, name, description, color, visibility)
+        # Do the work
+        out.write(track_line)
+        with open(in_fname) as in_file:
+            if not is_interval:
+                while True:
+                    chunk = in_file.read(CHUNK_SIZE)
+                    if chunk:
+                        out.write(chunk)
+                    else:
+                        break
             else:
-                break
-    else:
-        # Assume type is interval (don't pass this script anything else!)
-        try:
-            c, s, e, st = [ int( x ) - 1 for x in colspec.split( "," ) ]
-        except:
-            try:
-                c, s, e = [ int( x ) - 1 for x in colspec.split( "," )[:3] ]
-                st = -1    #strand column is absent
-            except:
-                stop_err( "Columns in interval file invalid for UCSC custom track." )
-        
-        i = 0
-        for i, line in enumerate( in_file ):
-            line = line.rstrip( '\r\n' )
-            if line and not line.startswith( '#' ):
-                fields = line.split( "\t" )
-                if st > 0:
-                    #strand column is present
+                # Assume type is interval (don't pass this script anything else!)
+                try:
+                    c, s, e, st = (int(x) - 1 for x in colspec.split(","))
+                except ValueError:
                     try:
-                        out.write( "%s\t%s\t%s\t%d\t0\t%s\n" % ( fields[c], fields[s], fields[e], i, fields[st] ) )
-                    except:
-                        skipped_lines += 1
-                        if not first_invalid_line:
-                            first_invalid_line = i+1
-                else:
-                    try:
-                       out.write( "%s\t%s\t%s\n" % ( fields[c], fields[s], fields[e] ) )
-                    except:
-                        skipped_lines += 1
-                        if not first_invalid_line:
-                            first_invalid_line = i+1
-    out.write( "\n" ) #separating newline
-    num_tracks += 1
-    
-out.close()
+                        c, s, e = (int(x) - 1 for x in colspec.split(",")[:3])
+                        st = -1  # strand column is absent
+                    except Exception:
+                        sys.exit("Columns in interval file invalid for UCSC custom track.")
 
-print "Generated a custom track containing %d subtracks." % num_tracks
+                i = 0
+                for i, line in enumerate(in_file):
+                    line = line.rstrip('\r\n')
+                    if line and not line.startswith('#'):
+                        fields = line.split("\t")
+                        if st > 0:
+                            # strand column is present
+                            try:
+                                out.write(f"{fields[c]}\t{fields[s]}\t{fields[e]}\t{i}\t0\t{fields[st]}\n")
+                            except Exception:
+                                skipped_lines += 1
+                                if not first_invalid_line:
+                                    first_invalid_line = i + 1
+                        else:
+                            try:
+                                out.write(f"{fields[c]}\t{fields[s]}\t{fields[e]}\n")
+                            except Exception:
+                                skipped_lines += 1
+                                if not first_invalid_line:
+                                    first_invalid_line = i + 1
+        out.write("\n")  # separating newline
+        num_tracks += 1
+
+print(f"Generated a custom track containing {num_tracks} subtracks.")
 if skipped_lines:
-    print "Skipped %d invalid lines starting at #%d" % ( skipped_lines, first_invalid_line )
-
-
-
+    print(f"Skipped {skipped_lines} invalid lines starting at #{first_invalid_line}")