diff column_order_header_sort.py @ 0:6ae9724caf4d draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/column_order_header_sort commit d562cc65926c8c95af21467177b253b6ac985cb4
author iuc
date Wed, 12 Apr 2017 17:17:18 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/column_order_header_sort.py	Wed Apr 12 17:17:18 2017 -0400
@@ -0,0 +1,35 @@
+#!/usr/bin/env python
+"""Sort the columns of a delimited file by header name, using gawk.
+
+Usage: column_order_header_sort.py INPUT OUTPUT DELIMITER KEY_COLUMN
+KEY_COLUMN is 1-based and is kept first; a non-integer value means no key.
+"""
+
+import subprocess
+import sys
+
+AWK_CMD = """BEGIN{FS="%s"; OFS="%s";} {print %s;}"""
+
+input_filename, output_filename, delimiter, key_column = sys.argv[1:5]
+try:
+    key_column = int(key_column) - 1
+except ValueError:
+    key_column = None  # not a number: no key column was requested
+
+with open(input_filename, 'r') as fh:
+    header = fh.readline().strip('\r\n').split(delimiter)
+if len(header) != len(set(header)):
+    sys.exit("Header values must be unique")  # a real check; assert vanishes under -O
+if key_column is not None and not 0 <= key_column < len(header):
+    # An index of -1 (KEY_COLUMN "0") would otherwise become awk's $0, the whole line.
+    sys.exit("Key column must be between 1 and %i" % len(header))
+
+# Key column first (if any), then every other column ordered by its header name.
+by_name = sorted(range(len(header)), key=lambda i: header[i])
+columns = [] if key_column is None else [key_column]
+columns.extend(i for i in by_name if i != key_column)
+
+awk_cmd = AWK_CMD % (delimiter, delimiter, ",".join("$%i" % (i + 1) for i in columns))
+with open(output_filename, 'wb+') as out:  # closed explicitly once gawk has finished
+    status = subprocess.call(['gawk', awk_cmd, input_filename], stdout=out, shell=False)
+sys.exit(status)