#!/usr/bin/env python3
# Provenance (Mercurial changeset in which this script was uploaded):
#   HG changeset patch
#   User r-lannes
#   Date 1450273506 18000
#   Node ID 93f267c26057374645fecdd926bb920cf5ad4605
#   Uploaded
#   diff -r 000000000000 -r 93f267c26057 cut_fr.py
"""Write a tab-separated dataset keeping or dropping selected columns.

The script is a thin wrapper around the ``cut`` utility.  Columns are given
as a comma-separated list such as ``c1,c2,c10`` (a bare ``1,2,10`` is also
accepted).  With ``--type_cut include`` only those columns are written; with
``--type_cut exclude`` every column of the input *except* those is written.

Copyright (C) <2015>

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""

# -------- import
import argparse
import os
import subprocess
import sys


# ------- argument
def build_parser():
    """Return the command-line parser (flags and help texts are unchanged)."""
    parser = argparse.ArgumentParser(
        description="renvoie le fichier inputs moins les collones sellectionner")
    parser.add_argument('--c', required=True, help="collumn to be removed")
    parser.add_argument('--i', required=True, help="input datasets")
    parser.add_argument('--type_cut', required=True,
                        choices=("exclude", "include"), help="type of cut")
    parser.add_argument('--extend_print', help="add sdtrout print")
    parser.add_argument('--o', required=True, help="output datasets")
    return parser


def parse_columns(column_spec):
    """Convert a column list such as ``"c1,c2,c10"`` into ``[1, 2, 10]``.

    The whole number after the optional ``c`` prefix is used, so columns
    above 9 are handled correctly.  Order and duplicates are preserved.

    Raises:
        ValueError: if an item is empty, not a number, or lower than 1
            (``cut`` numbers fields from 1).
    """
    columns = []
    for item in column_spec.split(','):
        digits = item.strip().lstrip('cC')
        try:
            number = int(digits)
        except ValueError:
            raise ValueError("invalid column %r in %r" % (item, column_spec))
        if number < 1:
            raise ValueError("column numbers start at 1, got %r" % item)
        columns.append(number)
    return columns


def count_columns(path):
    """Return the number of tab-separated fields on the first line of *path*.

    An empty file counts as one column, because splitting an empty string
    yields a single empty field.
    """
    with open(path, 'r') as dataset_initial:
        firstline_dataset = dataset_initial.readline()
    return len(firstline_dataset.split('\t'))


def kept_fields(type_cut, columns, number_column=0):
    """Return the 1-based field numbers that must be written to the output.

    Args:
        type_cut: ``"include"`` to keep exactly *columns*, ``"exclude"`` to
            keep every field from 1 to *number_column* not in *columns*.
        columns: column numbers selected by the user.
        number_column: total number of columns of the input; only used by
            ``"exclude"``.

    Raises:
        ValueError: if *type_cut* is neither ``"include"`` nor ``"exclude"``.
    """
    if type_cut == "include":
        return list(columns)
    if type_cut == "exclude":
        removed = set(columns)
        return [i for i in range(1, number_column + 1) if i not in removed]
    raise ValueError("unknown type of cut: %r" % (type_cut,))


def run_cut(str_argument, input_path, output_path):
    """Run ``cut str_argument input_path`` with stdout sent to *output_path*.

    The command is executed without a shell, so paths containing spaces or
    shell metacharacters are passed through verbatim.  ``--`` keeps a path
    starting with ``-`` from being read as an option.

    Returns:
        The exit status of ``cut``.
    """
    with open(output_path, 'wb') as destination:
        return subprocess.call(["cut", str_argument, "--", input_path],
                               stdout=destination)


def main(argv=None):
    """Parse *argv* (default: ``sys.argv[1:]``), run the cut, return its status."""
    parser = build_parser()
    args = parser.parse_args(argv)

    try:
        column_list = parse_columns(args.c)
    except ValueError as error:
        parser.error(str(error))

    # Only the exclude mode needs to know how many columns the input has.
    number_column = count_columns(args.i) if args.type_cut == "exclude" else 0
    fields = kept_fields(args.type_cut, column_list, number_column)
    if not fields:
        # A bare "-f" is rejected by cut; fail early with a clear message.
        parser.error("no column left to write with --c " + args.c)

    str_argument = "-f" + ",".join(str(field) for field in fields)
    # Human-readable form of the command, only used for the report below.
    str_cut = "cut " + str_argument + " " + args.i + " >" + args.o

    # launch the command
    result = run_cut(str_argument, args.i, args.o)

    if args.extend_print == "True":
        sys.stdout.write("input dataset : " + args.i + '\n')
        sys.stdout.write("column selected : " + args.c + '\n')
        sys.stdout.write("cut_type : " + args.type_cut + '\n')
        sys.stdout.write("output dataset : " + args.o + '\n')
        sys.stdout.write("command launch : " + str_cut + '\n')
    return result


if __name__ == "__main__":
    sys.exit(main())