Mercurial > repos > r-lannes > cut_include_exclude
view cut_fr.py @ 1:59bd6cbbeb92 draft
Uploaded
author | r-lannes |
---|---|
date | Wed, 16 Dec 2015 08:46:07 -0500 |
parents | 93f267c26057 |
children |
line wrap: on
line source
#!/usr/bin/env python3
"""cut_fr.py is a script allowing more convenient use of cut in the Galaxy environment.

Copyright (C) 2015 Lannes Romain

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""

# -------- import
import os
import argparse
import subprocess
import sys


def parse_columns(column_spec):
    """Convert a column specification into 1-based column numbers.

    Args:
        column_spec: Comma-separated column names such as ``"c1,c2,c10"``.
            The leading ``c`` is optional.

    Returns:
        The column numbers as a list of ints, in the order they were given.

    Raises:
        ValueError: If an entry is not ``c<number>`` with ``number >= 1``.
    """
    columns = []
    for token in column_spec.split(','):
        name = token.strip()
        # Drop the optional "c" prefix and keep *all* digits, so that
        # multi-digit columns such as "c10" are parsed as 10 and not 0.
        digits = name[1:] if name[:1] in ('c', 'C') else name
        if not digits.isdigit() or int(digits) < 1:
            raise ValueError(
                "invalid column %r: expected c<number> with number >= 1"
                % token
            )
        columns.append(int(digits))
    return columns


def count_columns(path):
    """Return the number of tab-separated fields on the first line of *path*."""
    with open(path, 'r') as dataset:
        first_line = dataset.readline()
    return len(first_line.split('\t'))


def build_field_argument(type_cut, columns, number_column=0):
    """Build the ``-f`` option passed to ``cut``.

    Args:
        type_cut: ``"include"`` to keep exactly *columns*, ``"exclude"`` to
            keep every column from 1 to *number_column* except *columns*.
        columns: 1-based column numbers selected by the user.
        number_column: Total number of columns in the dataset; only used
            when *type_cut* is ``"exclude"``.

    Returns:
        A string such as ``"-f1,3,4"``.

    Raises:
        ValueError: If *type_cut* is unknown or no column would be kept.
    """
    if type_cut == "include":
        kept = list(columns)
    elif type_cut == "exclude":
        excluded = set(columns)
        kept = [i for i in range(1, number_column + 1) if i not in excluded]
    else:
        raise ValueError("unknown type of cut: %r" % (type_cut,))
    if not kept:
        # A bare "-f" would make cut take the input path as the field list.
        raise ValueError("no column left to output")
    return "-f" + ",".join(str(i) for i in kept)


def main(argv=None):
    """Parse the command line, run ``cut`` and optionally report what was done.

    Args:
        argv: Argument list to parse; defaults to ``sys.argv[1:]``.

    Returns:
        The exit status of the ``cut`` process.

    Raises:
        SystemExit: Raised by argparse (status 2) on invalid arguments.
    """
    # ------- argument
    parser = argparse.ArgumentParser(
        description="renvoie le fichier inputs moins les collones sellectionner"
    )
    parser.add_argument('--c', required=True, help="collumn to be removed")
    parser.add_argument('--i', required=True, help="input datasets")
    parser.add_argument('--type_cut', required=True,
                        choices=("exclude", "include"), help="type of cut")
    parser.add_argument('--extend_print', help="add sdtrout print")
    parser.add_argument('--o', required=True, help="output datasets")
    args = parser.parse_args(argv)

    try:
        # Columns arrive in the format c1,c2,c3.
        columns = parse_columns(args.c)
        # Only the exclude mode needs to know how many columns exist.
        number_column = (
            count_columns(args.i) if args.type_cut == "exclude" else 0
        )
        str_argument = build_field_argument(
            args.type_cut, columns, number_column
        )
    except ValueError as err:
        parser.error(str(err))

    # Human-readable form of the command, used only for the report below.
    str_cut = "cut " + str_argument + " " + args.i + " >" + args.o

    # Run cut without a shell: arguments are passed as a list and the
    # redirection is done here, so paths containing spaces or shell
    # metacharacters cannot break or hijack the command.
    with open(args.o, 'wb') as output:
        result = subprocess.call(["cut", str_argument, args.i], stdout=output)

    if args.extend_print == "True":
        sys.stdout.write("input dataset : " + args.i + '\n')
        sys.stdout.write("column selected : " + args.c + '\n')
        sys.stdout.write("cut_type : " + args.type_cut + '\n')
        sys.stdout.write("output dataset : " + args.o + '\n')
        sys.stdout.write("command launch : " + str_cut + '\n')

    return result


if __name__ == "__main__":
    sys.exit(main())