Mercurial > repos > lgueguen > sartools
diff pre_sartools.py @ 0:581d217c7337 draft
Planemo upload
author | lgueguen |
---|---|
date | Fri, 22 Jul 2016 05:39:13 -0400 |
parents | |
children | fe0ee346b17d |
line wrap: on
line diff
#!/usr/bin/env python
#Author: Coline Billerey
"""Prepare count files for the sartools wrapper.

Given a list of count files, this script writes a tab-separated design file
(one row per input) and bundles the count files, stored under their base
names, into a zip archive.
"""

from os.path import basename, join
from os import getcwd, system
import argparse
from shutil import copyfile, rmtree
import sys
import tempfile
import csv
import zipfile


def _open_csv(path, mode):
    """Open *path* in the way the ``csv`` module expects.

    Python 2's ``csv`` needs binary file objects, Python 3's needs text file
    objects opened with ``newline=''``. *mode* is ``'r'`` or ``'w'``.
    """
    if sys.version_info[0] < 3:
        return open(path, mode + 'b')
    return open(path, mode, newline='')


def _prepare_count_file(filename, tmpdirs):
    """Return the path of the file that must be archived for *filename*.

    Files whose header line has at most two tab-separated columns (or that are
    empty) are archived untouched. Files with more columns are handled as RSEM
    output: a two-column copy is written into a new temporary directory, which
    is appended to *tmpdirs* so that the caller can remove it afterwards.

    Raises KeyError if a file with more than two columns has no ``gene_id`` or
    ``effective_length`` column.
    """
    with _open_csv(filename, 'r') as csvfile:
        reader = csv.DictReader(csvfile, delimiter='\t', skipinitialspace=True)
        fieldnames = reader.fieldnames
        # ``fieldnames`` is None for an empty file; nothing to convert then.
        if not fieldnames or len(fieldnames) <= 2:
            return filename

        # For RSEM files we process files as HTSeq count output
        tmpdir = tempfile.mkdtemp()
        tmpdirs.append(tmpdir)
        converted = join(tmpdir, basename(filename))
        with _open_csv(converted, 'w') as out:
            writer = csv.writer(out, delimiter='\t')
            for row in reader:
                # NOTE(review): the value kept is `effective_length`, truncated
                # to an integer. RSEM results presumably also carry an
                # `expected_count` column -- confirm which one is intended.
                writer.writerow((row['gene_id'],
                                 int(float(row['effective_length']))))
        return converted


def __main__(argv=None):
    """Write the design file and the zip archive of count files.

    Command-line options:
      --batch    when set and different from "NULL", every --inputs group
                 carries a fourth value that is written to a ``batch`` column.
      --inputs   repeatable; each occurrence gives ``group file label``
                 (plus ``batch`` in batch mode).
      --outfile  path of the tab-separated design file to write.
      --outarch  path the zip archive is copied to.

    *argv* is the argument list to parse; ``None`` (the default) means
    ``sys.argv[1:]``.

    The archive is first written as ``counts.zip`` in the current working
    directory (replacing any file of that name) and then copied to --outarch.
    Raises ValueError if an --inputs group does not have the expected number
    of values.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch')
    parser.add_argument('--inputs', action='append', nargs='*')
    parser.add_argument('--outfile')
    parser.add_argument('--outarch')
    args = parser.parse_args(argv)

    # argparse leaves ``inputs`` as None when --inputs is never given.
    inputs = args.inputs or []
    with_batch = bool(args.batch) and args.batch != "NULL"
    file_zip = join(getcwd(), "counts.zip")

    header = ["label", "files", "group"]
    if with_batch:
        header.append("batch")

    tmpdirs = []
    try:
        with open(args.outfile, 'w') as counts_files, \
                zipfile.ZipFile(file_zip, 'w', zipfile.ZIP_DEFLATED) as archive:
            counts_files.write("\t".join(header) + "\n")
            for entry in inputs:
                if with_batch:
                    level, filename, label, batch_name = entry
                else:
                    level, filename, label = entry
                filename_base = basename(filename)

                # Members are stored without directories (like ``zip -j``), so
                # the ``files`` column of the design file matches their names.
                archive.write(_prepare_count_file(filename, tmpdirs),
                              arcname=filename_base)

                columns = [label, filename_base, level]
                if with_batch:
                    columns.append(batch_name)
                counts_files.write("\t".join(columns) + "\n")
    finally:
        # The converted copies are only needed until they are in the archive.
        for tmpdir in tmpdirs:
            rmtree(tmpdir, ignore_errors=True)

    copyfile(file_zip, args.outarch)


if __name__ == "__main__":
    __main__()