Mercurial > repos > lgueguen > sartools
comparison pre_sartools.py @ 0:581d217c7337 draft
Planemo upload
author | lgueguen |
---|---|
date | Fri, 22 Jul 2016 05:39:13 -0400 |
parents | |
children | fe0ee346b17d |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:581d217c7337 |
---|---|
#!/usr/bin/env python
# Author: Coline Billerey
3 | |
import argparse
import csv
import sys
import tempfile
import zipfile
from os import getcwd, system
from os.path import basename, join
from shutil import copyfile, rmtree
10 | |
def _open_tsv(path, mode):
    """Open *path* for use with the csv module; *mode* is 'r' or 'w'."""
    if sys.version_info[0] < 3:
        # Python 2's csv module works on binary-mode file objects.
        return open(path, mode + 'b')
    # Python 3's csv module needs text mode with newline translation disabled.
    return open(path, mode, newline='')


def _prepare_count_file(filename, tmpdir):
    """Return the path of the file to archive for the input *filename*.

    A file whose first line has at most two tab-separated fields (or an
    empty file) is returned unchanged. A file with more fields is treated as
    an RSEM result table: a two-column, tab-separated copy is written into
    *tmpdir* under the same base name and that copy's path is returned.
    """
    with _open_tsv(filename, 'r') as csvfile:
        reader = csv.DictReader(csvfile, delimiter='\t', skipinitialspace=True)
        fieldnames = reader.fieldnames  # None when the file is empty
        if not fieldnames or len(fieldnames) <= 2:
            return filename
        converted = join(tmpdir, basename(filename))
        with _open_tsv(converted, 'w') as out:
            writer = csv.writer(out, delimiter='\t')
            for row in reader:
                # NOTE(review): the value written as the count is the
                # 'effective_length' column, unchanged from the original
                # script; confirm whether 'expected_count' was intended.
                writer.writerow(
                    (row['gene_id'], int(float(row['effective_length']))))
    return converted


def __main__():
    """Build the target description file and the zip archive of count files.

    Command-line arguments:
      --batch    any value other than "NULL" adds a "batch" column.
      --inputs   repeatable; each occurrence takes LEVEL FILE LABEL, plus
                 BATCH when batch mode is on.
      --outfile  path of the tab-separated target file to write
                 (columns: label, files, group[, batch]).
      --outarch  path the zip archive of count files is copied to.

    The archive is first written as "counts.zip" in the current working
    directory, with every entry stored under its base name only, and is
    then copied to --outarch.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch')
    parser.add_argument('--inputs', action='append', nargs='*')
    parser.add_argument('--outfile')
    parser.add_argument('--outarch')
    args = parser.parse_args()

    use_batch = bool(args.batch) and args.batch != "NULL"
    inputs = args.inputs or []  # --inputs may be absent altogether
    file_zip = join(getcwd(), "counts.zip")

    header = ["label", "files", "group"]
    if use_batch:
        header.append("batch")

    # Validate everything before touching the filesystem.
    seen_names = set()
    for entry in inputs:
        if len(entry) != len(header):
            parser.error(
                "--inputs expects %d values (LEVEL FILE LABEL%s), got %d: %r"
                % (len(header), " BATCH" if use_batch else "",
                   len(entry), entry))
        name = basename(entry[1])
        if name in seen_names:
            # Entries are stored by base name, so duplicates are ambiguous.
            parser.error("duplicate input file name: %r" % name)
        seen_names.add(name)

    tmpdir = tempfile.mkdtemp()
    try:
        with open(args.outfile, 'w') as counts_files:
            counts_files.write("\t".join(header) + "\n")
            with zipfile.ZipFile(file_zip, 'w', zipfile.ZIP_DEFLATED) as archive:
                for entry in inputs:
                    level, filename, label = entry[:3]
                    filename_base = basename(filename)
                    # RSEM files are converted to HTSeq-count-like output.
                    to_archive = _prepare_count_file(filename, tmpdir)
                    archive.write(to_archive, arcname=filename_base)
                    columns = [label, filename_base, level] + list(entry[3:])
                    counts_files.write("\t".join(columns) + "\n")
        copyfile(file_zip, args.outarch)
    finally:
        # The converted copies are only needed until the archive is written.
        rmtree(tmpdir, ignore_errors=True)


if __name__ == "__main__":
    __main__()