0
|
1 #!/usr/bin/env python
|
|
2 #Author: Coline Billerey
|
|
3
|
|
4 from os.path import basename, join
|
|
5 from os import getcwd, system
|
|
6 import argparse
|
|
7 from shutil import copyfile
|
|
8 import tempfile
|
|
9 import csv
|
|
10
|
|
def _open_for_csv(path, mode):
    """Open *path* in the form the ``csv`` module needs on this interpreter.

    Python 2's ``csv`` module works on binary-mode files, while Python 3's
    works on text-mode files opened with ``newline=''``.

    :param path: file to open.
    :param mode: ``'r'`` or ``'w'``.
    :returns: the opened file object (caller closes it).
    """
    import sys

    if sys.version_info[0] < 3:
        return open(path, mode + 'b')
    return open(path, mode, newline='')


def _stage_count_file(filename, tmpdirs):
    """Return the path whose content must be archived for *filename*.

    The file is read as a tab-separated table whose first line supplies the
    field names.  When that line has more than two fields the file is
    rewritten as a two-column table under the same basename inside a fresh
    temporary directory (recorded in *tmpdirs* so the caller can delete it)
    and the path of that copy is returned.  Otherwise *filename* itself is
    returned and nothing is written.

    :param filename: path of the input count file.
    :param tmpdirs: list that receives every temporary directory created.
    :returns: path of the file to put in the archive.
    :raises KeyError: if a wide table lacks ``gene_id`` or
        ``effective_length``.
    """
    with _open_for_csv(filename, 'r') as source:
        reader = csv.DictReader(source, delimiter='\t', skipinitialspace=True)
        # fieldnames is None for an empty file; treat that as "nothing to
        # convert" instead of failing on len(None).
        if len(reader.fieldnames or ()) <= 2:
            return filename

        # For RSEM files we process files as HTSeq count output.
        tmpdir = tempfile.mkdtemp()
        tmpdirs.append(tmpdir)
        converted = join(tmpdir, basename(filename))
        with _open_for_csv(converted, 'w') as target:
            writer = csv.writer(target, delimiter='\t')
            for row in reader:
                # NOTE(review): the value column is 'effective_length', kept
                # from the original code -- confirm this is the intended
                # column.
                writer.writerow(
                    (row['gene_id'], int(float(row['effective_length'])))
                )
    return converted


def __main__():
    """Write the sample description table and bundle the count files.

    Command-line options:

    * ``--batch``: when given and different from ``"NULL"``, every
      ``--inputs`` group carries a fourth value (the batch name) and the
      table gets a ``batch`` column.
    * ``--inputs LEVEL FILE LABEL [BATCH]``: repeatable; one group per
      count file.
    * ``--outfile``: path of the tab-separated table to write
      (``label``, ``files``, ``group`` and optionally ``batch``).
    * ``--outarch``: path that receives a copy of the zip archive.

    The archive is built as ``counts.zip`` in the current working directory
    (any existing file of that name is overwritten) and stores each count
    file under its basename.

    :raises ValueError: if an ``--inputs`` group does not have the expected
        number of values.
    """
    import shutil
    import zipfile

    parser = argparse.ArgumentParser()
    parser.add_argument('--batch')
    parser.add_argument('--inputs', action='append', nargs='*')
    parser.add_argument('--outfile')
    parser.add_argument('--outarch')
    args = parser.parse_args()

    # argparse leaves 'inputs' as None when the option never appears.
    inputs = args.inputs or []
    use_batch = bool(args.batch) and args.batch != "NULL"

    header = ["label", "files", "group"]
    if use_batch:
        header.append("batch")

    file_zip = join(getcwd(), "counts.zip")
    tmpdirs = []
    try:
        with open(args.outfile, 'w') as counts_files:
            counts_files.write("\t".join(header) + "\n")
            # zipfile replaces the former shell 'zip -j' call: no quoting
            # problems with unusual file names and no external binary.
            with zipfile.ZipFile(file_zip, 'w', zipfile.ZIP_DEFLATED) as archive:
                for entry in inputs:
                    if use_batch:
                        level, filename, label, batch_name = entry
                    else:
                        level, filename, label = entry
                    filename_base = basename(filename)

                    # Store under the bare file name, as 'zip -j' did.
                    archive.write(
                        _stage_count_file(filename, tmpdirs), filename_base
                    )

                    row = [label, filename_base, level]
                    if use_batch:
                        row.append(batch_name)
                    counts_files.write("\t".join(row) + "\n")
    finally:
        # The converted copies are only needed until they are in the archive.
        for tmpdir in tmpdirs:
            shutil.rmtree(tmpdir, ignore_errors=True)

    copyfile(file_zip, args.outarch)
|
|
70
|
|
71
|
|
72
|
|
# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    __main__()
|