annotate pre_sartools.py @ 3:de6d0b7c17af draft

release 1.6.3
author lgueguen
date Mon, 01 Oct 2018 05:07:56 -0400
parents fe0ee346b17d
children 05c9b1a7f44e
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
1 #!/usr/bin/env python
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
2 #Author: Coline Billerey
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
3
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
4 from os.path import basename, join
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
5 from os import getcwd, system
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
6 import argparse
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
7 from shutil import copyfile
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
8 import tempfile
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
9 import csv
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
10
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
def _open_tabular(path, mode):
    """Open *path* for use with the ``csv`` module.

    *mode* is ``'r'`` or ``'w'``. Python 3's ``csv`` needs text-mode files
    opened with ``newline=''``; Python 2's ``csv`` needs binary mode.
    """
    import sys

    if sys.version_info[0] >= 3:
        return open(path, mode, newline='')
    return open(path, mode + 'b')


def _rsem_to_htseq(filename, tmp_root):
    """Convert a tabular file with more than two columns to two columns.

    The input is read as a tab-separated table whose first line is a header.
    When the header has more than two columns, a new file with the same
    basename is written in a fresh sub-directory of *tmp_root*, holding one
    ``gene_id<TAB>int(float(expected_count))`` row per input row and no
    header line.

    Returns the path of the converted file, or ``None`` when the input has
    at most two columns (or is empty) and must be used unchanged.
    Raises ``KeyError`` if a wide table lacks the ``gene_id`` or
    ``expected_count`` columns.
    """
    with _open_tabular(filename, 'r') as src:
        reader = csv.DictReader(src, delimiter='\t', skipinitialspace=True)
        # fieldnames is None for an empty file; treat that as "not RSEM".
        if len(reader.fieldnames or ()) <= 2:
            return None
        # One sub-directory per input so equal basenames cannot collide.
        converted = join(tempfile.mkdtemp(dir=tmp_root), basename(filename))
        with _open_tabular(converted, 'w') as out:
            writer = csv.writer(out, delimiter='\t')
            for row in reader:
                writer.writerow(
                    (row['gene_id'], int(float(row['expected_count'])))
                )
    return converted


def __main__(argv=None):
    """Write the sample design table and zip the count files.

    Command-line options (parsed from *argv*, or ``sys.argv[1:]`` when
    *argv* is ``None``):

    --batch     batch mode is on when this is given and is not ``"NULL"``.
    --inputs    repeatable; each occurrence carries ``group file label``
                plus a fourth ``batch`` value in batch mode.
    --outfile   path of the tab-separated design table to write, with
                columns ``label, files, group`` (and ``batch``).
    --outarch   path the zip archive of count files is copied to.

    Every input file is stored in the archive under its basename. Inputs
    with more than two columns are first reduced to two columns (see
    ``_rsem_to_htseq``). The archive is built as ``counts.zip`` in the
    current working directory and then copied to ``--outarch``.
    """
    # Imported locally because they are not in the file's import header.
    import shutil
    import zipfile

    parser = argparse.ArgumentParser()
    parser.add_argument('--batch')
    parser.add_argument('--inputs', action='append', nargs='*')
    parser.add_argument('--outfile', required=True)
    parser.add_argument('--outarch', required=True)
    args = parser.parse_args(argv)

    use_batch = bool(args.batch) and args.batch != "NULL"
    inputs = args.inputs or []

    header = ["label", "files", "group"]
    if use_batch:
        header.append("batch")

    # Validate up front so nothing is written when the arguments are wrong.
    expected = len(header)
    for entry in inputs:
        if len(entry) != expected:
            parser.error(
                "--inputs expects %d values (group, file, label%s), got %d"
                % (expected, ", batch" if use_batch else "", len(entry))
            )

    file_zip = join(getcwd(), "counts.zip")
    tmp_root = tempfile.mkdtemp()
    try:
        with open(args.outfile, 'w') as design, \
                zipfile.ZipFile(file_zip, 'w', zipfile.ZIP_DEFLATED) as archive:
            design.write("\t".join(header) + "\n")
            for entry in inputs:
                level, filename, label = entry[:3]
                filename_base = basename(filename)
                # For RSEM files we process files as HTSeq count output
                converted = _rsem_to_htseq(filename, tmp_root)
                # arcname drops the directory part, like ``zip -j`` did.
                archive.write(converted or filename, arcname=filename_base)
                # entry[3:] is the batch name in batch mode, else empty.
                design.write(
                    "\t".join([label, filename_base, level] + entry[3:]) + "\n"
                )
    finally:
        # Converted copies are only needed until they are in the archive.
        shutil.rmtree(tmp_root, ignore_errors=True)

    copyfile(file_zip, args.outarch)
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
70
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
71
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
72
581d217c7337 Planemo upload
lgueguen
parents:
diff changeset
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    __main__()