comparison assembly_stats_txt.py @ 1:7556309ffbaf draft default tip

"planemo upload for repository https://github.com/phac-nml/galaxy_tools commit fb4c29f720748f46ff501140f2cd306bab6614f9"
author nml
date Fri, 29 May 2020 13:51:50 -0400
parents ad2b274663f8
children
comparison
equal deleted inserted replaced
0:ad2b274663f8 1:7556309ffbaf
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*- 2 # -*- coding: utf-8 -*-
3 3
4 # Version 1.01 - bugs kindly corrected by Jan van Haarst 4 # Version 1.01 - bugs kindly corrected by Jan van Haarst
5 # Modified by Matthew Gopez October 13th, 2017 5 # Modified by Matthew Gopez October 13th, 2017
6 # Rewritten by Matthew Gopez May 25th, 2020
6 7
7 import logging 8 import argparse
8 import os 9 import os
10 import shutil
9 import subprocess 11 import subprocess
10 import sys 12 from pathlib import Path
11 13
12 14
13 log = logging.getLogger(__name__) 15 PERL_OUT_FILES = ['stats.txt', 'sorted_contigs.fa', 'histogram_bins.dat.png',
14 16 'summed_contig_lengths.dat.png', 'histogram_bins.dat',
15 assert sys.version_info[:2] >= (2, 4) 17 'summed_contig_lengths.dat']
16 18
17 19
18 def stop_err(msg): 20 def init_parser():
19 sys.stderr.write('%s\n' % msg) 21 """Create argument parser and return parser obj."""
20 sys.exit() 22 parser = argparse.ArgumentParser(description="usage: %prog [options]")
23
24 parser.add_argument(
25 "-d",
26 "--working-dir",
27 dest="working_dir",
28 required=True)
29
30 parser.add_argument(
31 "-t",
32 "--type",
33 dest="file_type",
34 required=True)
35
36 parser.add_argument(
37 "-b",
38 "--bucket",
39 dest="bucket",
40 action='store_true')
41
42 parser.add_argument(
43 "-i",
44 "--input",
45 dest="input",
46 required=True)
47
48 parser.add_argument(
49 "-s",
50 "--stats",
51 dest="stats",
52 required=True)
53
54 parser.add_argument(
55 "-sc",
56 "--sorted-contigs",
57 dest="sorted_contigs",
58 required=True)
59
60 parser.add_argument(
61 "-hpng",
62 "--histogram-png",
63 dest="histogram_png",
64 required=True)
65
66 parser.add_argument(
67 "-spng",
68 "--summed-contigs-png",
69 dest="summed_contigs_png",
70 required=True)
71
72 parser.add_argument(
73 "-hd",
74 "--histogram-data",
75 dest="histogram_data",
76 required=True)
77
78 parser.add_argument(
79 "-scd",
80 "--summed-config-data",
81 dest="summed_contig_data",
82 required=True)
83
84 return parser
21 85
22 86
23 def __main__(): 87 def exec_fasta_summary(input_data, file_type, bucket, working_dir):
88 """Execute fasta_summary.pl script with user arguments."""
89 script_dir = Path(__file__).parent.absolute()
24 90
25 # Parse Command Line 91 if bucket:
92 bucket_arg = '-b'
93 else:
94 bucket_arg = ''
26 95
27 working_dir = sys.argv[2] 96 cli_command = \
28 type = sys.argv[3] 97 '{}/fasta_summary.pl -i {} -t {} {} -o {} > /dev/null'.format(
29 bucket = sys.argv[4] 98 script_dir, input_data, file_type, bucket_arg, working_dir)
30 input = sys.argv[5]
31 stats = sys.argv[6]
32 sortedcontigs = sys.argv[7]
33 histogrampng = sys.argv[8]
34 summedcontigspng = sys.argv[9]
35 histogramdata = sys.argv[10]
36 summedcontigdata = sys.argv[11]
37 try: # for test - needs this done
38 os.makedirs(working_dir)
39 except Exception, e:
40 stop_err('Error running assembly_stats_txt.py ' + str(e))
41 99
42 cmdline = '%s/fasta_summary.pl -i %s -t %s %s -o %s > /dev/null' \
43 % (os.path.dirname(sys.argv[0]), input, type, bucket,
44 working_dir)
45 try: 100 try:
46 proc = subprocess.Popen(args=cmdline, shell=True, 101 subprocess.check_output(
47 stderr=subprocess.PIPE) 102 cli_command,
48 returncode = proc.wait() 103 stderr=subprocess.STDOUT,
49 104 shell=True,
50 # get stderr, allowing for case where it's very large 105 universal_newlines=True)
51 106 except subprocess.CalledProcessError as exc:
52 stderr = '' 107 raise RuntimeError('Error running assembly_stats.py!\n'
53 buffsize = 1048576 108 'Return Code: {}\nOutput: {}'.format(
54 try: 109 exc.returncode, exc.output))
55 while True:
56 stderr += proc.stderr.read(buffsize)
57 if not stderr or len(stderr) % buffsize != 0:
58 break
59 except OverflowError:
60 pass
61 if returncode != 0:
62 raise Exception
63 except Exception, e:
64 stop_err('Error running assembly_stats.py ' + str(e))
65
66 stats_path = os.path.join(working_dir, 'stats.txt')
67 sorted_contigs_path = os.path.join(working_dir, 'sorted_contigs.fa')
68 histogram_png_path = os.path.join(working_dir,
69 'histogram_bins.dat.png')
70 summed_contigs_path = os.path.join(working_dir,
71 'summed_contig_lengths.dat.png')
72 histogram_data_path = os.path.join(working_dir, 'histogram_bins.dat')
73 summed_contigs_data_path = os.path.join(working_dir,
74 'summed_contig_lengths.dat')
75
76 out = open(stats, 'w')
77 for line in open(stats_path):
78 out.write('%s' % line)
79 out.close()
80
81 out = open(sortedcontigs, 'w')
82 for line in open(sorted_contigs_path):
83 out.write('%s' % line)
84 out.close()
85
86 out = open(histogrampng, 'w')
87 for line in open(histogram_png_path):
88 out.write('%s' % line)
89 out.close()
90
91 out = open(summedcontigspng, 'w')
92 for line in open(summed_contigs_path):
93 out.write('%s' % line)
94 out.close()
95
96 out = open(histogramdata, 'w')
97 for line in open(histogram_data_path):
98 out.write('%s' % line)
99 out.close()
100
101 out = open(summedcontigdata, 'w')
102 for line in open(summed_contigs_data_path):
103 out.write('%s' % line)
104 out.close()
105 110
106 111
107 if __name__ == '__main__': 112 def main():
108 __main__() 113 """This is where the magic happens. (not really)
114
115 1. Gets command line arguments.
116 2. Grabs the user's desired parameters for running the perl script.
117 3. Ensures the directories are in place.
118 4. Executes fasta_summary.pl
119 5. Moves the output files from the perl script to the desired
120 location the user specified.
121
122 """
123 parser = init_parser()
124 args = parser.parse_args()
125
126 working_dir = args.working_dir
127
128 out_file_names = [args.stats, args.sorted_contigs, args.histogram_png,
129 args.summed_contigs_png, args.histogram_data,
130 args.summed_contig_data]
131
132 # Ensure working directory is created.
133 Path(working_dir).mkdir(parents=True, exist_ok=True)
134
135 # Execute Perl Script
136 exec_fasta_summary(args.input, args.file_type, args.bucket, working_dir)
137
138 # Move the perl output files to the desired file names
139 for perl_out_file, dest_file in zip(PERL_OUT_FILES, out_file_names):
140 shutil.move(os.path.join(working_dir, perl_out_file),
141 dest_file)
142
143
144 if __name__ == "__main__":
145 main()