Mercurial > repos > nml > assemblystats
comparison assembly_stats_txt.py @ 1:7556309ffbaf draft default tip
"planemo upload for repository https://github.com/phac-nml/galaxy_tools commit fb4c29f720748f46ff501140f2cd306bab6614f9"
author | nml |
---|---|
date | Fri, 29 May 2020 13:51:50 -0400 |
parents | ad2b274663f8 |
children |
comparison
equal
deleted
inserted
replaced
0:ad2b274663f8 | 1:7556309ffbaf |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # -*- coding: utf-8 -*- | 2 # -*- coding: utf-8 -*- |
3 | 3 |
4 # Version 1.01 - bugs kindly corrected by Jan van Haarst | 4 # Version 1.01 - bugs kindly corrected by Jan van Haarst |
5 # Modified by Matthew Gopez October 13th, 2017 | 5 # Modified by Matthew Gopez October 13th, 2017 |
6 # Rewritten by Matthew Gopez May 25th, 2020 | |
6 | 7 |
7 import logging | 8 import argparse |
8 import os | 9 import os |
10 import shutil | |
9 import subprocess | 11 import subprocess |
10 import sys | 12 from pathlib import Path |
11 | 13 |
12 | 14 |
13 log = logging.getLogger(__name__) | 15 PERL_OUT_FILES = ['stats.txt', 'sorted_contigs.fa', 'histogram_bins.dat.png', |
14 | 16 'summed_contig_lengths.dat.png', 'histogram_bins.dat', |
15 assert sys.version_info[:2] >= (2, 4) | 17 'summed_contig_lengths.dat'] |
16 | 18 |
17 | 19 |
18 def stop_err(msg): | 20 def init_parser(): |
19 sys.stderr.write('%s\n' % msg) | 21 """Create argument parser and return parser obj.""" |
20 sys.exit() | 22 parser = argparse.ArgumentParser(description="usage: %prog [options]") |
23 | |
24 parser.add_argument( | |
25 "-d", | |
26 "--working-dir", | |
27 dest="working_dir", | |
28 required=True) | |
29 | |
30 parser.add_argument( | |
31 "-t", | |
32 "--type", | |
33 dest="file_type", | |
34 required=True) | |
35 | |
36 parser.add_argument( | |
37 "-b", | |
38 "--bucket", | |
39 dest="bucket", | |
40 action='store_true') | |
41 | |
42 parser.add_argument( | |
43 "-i", | |
44 "--input", | |
45 dest="input", | |
46 required=True) | |
47 | |
48 parser.add_argument( | |
49 "-s", | |
50 "--stats", | |
51 dest="stats", | |
52 required=True) | |
53 | |
54 parser.add_argument( | |
55 "-sc", | |
56 "--sorted-contigs", | |
57 dest="sorted_contigs", | |
58 required=True) | |
59 | |
60 parser.add_argument( | |
61 "-hpng", | |
62 "--histogram-png", | |
63 dest="histogram_png", | |
64 required=True) | |
65 | |
66 parser.add_argument( | |
67 "-spng", | |
68 "--summed-contigs-png", | |
69 dest="summed_contigs_png", | |
70 required=True) | |
71 | |
72 parser.add_argument( | |
73 "-hd", | |
74 "--histogram-data", | |
75 dest="histogram_data", | |
76 required=True) | |
77 | |
78 parser.add_argument( | |
79 "-scd", | |
80 "--summed-config-data", | |
81 dest="summed_contig_data", | |
82 required=True) | |
83 | |
84 return parser | |
21 | 85 |
22 | 86 |
23 def __main__(): | 87 def exec_fasta_summary(input_data, file_type, bucket, working_dir): |
88 """Execute fasta_summary.pl script with user arguments.""" | |
89 script_dir = Path(__file__).parent.absolute() | |
24 | 90 |
25 # Parse Command Line | 91 if bucket: |
92 bucket_arg = '-b' | |
93 else: | |
94 bucket_arg = '' | |
26 | 95 |
27 working_dir = sys.argv[2] | 96 cli_command = \ |
28 type = sys.argv[3] | 97 '{}/fasta_summary.pl -i {} -t {} {} -o {} > /dev/null'.format( |
29 bucket = sys.argv[4] | 98 script_dir, input_data, file_type, bucket_arg, working_dir) |
30 input = sys.argv[5] | |
31 stats = sys.argv[6] | |
32 sortedcontigs = sys.argv[7] | |
33 histogrampng = sys.argv[8] | |
34 summedcontigspng = sys.argv[9] | |
35 histogramdata = sys.argv[10] | |
36 summedcontigdata = sys.argv[11] | |
37 try: # for test - needs this done | |
38 os.makedirs(working_dir) | |
39 except Exception, e: | |
40 stop_err('Error running assembly_stats_txt.py ' + str(e)) | |
41 | 99 |
42 cmdline = '%s/fasta_summary.pl -i %s -t %s %s -o %s > /dev/null' \ | |
43 % (os.path.dirname(sys.argv[0]), input, type, bucket, | |
44 working_dir) | |
45 try: | 100 try: |
46 proc = subprocess.Popen(args=cmdline, shell=True, | 101 subprocess.check_output( |
47 stderr=subprocess.PIPE) | 102 cli_command, |
48 returncode = proc.wait() | 103 stderr=subprocess.STDOUT, |
49 | 104 shell=True, |
50 # get stderr, allowing for case where it's very large | 105 universal_newlines=True) |
51 | 106 except subprocess.CalledProcessError as exc: |
52 stderr = '' | 107 raise RuntimeError('Error running assembly_stats.py!\n' |
53 buffsize = 1048576 | 108 'Return Code: {}\nOutput: {}'.format( |
54 try: | 109 exc.returncode, exc.output)) |
55 while True: | |
56 stderr += proc.stderr.read(buffsize) | |
57 if not stderr or len(stderr) % buffsize != 0: | |
58 break | |
59 except OverflowError: | |
60 pass | |
61 if returncode != 0: | |
62 raise Exception | |
63 except Exception, e: | |
64 stop_err('Error running assembly_stats.py ' + str(e)) | |
65 | |
66 stats_path = os.path.join(working_dir, 'stats.txt') | |
67 sorted_contigs_path = os.path.join(working_dir, 'sorted_contigs.fa') | |
68 histogram_png_path = os.path.join(working_dir, | |
69 'histogram_bins.dat.png') | |
70 summed_contigs_path = os.path.join(working_dir, | |
71 'summed_contig_lengths.dat.png') | |
72 histogram_data_path = os.path.join(working_dir, 'histogram_bins.dat') | |
73 summed_contigs_data_path = os.path.join(working_dir, | |
74 'summed_contig_lengths.dat') | |
75 | |
76 out = open(stats, 'w') | |
77 for line in open(stats_path): | |
78 out.write('%s' % line) | |
79 out.close() | |
80 | |
81 out = open(sortedcontigs, 'w') | |
82 for line in open(sorted_contigs_path): | |
83 out.write('%s' % line) | |
84 out.close() | |
85 | |
86 out = open(histogrampng, 'w') | |
87 for line in open(histogram_png_path): | |
88 out.write('%s' % line) | |
89 out.close() | |
90 | |
91 out = open(summedcontigspng, 'w') | |
92 for line in open(summed_contigs_path): | |
93 out.write('%s' % line) | |
94 out.close() | |
95 | |
96 out = open(histogramdata, 'w') | |
97 for line in open(histogram_data_path): | |
98 out.write('%s' % line) | |
99 out.close() | |
100 | |
101 out = open(summedcontigdata, 'w') | |
102 for line in open(summed_contigs_data_path): | |
103 out.write('%s' % line) | |
104 out.close() | |
105 | 110 |
106 | 111 |
107 if __name__ == '__main__': | 112 def main(): |
108 __main__() | 113 """This is where the magic happens. (not really) |
114 | |
115 1. Gets command line arguments. | |
116 2. Grabs the user's desired parameters for running the perl script. | |
117 3. Ensures the directories are in place. | |
118 4. Executes fasta_summary.pl | |
119 5. Moves the out files from the perl script to the desired | |
120 location the user specified. | |
121 | |
122 """ | |
123 parser = init_parser() | |
124 args = parser.parse_args() | |
125 | |
126 working_dir = args.working_dir | |
127 | |
128 out_file_names = [args.stats, args.sorted_contigs, args.histogram_png, | |
129 args.summed_contigs_png, args.histogram_data, | |
130 args.summed_contig_data] | |
131 | |
132 # Ensure working directory is created. | |
133 Path(working_dir).mkdir(parents=True, exist_ok=True) | |
134 | |
135 # Execute Perl Script | |
136 exec_fasta_summary(args.input, args.file_type, args.bucket, working_dir) | |
137 | |
138 # Rename out files to desired file names | |
139 for perl_out_file, dest_file in zip(PERL_OUT_FILES, out_file_names): | |
140 shutil.move(os.path.join(working_dir, perl_out_file), | |
141 dest_file) | |
142 | |
143 | |
144 if __name__ == "__main__": | |
145 main() |