Mercurial > repos > nml > assemblystats
comparison assembly_stats_txt.py @ 1:7556309ffbaf draft default tip
"planemo upload for repository https://github.com/phac-nml/galaxy_tools commit fb4c29f720748f46ff501140f2cd306bab6614f9"
| author | nml |
|---|---|
| date | Fri, 29 May 2020 13:51:50 -0400 |
| parents | ad2b274663f8 |
| children | |
comparison
Legend: equal, deleted, inserted, replaced
| 0:ad2b274663f8 | 1:7556309ffbaf |
|---|---|
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 # -*- coding: utf-8 -*- | 2 # -*- coding: utf-8 -*- |
| 3 | 3 |
| 4 # Version 1.01 - bugs kindly corrected by Jan van Haarst | 4 # Version 1.01 - bugs kindly corrected by Jan van Haarst |
| 5 # Modified by Matthew Gopez October 13th, 2017 | 5 # Modified by Matthew Gopez October 13th, 2017 |
| 6 # Rewritten by Matthew Gopez May 25th, 2020 | |
| 6 | 7 |
| 7 import logging | 8 import argparse |
| 8 import os | 9 import os |
| 10 import shutil | |
| 9 import subprocess | 11 import subprocess |
| 10 import sys | 12 from pathlib import Path |
| 11 | 13 |
| 12 | 14 |
| 13 log = logging.getLogger(__name__) | 15 PERL_OUT_FILES = ['stats.txt', 'sorted_contigs.fa', 'histogram_bins.dat.png', |
| 14 | 16 'summed_contig_lengths.dat.png', 'histogram_bins.dat', |
| 15 assert sys.version_info[:2] >= (2, 4) | 17 'summed_contig_lengths.dat'] |
| 16 | 18 |
| 17 | 19 |
| 18 def stop_err(msg): | 20 def init_parser(): |
| 19 sys.stderr.write('%s\n' % msg) | 21 """Create argument parser and return parser obj.""" |
| 20 sys.exit() | 22 parser = argparse.ArgumentParser(description="usage: %prog [options]") |
| 23 | |
| 24 parser.add_argument( | |
| 25 "-d", | |
| 26 "--working-dir", | |
| 27 dest="working_dir", | |
| 28 required=True) | |
| 29 | |
| 30 parser.add_argument( | |
| 31 "-t", | |
| 32 "--type", | |
| 33 dest="file_type", | |
| 34 required=True) | |
| 35 | |
| 36 parser.add_argument( | |
| 37 "-b", | |
| 38 "--bucket", | |
| 39 dest="bucket", | |
| 40 action='store_true') | |
| 41 | |
| 42 parser.add_argument( | |
| 43 "-i", | |
| 44 "--input", | |
| 45 dest="input", | |
| 46 required=True) | |
| 47 | |
| 48 parser.add_argument( | |
| 49 "-s", | |
| 50 "--stats", | |
| 51 dest="stats", | |
| 52 required=True) | |
| 53 | |
| 54 parser.add_argument( | |
| 55 "-sc", | |
| 56 "--sorted-contigs", | |
| 57 dest="sorted_contigs", | |
| 58 required=True) | |
| 59 | |
| 60 parser.add_argument( | |
| 61 "-hpng", | |
| 62 "--histogram-png", | |
| 63 dest="histogram_png", | |
| 64 required=True) | |
| 65 | |
| 66 parser.add_argument( | |
| 67 "-spng", | |
| 68 "--summed-contigs-png", | |
| 69 dest="summed_contigs_png", | |
| 70 required=True) | |
| 71 | |
| 72 parser.add_argument( | |
| 73 "-hd", | |
| 74 "--histogram-data", | |
| 75 dest="histogram_data", | |
| 76 required=True) | |
| 77 | |
| 78 parser.add_argument( | |
| 79 "-scd", | |
| 80 "--summed-config-data", | |
| 81 dest="summed_contig_data", | |
| 82 required=True) | |
| 83 | |
| 84 return parser | |
| 21 | 85 |
| 22 | 86 |
| 23 def __main__(): | 87 def exec_fasta_summary(input_data, file_type, bucket, working_dir): |
| 88 """Execute fasta_summary.pl script with user arguments.""" | |
| 89 script_dir = Path(__file__).parent.absolute() | |
| 24 | 90 |
| 25 # Parse Command Line | 91 if bucket: |
| 92 bucket_arg = '-b' | |
| 93 else: | |
| 94 bucket_arg = '' | |
| 26 | 95 |
| 27 working_dir = sys.argv[2] | 96 cli_command = \ |
| 28 type = sys.argv[3] | 97 '{}/fasta_summary.pl -i {} -t {} {} -o {} > /dev/null'.format( |
| 29 bucket = sys.argv[4] | 98 script_dir, input_data, file_type, bucket_arg, working_dir) |
| 30 input = sys.argv[5] | |
| 31 stats = sys.argv[6] | |
| 32 sortedcontigs = sys.argv[7] | |
| 33 histogrampng = sys.argv[8] | |
| 34 summedcontigspng = sys.argv[9] | |
| 35 histogramdata = sys.argv[10] | |
| 36 summedcontigdata = sys.argv[11] | |
| 37 try: # for test - needs this done | |
| 38 os.makedirs(working_dir) | |
| 39 except Exception, e: | |
| 40 stop_err('Error running assembly_stats_txt.py ' + str(e)) | |
| 41 | 99 |
| 42 cmdline = '%s/fasta_summary.pl -i %s -t %s %s -o %s > /dev/null' \ | |
| 43 % (os.path.dirname(sys.argv[0]), input, type, bucket, | |
| 44 working_dir) | |
| 45 try: | 100 try: |
| 46 proc = subprocess.Popen(args=cmdline, shell=True, | 101 subprocess.check_output( |
| 47 stderr=subprocess.PIPE) | 102 cli_command, |
| 48 returncode = proc.wait() | 103 stderr=subprocess.STDOUT, |
| 49 | 104 shell=True, |
| 50 # get stderr, allowing for case where it's very large | 105 universal_newlines=True) |
| 51 | 106 except subprocess.CalledProcessError as exc: |
| 52 stderr = '' | 107 raise RuntimeError('Error running assembly_stats.py!\n' |
| 53 buffsize = 1048576 | 108 'Return Code: {}\nOutput: {}'.format( |
| 54 try: | 109 exc.returncode, exc.output)) |
| 55 while True: | |
| 56 stderr += proc.stderr.read(buffsize) | |
| 57 if not stderr or len(stderr) % buffsize != 0: | |
| 58 break | |
| 59 except OverflowError: | |
| 60 pass | |
| 61 if returncode != 0: | |
| 62 raise Exception | |
| 63 except Exception, e: | |
| 64 stop_err('Error running assembly_stats.py ' + str(e)) | |
| 65 | |
| 66 stats_path = os.path.join(working_dir, 'stats.txt') | |
| 67 sorted_contigs_path = os.path.join(working_dir, 'sorted_contigs.fa') | |
| 68 histogram_png_path = os.path.join(working_dir, | |
| 69 'histogram_bins.dat.png') | |
| 70 summed_contigs_path = os.path.join(working_dir, | |
| 71 'summed_contig_lengths.dat.png') | |
| 72 histogram_data_path = os.path.join(working_dir, 'histogram_bins.dat') | |
| 73 summed_contigs_data_path = os.path.join(working_dir, | |
| 74 'summed_contig_lengths.dat') | |
| 75 | |
| 76 out = open(stats, 'w') | |
| 77 for line in open(stats_path): | |
| 78 out.write('%s' % line) | |
| 79 out.close() | |
| 80 | |
| 81 out = open(sortedcontigs, 'w') | |
| 82 for line in open(sorted_contigs_path): | |
| 83 out.write('%s' % line) | |
| 84 out.close() | |
| 85 | |
| 86 out = open(histogrampng, 'w') | |
| 87 for line in open(histogram_png_path): | |
| 88 out.write('%s' % line) | |
| 89 out.close() | |
| 90 | |
| 91 out = open(summedcontigspng, 'w') | |
| 92 for line in open(summed_contigs_path): | |
| 93 out.write('%s' % line) | |
| 94 out.close() | |
| 95 | |
| 96 out = open(histogramdata, 'w') | |
| 97 for line in open(histogram_data_path): | |
| 98 out.write('%s' % line) | |
| 99 out.close() | |
| 100 | |
| 101 out = open(summedcontigdata, 'w') | |
| 102 for line in open(summed_contigs_data_path): | |
| 103 out.write('%s' % line) | |
| 104 out.close() | |
| 105 | 110 |
| 106 | 111 |
| 107 if __name__ == '__main__': | 112 def main(): |
| 108 __main__() | 113 """This is where the magic happens. (not really) |
| 114 | |
| 115 1. Gets command line arguments. | |
| 116 2. Grabs the user's desired parameters for running the perl script. | |
| 117 3. Ensures the directories are in place. | |
| 118 4. Executes fasta_summary.pl | |
| 119 5. Move the out files from the perl script to the desired | |
| 120 location the user specified. | |
| 121 | |
| 122 """ | |
| 123 parser = init_parser() | |
| 124 args = parser.parse_args() | |
| 125 | |
| 126 working_dir = args.working_dir | |
| 127 | |
| 128 out_file_names = [args.stats, args.sorted_contigs, args.histogram_png, | |
| 129 args.summed_contigs_png, args.histogram_data, | |
| 130 args.summed_contig_data] | |
| 131 | |
| 132 # Ensure working directory is created. | |
| 133 Path(working_dir).mkdir(parents=True, exist_ok=True) | |
| 134 | |
| 135 # Execute Perl Script | |
| 136 exec_fasta_summary(args.input, args.file_type, args.bucket, working_dir) | |
| 137 | |
| 138 # Rename out files to desired file names | |
| 139 for perl_out_file, dest_file in zip(PERL_OUT_FILES, out_file_names): | |
| 140 shutil.move(os.path.join(working_dir, perl_out_file), | |
| 141 dest_file) | |
| 142 | |
| 143 | |
| 144 if __name__ == "__main__": | |
| 145 main() |
