Mercurial > repos > nml > assemblystats
diff assembly_stats_txt.py @ 1:7556309ffbaf draft default tip
"planemo upload for repository https://github.com/phac-nml/galaxy_tools commit fb4c29f720748f46ff501140f2cd306bab6614f9"
author | nml |
---|---|
date | Fri, 29 May 2020 13:51:50 -0400 |
parents | ad2b274663f8 |
children |
line wrap: on
line diff
# Version 1.01 - bugs kindly corrected by Jan van Haarst
# Modified by Matthew Gopez October 13th, 2017
# Rewritten by Matthew Gopez May 25th, 2020
"""Command-line wrapper around fasta_summary.pl.

Runs the Perl script that lives next to this file, then moves the files it
wrote into the working directory to the destinations given on the command
line.
"""

import argparse
import os
import shutil
import subprocess
from pathlib import Path

# File names looked up in the working directory after fasta_summary.pl runs.
# The order matters: main() pairs these positionally with the destination
# arguments (stats, sorted contigs, histogram png, summed contigs png,
# histogram data, summed contig data).
PERL_OUT_FILES = ['stats.txt', 'sorted_contigs.fa', 'histogram_bins.dat.png',
                  'summed_contig_lengths.dat.png', 'histogram_bins.dat',
                  'summed_contig_lengths.dat']


def init_parser():
    """Create the argument parser and return it.

    Returns:
        argparse.ArgumentParser: parser exposing ``working_dir``,
        ``file_type``, ``bucket``, ``input``, ``stats``, ``sorted_contigs``,
        ``histogram_png``, ``summed_contigs_png``, ``histogram_data`` and
        ``summed_contig_data``.
    """
    # argparse only expands "%(prog)s"; the optparse-style "%prog" would be
    # printed literally, so a plain description is used instead.
    parser = argparse.ArgumentParser(
        description="Run fasta_summary.pl and move its output files to "
                    "the requested destinations.")

    parser.add_argument(
        "-d",
        "--working-dir",
        dest="working_dir",
        required=True,
        help="directory passed to fasta_summary.pl as -o; created if "
             "missing")

    parser.add_argument(
        "-t",
        "--type",
        dest="file_type",
        required=True,
        help="value passed to fasta_summary.pl as -t")

    parser.add_argument(
        "-b",
        "--bucket",
        dest="bucket",
        action='store_true',
        help="pass -b to fasta_summary.pl")

    parser.add_argument(
        "-i",
        "--input",
        dest="input",
        required=True,
        help="value passed to fasta_summary.pl as -i")

    parser.add_argument(
        "-s",
        "--stats",
        dest="stats",
        required=True,
        help="destination for stats.txt")

    parser.add_argument(
        "-sc",
        "--sorted-contigs",
        dest="sorted_contigs",
        required=True,
        help="destination for sorted_contigs.fa")

    parser.add_argument(
        "-hpng",
        "--histogram-png",
        dest="histogram_png",
        required=True,
        help="destination for histogram_bins.dat.png")

    parser.add_argument(
        "-spng",
        "--summed-contigs-png",
        dest="summed_contigs_png",
        required=True,
        help="destination for summed_contig_lengths.dat.png")

    parser.add_argument(
        "-hd",
        "--histogram-data",
        dest="histogram_data",
        required=True,
        help="destination for histogram_bins.dat")

    # "--summed-config-data" was a typo for "contig"; it is kept as an alias
    # so existing callers that use the old spelling keep working.
    parser.add_argument(
        "-scd",
        "--summed-contig-data",
        "--summed-config-data",
        dest="summed_contig_data",
        required=True,
        help="destination for summed_contig_lengths.dat")

    return parser


def exec_fasta_summary(input_data, file_type, bucket, working_dir):
    """Execute fasta_summary.pl with the user's arguments.

    The script is looked up in the directory containing this file. Its
    standard output is discarded; its standard error is captured and
    included in the error raised on failure.

    Args:
        input_data: value passed to the script as ``-i``.
        file_type: value passed to the script as ``-t``.
        bucket: when truthy, ``-b`` is added to the command line.
        working_dir: value passed to the script as ``-o``.

    Raises:
        RuntimeError: if the script cannot be started or exits with a
            non-zero status.
    """
    script = Path(__file__).parent.absolute() / 'fasta_summary.pl'

    # Build an argv list and run without a shell so that paths containing
    # spaces or shell metacharacters are passed through verbatim.
    command = [str(script), '-i', str(input_data), '-t', str(file_type)]
    if bucket:
        command.append('-b')
    command.extend(['-o', str(working_dir)])

    try:
        subprocess.run(
            command,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,
            universal_newlines=True,
            check=True)
    except subprocess.CalledProcessError as exc:
        raise RuntimeError('Error running assembly_stats.py!\n'
                           'Return Code: {}\nOutput: {}'.format(
                               exc.returncode, exc.stderr)) from exc
    except OSError as exc:
        # Without a shell, a missing or non-executable script surfaces as
        # OSError rather than a non-zero exit status; report it the same way.
        raise RuntimeError('Error running assembly_stats.py!\n'
                           'Could not execute {}: {}'.format(
                               script, exc)) from exc


def main(argv=None):
    """Run the wrapper end to end.

    1. Parse the command line arguments.
    2. Make sure the working directory exists.
    3. Execute fasta_summary.pl.
    4. Move the files the Perl script wrote to the destinations the
       user specified.

    Args:
        argv: optional list of argument strings; ``None`` (the default)
            makes argparse read ``sys.argv``.
    """
    args = init_parser().parse_args(argv)

    working_dir = args.working_dir

    # Same order as PERL_OUT_FILES; the two lists are zipped below.
    out_file_names = [args.stats, args.sorted_contigs, args.histogram_png,
                      args.summed_contigs_png, args.histogram_data,
                      args.summed_contig_data]

    # Ensure working directory is created.
    Path(working_dir).mkdir(parents=True, exist_ok=True)

    # Execute Perl Script
    exec_fasta_summary(args.input, args.file_type, args.bucket, working_dir)

    # Rename out files to desired file names
    for perl_out_file, dest_file in zip(PERL_OUT_FILES, out_file_names):
        shutil.move(os.path.join(working_dir, perl_out_file), dest_file)


if __name__ == "__main__":
    main()