# HG changeset patch # User ahosny # Date 1473255144 14400 # Node ID e0f5a71e94ed4bed628fd1e884fa218e182173e6 # Parent 12eb1e77bcfa994c379a8c9741167f1c634dfc8c Uploaded diff -r 12eb1e77bcfa -r e0f5a71e94ed cnv-sim.py --- a/cnv-sim.py Wed Sep 07 09:31:48 2016 -0400 +++ b/cnv-sim.py Wed Sep 07 09:32:24 2016 -0400 @@ -11,11 +11,23 @@ from cnvsim.exome_simulator import * from cnvsim.genome_simulator import * +class CapitalisedHelpFormatter(argparse.HelpFormatter): + def add_usage(self, usage, actions, groups, prefix=None): + if prefix is None: + prefix = 'Usage: ' + return super(CapitalisedHelpFormatter, self).add_usage(usage, actions, groups, prefix) + def log(message): print '[CNV SIM {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now()) + "] " + message + def main(): - parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser = argparse.ArgumentParser(add_help=True, formatter_class=CapitalisedHelpFormatter, \ + description='Generates NGS short reads that encompass copy number variations in whole genome and targeted exome sequencing') + parser._positionals.title = 'Positional arguments' + parser._optionals.title = 'Optional arguments' + parser.add_argument('-v', '--version', action='version', version = 'CNV-Sim v0.9.2', help = "Show program's version number and exit.") + parser.add_argument("simulation_type", type=str, choices=['genome', 'exome'], \ help="simulate copy number variations in whole genome or exome regions") parser.add_argument("genome", type=file, \ @@ -23,7 +35,7 @@ parser.add_argument("target", type=file, nargs='?', default=None, \ help="path to the target regions file in BED format (if using exome)") - parser.add_argument("-o", "--output_dir_name",type=str, default="test", \ + parser.add_argument("-o", "--output_dir_name",type=str, default="simulation_output", \ help="a name to be used to create the output directory (overrides existing directory with the same name).") parser.add_argument("-n", "--n_reads", type=int, default=10000, \ help="total number of reads without variations") @@ -33,16 +45,20 @@ help="path to a CNV list file in BED format chr | start | end | variation. If not passed, it is randomly generated using CNV list parameters below") cnv_sim_group = parser.add_argument_group('CNV list parameters', "parameters to be used if CNV list is not passed") - cnv_sim_group.add_argument("-g", "--regions_count", type=int, default=30, \ - help="number of CNV regions to be randomly generated") - cnv_sim_group.add_argument("-a", "--amplifications", type=float, default=0.30, \ + cnv_sim_group.add_argument("-g", "--regions_count", type=int, default=20, \ + help="number of CNV regions to be generated randomly") + cnv_sim_group.add_argument("-r_min", "--region_minimum_length", type=int, default=1000, \ + help="minimum length of each CNV region") + cnv_sim_group.add_argument("-r_max", "--region_maximum_length", type=int, default=100000, \ + help="maximum length of each CNV region") + cnv_sim_group.add_argument("-a", "--amplifications", type=float, default=0.50, \ help="percentage of amplifications in range [0.0: 1.0].") - cnv_sim_group.add_argument("-d", "--deletions", type=float, default=0.20, \ + cnv_sim_group.add_argument("-d", "--deletions", type=float, default=0.50, \ help="percentage of deletions in range [0.0: 1.0].") - cnv_sim_group.add_argument("-min", "--minimum", type=float, default=3, \ - help="minimum number of amplifications/deletions introduced") - cnv_sim_group.add_argument("-max", "--maximum", type=float, default=10, \ - help="maximum number of amplifications/deletions introduced") + cnv_sim_group.add_argument("-cn_min", "--copy_number_minimum", type=float, default=3, \ + help="minimum level of variations (copy number) introduced") + cnv_sim_group.add_argument("-cn_max", "--copy_number_maximum", type=float, default=10, \ + help="maximum level of variation (copy number) introduced") args = parser.parse_args() @@ -64,10 +80,16 @@ cnv_list_parameters = {} cnv_list_parameters['regions_count'] = args.regions_count + cnv_list_parameters['minimum_length'] = args.region_minimum_length + cnv_list_parameters['maximum_length'] = args.region_maximum_length cnv_list_parameters['amplifications'] = args.amplifications cnv_list_parameters['deletions'] = args.deletions - cnv_list_parameters['minimum_variations'] = args.minimum - cnv_list_parameters['maximum_variations'] = args.maximum + cnv_list_parameters['minimum_variations'] = args.copy_number_minimum + cnv_list_parameters['maximum_variations'] = args.copy_number_maximum + + if cnv_list_parameters['amplifications'] + cnv_list_parameters['deletions'] != 1.0: + log("ERROR: percentage of amplifications + percentage of deletions must be equal to 1.0") + exit() if simulation_parameters['type'] == 'genome': simulate_genome_cnv(simulation_parameters, cnv_list_parameters)