diff cnv-sim.py @ 13:e0f5a71e94ed draft

Uploaded
author ahosny
date Wed, 07 Sep 2016 09:32:24 -0400
parents 4a4d2b78eb55
children
line wrap: on
line diff
--- a/cnv-sim.py	Wed Sep 07 09:31:48 2016 -0400
+++ b/cnv-sim.py	Wed Sep 07 09:32:24 2016 -0400
@@ -11,11 +11,23 @@
 from cnvsim.exome_simulator import *
 from cnvsim.genome_simulator import *
 
+class CapitalisedHelpFormatter(argparse.HelpFormatter):  # argparse help formatter whose usage prefix defaults to 'Usage: '
+    def add_usage(self, usage, actions, groups, prefix=None):  # same signature as HelpFormatter.add_usage
+        if prefix is None:  # only substitute the default; an explicit prefix is passed through untouched
+            prefix = 'Usage: '
+        return super(CapitalisedHelpFormatter, self).add_usage(usage, actions, groups, prefix)  # always delegate, so an explicit prefix still yields a usage section
+
 def log(message):
     print '[CNV SIM {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now()) + "] " + message
 
+
 def main():
-    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser = argparse.ArgumentParser(add_help=True, formatter_class=CapitalisedHelpFormatter, \
+                                     description='Generates NGS short reads that encompass copy number variations in whole genome and targeted exome sequencing')
+    parser._positionals.title = 'Positional arguments'
+    parser._optionals.title = 'Optional arguments'
+    parser.add_argument('-v', '--version', action='version', version = 'CNV-Sim v0.9.2', help = "Show program's version number and exit.")
+
     parser.add_argument("simulation_type", type=str, choices=['genome', 'exome'], \
                         help="simulate copy number variations in whole genome or exome regions")
     parser.add_argument("genome", type=file, \
@@ -23,7 +35,7 @@
     parser.add_argument("target", type=file, nargs='?', default=None, \
                         help="path to the target regions file in BED format (if using exome)")
 
-    parser.add_argument("-o", "--output_dir_name",type=str, default="test", \
+    parser.add_argument("-o", "--output_dir_name",type=str, default="simulation_output", \
                         help="a name to be used to create the output directory (overrides existing directory with the same name).")
     parser.add_argument("-n", "--n_reads", type=int, default=10000, \
                         help="total number of reads without variations")
@@ -33,16 +45,20 @@
                         help="path to a CNV list file in BED format chr | start | end | variation. If not passed, it is randomly generated using CNV list parameters below")
 
     cnv_sim_group = parser.add_argument_group('CNV list parameters', "parameters to be used if CNV list is not passed")
-    cnv_sim_group.add_argument("-g", "--regions_count", type=int, default=30, \
-                        help="number of CNV regions to be randomly generated")
-    cnv_sim_group.add_argument("-a", "--amplifications", type=float, default=0.30, \
+    cnv_sim_group.add_argument("-g", "--regions_count", type=int, default=20, \
+                        help="number of CNV regions to be generated randomly")
+    cnv_sim_group.add_argument("-r_min", "--region_minimum_length", type=int, default=1000, \
+                               help="minimum length of each CNV region")
+    cnv_sim_group.add_argument("-r_max", "--region_maximum_length", type=int, default=100000, \
+                               help="maximum length of each CNV region")
+    cnv_sim_group.add_argument("-a", "--amplifications", type=float, default=0.50, \
                         help="percentage of amplifications in range [0.0: 1.0].")
-    cnv_sim_group.add_argument("-d", "--deletions", type=float, default=0.20, \
+    cnv_sim_group.add_argument("-d", "--deletions", type=float, default=0.50, \
                         help="percentage of deletions in range [0.0: 1.0].")
-    cnv_sim_group.add_argument("-min", "--minimum", type=float, default=3, \
-                        help="minimum number of amplifications/deletions introduced")
-    cnv_sim_group.add_argument("-max", "--maximum", type=float, default=10, \
-                        help="maximum number of amplifications/deletions introduced")
+    cnv_sim_group.add_argument("-cn_min", "--copy_number_minimum", type=float, default=3, \
+                        help="minimum level of variations (copy number) introduced")
+    cnv_sim_group.add_argument("-cn_max", "--copy_number_maximum", type=float, default=10, \
+                        help="maximum level of variation (copy number) introduced")
 
     args = parser.parse_args()
 
@@ -64,10 +80,16 @@
 
     cnv_list_parameters = {}
     cnv_list_parameters['regions_count'] = args.regions_count
+    cnv_list_parameters['minimum_length'] = args.region_minimum_length
+    cnv_list_parameters['maximum_length'] = args.region_maximum_length
     cnv_list_parameters['amplifications'] = args.amplifications
     cnv_list_parameters['deletions'] = args.deletions
-    cnv_list_parameters['minimum_variations'] = args.minimum
-    cnv_list_parameters['maximum_variations'] = args.maximum
+    cnv_list_parameters['minimum_variations'] = args.copy_number_minimum
+    cnv_list_parameters['maximum_variations'] = args.copy_number_maximum
+
+    if abs(cnv_list_parameters['amplifications'] + cnv_list_parameters['deletions'] - 1.0) > 1e-9:  # tolerance: a float sum of two fractions can miss 1.0 by a rounding error
+        log("ERROR: percentage of amplifications + percentage of deletions must be equal to 1.0")
+        exit(1)  # non-zero status so the calling shell/wrapper can detect the failure
 
     if simulation_parameters['type'] == 'genome':
         simulate_genome_cnv(simulation_parameters, cnv_list_parameters)