annotate cnv-sim.py @ 17:e4ebf3435054 draft

Uploaded
author ahosny
date Wed, 07 Sep 2016 09:36:31 -0400
parents e0f5a71e94ed
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4
4a4d2b78eb55 Main Python Code
ahosny
parents:
diff changeset
1 #!/usr/bin/python
4a4d2b78eb55 Main Python Code
ahosny
parents:
diff changeset
2
4a4d2b78eb55 Main Python Code
ahosny
parents:
diff changeset
3 __author__ = 'Abdelrahman Hosny'
4a4d2b78eb55 Main Python Code
ahosny
parents:
diff changeset
4
4a4d2b78eb55 Main Python Code
ahosny
parents:
diff changeset
5 import os.path
4a4d2b78eb55 Main Python Code
ahosny
parents:
diff changeset
6 import datetime
4a4d2b78eb55 Main Python Code
ahosny
parents:
diff changeset
7 import argparse
4a4d2b78eb55 Main Python Code
ahosny
parents:
diff changeset
8 import shutil
4a4d2b78eb55 Main Python Code
ahosny
parents:
diff changeset
9
4a4d2b78eb55 Main Python Code
ahosny
parents:
diff changeset
10 from cnvsim.fileio import *
4a4d2b78eb55 Main Python Code
ahosny
parents:
diff changeset
11 from cnvsim.exome_simulator import *
4a4d2b78eb55 Main Python Code
ahosny
parents:
diff changeset
12 from cnvsim.genome_simulator import *
4a4d2b78eb55 Main Python Code
ahosny
parents:
diff changeset
13
13
e0f5a71e94ed Uploaded
ahosny
parents: 4
diff changeset
class CapitalisedHelpFormatter(argparse.HelpFormatter):
    """Help formatter whose usage line starts with 'Usage: ' by default."""

    def add_usage(self, usage, actions, groups, prefix=None):
        """Delegate to the base formatter, supplying 'Usage: ' when no prefix is given.

        An explicitly passed ``prefix`` is forwarded unchanged.
        """
        effective_prefix = 'Usage: ' if prefix is None else prefix
        parent = super(CapitalisedHelpFormatter, self)
        return parent.add_usage(usage, actions, groups, effective_prefix)
e0f5a71e94ed Uploaded
ahosny
parents: 4
diff changeset
19
4
4a4d2b78eb55 Main Python Code
ahosny
parents:
diff changeset
def log(message):
    """Print ``message`` to stdout, prefixed with ``[CNV SIM <timestamp>] ``.

    The timestamp is the current local time formatted as
    ``YYYY-MM-DD HH:MM:SS``.

    :param message: text to append after the tag (must be a string, since it
        is concatenated to the prefix).
    """
    timestamp = '{:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now())
    # A single parenthesised argument produces the same output under the
    # Python 2 print statement and the Python 3 print function.
    print('[CNV SIM ' + timestamp + '] ' + message)
4a4d2b78eb55 Main Python Code
ahosny
parents:
diff changeset
22
13
e0f5a71e94ed Uploaded
ahosny
parents: 4
diff changeset
23
4
4a4d2b78eb55 Main Python Code
ahosny
parents:
diff changeset
def _existing_file(path):
    """argparse ``type`` callable: return ``path`` if it names an existing file.

    Raises ``argparse.ArgumentTypeError`` otherwise, so argparse reports a
    normal usage error instead of an uncaught traceback. Unlike ``type=file``
    it does not open (and leak) a file handle; only the path is needed.
    """
    if not os.path.isfile(path):
        raise argparse.ArgumentTypeError("can't open '{}': no such file".format(path))
    return path


def main():
    """Parse the command line and run the requested CNV simulation.

    Builds two dictionaries from the parsed arguments -- general simulation
    parameters and random-CNV-list parameters -- and passes them to
    ``simulate_genome_cnv`` or ``simulate_exome_cnv`` depending on the
    ``simulation_type`` positional argument. Both functions are presumably
    provided by the ``cnvsim`` star imports at the top of the file.

    Exits with status 1 if the amplification and deletion fractions do not
    sum to 1.0.
    """
    import sys  # local import: only needed for the error exit status

    parser = argparse.ArgumentParser(
        add_help=True,
        formatter_class=CapitalisedHelpFormatter,
        description='Generates NGS short reads that encompass copy number variations in whole genome and targeted exome sequencing')
    # argparse exposes no public way to rename the default group titles.
    parser._positionals.title = 'Positional arguments'
    parser._optionals.title = 'Optional arguments'
    parser.add_argument('-v', '--version', action='version', version='CNV-Sim v0.9.2',
                        help="Show program's version number and exit.")

    parser.add_argument("simulation_type", type=str, choices=['genome', 'exome'],
                        help="simulate copy number variations in whole genome or exome regions")
    parser.add_argument("genome", type=_existing_file,
                        help="path to the reference genome file in FASTA format ")
    # NOTE(review): the help text implies ``target`` is needed for 'exome'
    # runs, but nothing here enforces it -- confirm how simulate_exome_cnv
    # handles target_file=None.
    parser.add_argument("target", type=_existing_file, nargs='?', default=None,
                        help="path to the target regions file in BED format (if using exome)")

    parser.add_argument("-o", "--output_dir_name", type=str, default="simulation_output",
                        help="a name to be used to create the output directory (overrides existing directory with the same name).")
    parser.add_argument("-n", "--n_reads", type=int, default=10000,
                        help="total number of reads without variations")
    parser.add_argument("-l", "--read_length", type=int, default=100,
                        help="read length (bp)")
    parser.add_argument("--cnv_list", type=_existing_file, default=None,
                        help="path to a CNV list file in BED format chr | start | end | variation. If not passed, it is randomly generated using CNV list parameters below")

    cnv_sim_group = parser.add_argument_group('CNV list parameters', "parameters to be used if CNV list is not passed")
    cnv_sim_group.add_argument("-g", "--regions_count", type=int, default=20,
                               help="number of CNV regions to be generated randomly")
    cnv_sim_group.add_argument("-r_min", "--region_minimum_length", type=int, default=1000,
                               help="minimum length of each CNV region")
    cnv_sim_group.add_argument("-r_max", "--region_maximum_length", type=int, default=100000,
                               help="maximum length of each CNV region")
    cnv_sim_group.add_argument("-a", "--amplifications", type=float, default=0.50,
                               help="percentage of amplifications in range [0.0: 1.0].")
    cnv_sim_group.add_argument("-d", "--deletions", type=float, default=0.50,
                               help="percentage of deletions in range [0.0: 1.0].")
    cnv_sim_group.add_argument("-cn_min", "--copy_number_minimum", type=float, default=3,
                               help="minimum level of variations (copy number) introduced")
    cnv_sim_group.add_argument("-cn_max", "--copy_number_maximum", type=float, default=10,
                               help="maximum level of variation (copy number) introduced")

    args = parser.parse_args()

    output_dir = os.path.join(os.getcwd(), args.output_dir_name)
    simulation_parameters = {
        'type': args.simulation_type,
        'genome_file': args.genome,
        'target_file': args.target,        # None when not supplied
        'output_dir': output_dir,
        'number_of_reads': args.n_reads,
        'read_length': args.read_length,
        'cnv_list_file': args.cnv_list,    # None when not supplied
        'tmp_dir': os.path.join(output_dir, "tmp"),
    }

    cnv_list_parameters = {
        'regions_count': args.regions_count,
        'minimum_length': args.region_minimum_length,
        'maximum_length': args.region_maximum_length,
        'amplifications': args.amplifications,
        'deletions': args.deletions,
        'minimum_variations': args.copy_number_minimum,
        'maximum_variations': args.copy_number_maximum,
    }

    # Compare with a tolerance: exact float equality can reject fractions
    # whose binary sum is off from 1.0 by a rounding error.
    fraction_total = cnv_list_parameters['amplifications'] + cnv_list_parameters['deletions']
    if abs(fraction_total - 1.0) > 1e-9:
        log("ERROR: percentage of amplifications + percentage of deletions must be equal to 1.0")
        # Non-zero status so calling scripts/pipelines can detect the failure.
        sys.exit(1)

    if simulation_parameters['type'] == 'genome':
        simulate_genome_cnv(simulation_parameters, cnv_list_parameters)
    else:
        simulate_exome_cnv(simulation_parameters, cnv_list_parameters)
4a4d2b78eb55 Main Python Code
ahosny
parents:
diff changeset
98
4a4d2b78eb55 Main Python Code
ahosny
parents:
diff changeset
99
4a4d2b78eb55 Main Python Code
ahosny
parents:
diff changeset
# Script entry point: run the simulator only when executed directly,
# not when this module is imported.
if __name__ == '__main__':
    main()