4
|
1 #!/usr/bin/python
|
|
2
|
|
3 __author__ = 'Abdelrahman Hosny'
|
|
4
|
|
5 import os.path
|
|
6 import datetime
|
|
7 import argparse
|
|
8 import shutil
|
|
9
|
|
10 from cnvsim.fileio import *
|
|
11 from cnvsim.exome_simulator import *
|
|
12 from cnvsim.genome_simulator import *
|
|
13
|
|
def log(message):
    """Print ``message`` to stdout behind a ``[CNV SIM <timestamp>]`` tag.

    The timestamp is ``datetime.datetime.now()`` rendered as
    ``YYYY-MM-DD HH:MM:SS``.

    :param message: text appended after the tag; it is concatenated to the
        prefix, so it must be a string.
    """
    timestamp = '{:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now())
    # A single parenthesised argument produces the same output under the
    # Python 2 print statement and the Python 3 print function.
    print('[CNV SIM ' + timestamp + '] ' + message)
|
|
16
|
|
def _readable_file(path):
    """Return ``path`` unchanged after checking it can be opened for reading.

    Intended as an argparse ``type=`` callable. It replaces the Python 2-only
    ``file`` builtin: the file is opened just long enough to validate it and
    is closed again, so no handle is leaked.

    :param path: path string taken from the command line.
    :raises argparse.ArgumentTypeError: if the file cannot be opened, so that
        argparse reports a usage error instead of a traceback.
    """
    try:
        with open(path):
            pass
    except (IOError, OSError) as error:
        raise argparse.ArgumentTypeError(
            "can't open '{}': {}".format(path, error))
    return path


def main():
    """Parse the command line and run the requested CNV simulation.

    Builds two dictionaries from the parsed arguments -- the general
    simulation parameters and the parameters for generating a random CNV
    list -- and hands them to ``simulate_genome_cnv`` or
    ``simulate_exome_cnv`` depending on the ``simulation_type`` argument.
    Both functions are expected to come from the ``cnvsim`` star-imports at
    the top of the file.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("simulation_type", type=str, choices=['genome', 'exome'],
                        help="simulate copy number variations in whole genome or exome regions")
    parser.add_argument("genome", type=_readable_file,
                        help="path to the reference genome file in FASTA format ")
    parser.add_argument("target", type=_readable_file, nargs='?', default=None,
                        help="path to the target regions file in BED format (if using exome)")

    parser.add_argument("-o", "--output_dir_name", type=str, default="test",
                        help="a name to be used to create the output directory (overrides existing directory with the same name).")
    parser.add_argument("-n", "--n_reads", type=int, default=10000,
                        help="total number of reads without variations")
    parser.add_argument("-l", "--read_length", type=int, default=100,
                        help="read length (bp)")
    parser.add_argument("--cnv_list", type=_readable_file, default=None,
                        help="path to a CNV list file in BED format chr | start | end | variation. If not passed, it is randomly generated using CNV list parameters below")

    cnv_sim_group = parser.add_argument_group(
        'CNV list parameters', "parameters to be used if CNV list is not passed")
    cnv_sim_group.add_argument("-g", "--regions_count", type=int, default=30,
                               help="number of CNV regions to be randomly generated")
    cnv_sim_group.add_argument("-a", "--amplifications", type=float, default=0.30,
                               help="percentage of amplifications in range [0.0: 1.0].")
    cnv_sim_group.add_argument("-d", "--deletions", type=float, default=0.20,
                               help="percentage of deletions in range [0.0: 1.0].")
    # NOTE(review): minimum/maximum are described as counts but parsed as
    # floats; kept as-is because the consumers are not visible here.
    cnv_sim_group.add_argument("-min", "--minimum", type=float, default=3,
                               help="minimum number of amplifications/deletions introduced")
    cnv_sim_group.add_argument("-max", "--maximum", type=float, default=10,
                               help="maximum number of amplifications/deletions introduced")

    args = parser.parse_args()

    # The output directory is resolved against the current working directory;
    # the temporary directory is a "tmp" folder inside it.
    output_dir = os.path.join(os.getcwd(), args.output_dir_name)

    simulation_parameters = {
        'type': args.simulation_type,
        'genome_file': args.genome,
        # Optional arguments are already None when omitted.
        'target_file': args.target,
        'output_dir': output_dir,
        'number_of_reads': args.n_reads,
        'read_length': args.read_length,
        'cnv_list_file': args.cnv_list,
        'tmp_dir': os.path.join(output_dir, "tmp"),
    }

    cnv_list_parameters = {
        'regions_count': args.regions_count,
        'amplifications': args.amplifications,
        'deletions': args.deletions,
        'minimum_variations': args.minimum,
        'maximum_variations': args.maximum,
    }

    if simulation_parameters['type'] == 'genome':
        simulate_genome_cnv(simulation_parameters, cnv_list_parameters)
    else:
        simulate_exome_cnv(simulation_parameters, cnv_list_parameters)
|
|
76
|
|
77
|
|
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()