annotate rDiff/src/configure_rdiff.m @ 2:233c30f91d66

updated python based GFF parsing module which will handle GTF/GFF/GFF3 file types
author vipints <vipin@cbio.mskcc.org>
date Tue, 08 Oct 2013 07:15:44 -0400
parents 0f80a5141704
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
1 function CFG = configure_rdiff(CFG)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
2 % configure_rdiff(CFG)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
3
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
4 %%% rDiff parameters %%%
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
5
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
6 % Give the filenames of the bam-files to be considered
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
7 CFG.BAM_FILES={'condition_A_replicate_1.bam','condition_A_replicate_2.bam','condition_B_replicate_1.bam','condition_B_replicate_2.bam'};
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
8
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
9 %Name of the experiment. Use the FILENAMES if the entries are empty.
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
10 CFG.NAMES={'A1','A2','B1','B2'};
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
11
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
12
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
13 % Give the directory where the bam-files are
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
14 CFG.data_dir = '' ;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
15
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
16 % Indicate to which sample the bam-files belong
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
17 CFG.SAMPLES=[1,1,2,2];
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
18
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
19 % Location of the gene structure
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
20 CFG.genes_path='' ;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
21
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
22 % Output directory
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
23 CFG.out_base = '' ;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
24
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
25 % Output directory for temporary files
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
26 CFG.out_base_temp = '' ;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
27
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
28 %Length of the reads
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
29 CFG.sequenced_length=75;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
30
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
31 % Prefix for the chromosome name when getting reads from
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
32 % the bam-files
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
33 CFG.chr_prefix='';
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
34
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
35 %%% Read filters %%%
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
36
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
37 % Minimal read length
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
38 CFG.min_read_length=30;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
39
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
40
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
41
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
42 %%% Parameters for gene expression estimation
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
43 %Count the number of reads ( CFG.estimate_gene_expression=1 for yes
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
44 %give the Files for the expression in CFG.GENE_EXPR_FILES
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
45 CFG.estimate_gene_expression=1;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
46
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
47 % Use the following files in CFG.GENE_EXPR_FILES for the
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
48 % gene_expression. Those must be tab-delimited files where each
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
49 % line contains the gene name followed by the expression
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
50 CFG.Counts_gene_expression='';
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
51 CFG.Counts_rDiff_parametric='';
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
52 CFG.Counts_rDiff_nonparametric='';
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
53
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
54
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
55
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
56 %%% Parameters for variance function
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
57
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
58 % Use a parametric form for the variance function for sample 1: sigma= a + bx + cx^2
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
59 % (CFG.predefined_variance_function1=[] if not; CFG.predefined_variance_function1=[a,b,c] otherwise)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
60 % If CFG.predefined_variance_function1=[a,b,c] is given, the other
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
61 % parameters for the variance function estimations are ignored for
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
62 % sample 1
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
63 CFG.predefined_variance_function1=[];
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
64
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
65 % Use a parametric form for the variance function for sample 2: sigma= a + bx + cx^2
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
66 % (CFG.predefined_variance_function2=[] if not; CFG.predefined_variance_function2=[a,b,c] otherwise)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
67 % If CFG.predefined_variance_function2=[a,b,c] is given, the other
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
68 % parameters for the variance function estimations are ignored
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
69 % for sample 2
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
70 CFG.predefined_variance_function2=[];
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
71
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
72 % compute variance function for sample 1 ( 1 = yes , 0 = use precomputed
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
73 % variance function saved under CFG.variance_function_1)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
74 CFG.compute_variance_function_1=1;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
75 CFG.variance_function_1='';
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
76 CFG.save_variance_function_1='variance_function_1.mat';
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
77
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
78 % compute variance function for sample 2 ( 1 = yes , 0 = use precomputed
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
79 % variance function saved under CFG.variance_function_2)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
80 CFG.compute_variance_function_2=1;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
81 CFG.variance_function_2='';
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
82 CFG.save_variance_function_2='variance_function_2.mat';
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
83
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
84 % subsample points for the variance function estimate for rDiff.nonparametric
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
85 CFG.rDiff_nonparametric_subsample_variance_estimation=10000;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
86
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
87 % Subsample the mean-variance pairs to increase the speed of the
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
88 % local regression. CFG.variance_samplebins is the number of bins to
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
89 % use and CFG.variance_samples_per_bin is how many samples should
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
90 % be drawn per bin
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
91 CFG.variance_samplebins=100;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
92 CFG.variance_samples_per_bin=500;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
93
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
94
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
95
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
96 %%% Testing parameters %%%
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
97
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
98 % subsample reads down to CFG.rDiff_subsample to increase speed ( If no
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
99 % subsampling shall be done set CFG.rDiff_subsample to 0 )
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
100 CFG.rDiff_subsample=10000;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
101
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
102 % Clip the first CFG.bases_to_clip bases at the end of the reads
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
103 CFG.bases_to_clip=3;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
104
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
105 %Number of bootstraps for nonparametric test
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
106 CFG.bootstraps=1000;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
107
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
108 %Number of bins for variance matching
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
109 CFG.nr_of_slices=10;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
110
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
111 % Tests to perform
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
112 CFG.perform_nonparametric=0;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
113 CFG.perform_parametric=0;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
114 CFG.perform_mmd=0;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
115 CFG.perform_poisson=0;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
116
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
117
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
118 %%%%% rproc settings %%%%%
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
119 CFG.use_rproc = 0; % 1: cluster submission or 0: locally
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
120 if CFG.use_rproc,
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
121 CFG.rproc_num_jobs = 100;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
122 CFG.rproc_memreq = 8000;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
123 CFG.rproc_par.priority = 55;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
124 CFG.rproc_par.resubmit = 3;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
125 CFG.rproc_par.mem_req_resubmit = [ 24000 40000 60000];
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
126 CFG.rproc_par.time_req_resubmit = [60*60 100*60 90*60];
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
127 CFG.rproc_par.express = 0;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
128 CFG.rproc_par.immediately_bg = 0;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
129 CFG.rproc_par.immediately = 0;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
130 CFG.rproc_par.arch = 64;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
131 CFG.rproc_par.identifier = '';
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
132 CFG.rproc_par.verbosity = 0;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
133 CFG.rproc_time = 15*60; % mins
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
134 else
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
135 CFG.rproc_num_jobs = 1;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
136 end
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
137
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
138
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
139
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
140
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
141