annotate GALAXY_FILES/tools/EMBER/Ember_Galaxy.c @ 3:037c3edda16e

Uploaded
author mmaiensc
date Thu, 22 Mar 2012 13:49:52 -0400
parents 003f802d4c7d
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1 /*
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
2 * ember.c : Expectation Maximization of Binding and Expression pRofiles
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
3 * ----------------------------------------------------------------
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
4 * algorithm adapted from MEME algorithm: Bailey and Elkan, Proc Int Conf Intell Syst Mol Biol, 2:28-36 (1994).
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
5 * See Eqs. 3-13, plus "Implementation of MM" section for detailed description
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
6 * changes are as follows:
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
7 * background changes with column (j=1,...,numclasses), so it is a 2D histogram of the same size as the motif model; same for psuedocounts
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
8 * background model is determined by full list of genes (second input); z-values of genes in first input are subtracted
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
9 * update of background can be turned off by commenting out 8th line of z_update()
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
10 * in this case, also comment out index_genes at end of initialize(), as it will save some time
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
11 * sequences cannot overlap, since they are fixed per gene; this simplifies the enumeration of all possibilities
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
12 * a ZOOPS options is included (change in code, parameter zoops); this does not alter the algorithm,
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
13 * only adds a restriction on which genes are chosen as targets in choose_targets()
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
14 * general notes:
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
15 * the common dimensions are:
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
16 * numpeaks - number of peaks, size of peaklist, indexed by m
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
17 * peaklist[m].numgenes - number of genes matched to a peak, size of peaklist[m].genelist, indexed by n
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
18 * numclasses - number of comparisons made (i.e. motif width), indexed by j
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
19 * 5 - number of classifications in each comparison, assumed to be -2,-1,0,1,2, indexed by k
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
20 * 2 - number of models (motif=0 and background=1), indexed by l
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
21 * nmots - total number of motifs to find, indexed by i in main (always stored at l=0, as found motifs are erased)
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
22 * col_tog can be used to force the algorithm to ignore certain columns (behavior dimensions)
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
23 *
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
24 * things to add/consider:
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
25 * - best measure of motif quality score?
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
26 * - want to ensure that the same sequence can't get assigned to more than one motif?
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
27 */
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
28
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
29 #include "all.h"
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
30
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
31 typedef struct {
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
32 char id[50];
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
33 char name[500];
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
34 char chrom[50];
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
35 long start;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
36 long end;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
37 int sense;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
38 double z;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
39 double erase;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
40 double score;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
41 int index; // will refer to index of this id in bklist
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
42 int used; // set to 0 if a non-classified gene
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
43 int assigned; // set to 1 if assigned
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
44 int *values;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
45 } gene;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
46
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
47 typedef struct {
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
48 char info[5000];
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
49 char chrom[50];
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
50 long pkposn;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
51 int numgenes;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
52 int numused;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
53 int assgngene; // set to # targets picked for this gene
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
54 gene *genelist;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
55 } peak;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
56
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
57 // FREE PARAMETERS TO CHOOSE IN COMMAND LINE
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
58 double mu; // scale background histogram by mu for psuedocounts
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
59 double tscale; // scales the threshold (to make choices more or less stringent)
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
60 int zoops; // set to 1 to enforce ZOOPS model (Zero Or One Per Strand)
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
61 int printall; // set to 1 to print all genes, not just targets
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
62 int mots; // number of motifs to search for
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
63 int repeat; // number of times to wean columns and repeat search
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
64 double rsca; // percent of average to set relative entropy cutoff
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
65 // END PARAMETERS TO CHOOSE
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
66
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
67 char *input, *background, *pref, *togs;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
68 char *target_out, *list_out, *model_out, *annot_out;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
69 long iseedval;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
70 int numpeaks, numclasses, usedclasses, numbk, bkused, nmots;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
71 peak *peaklist;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
72 gene *bklist;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
73 double ***hist, ***oldhist, lambda, t, **beta;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
74 double *Beta;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
75 int *col_tog, col_togl, set_togs, *col_tog_cpy;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
76 int verbose;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
77
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
78 void read_cmd_line( int argc, char **argv );
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
79 void print_usage();
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
80 void read_data( char *filename );
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
81 void read_bkgrnd( char *filename );
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
82 void initialize( long *seed );
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
83 void reinitialize( long *seed );
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
84 void beta_update();
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
85 void index_genes();
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
86
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
87 void hist_update();
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
88 void bkhist_update();
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
89 void lambda_update();
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
90 void toggle_update();
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
91 void toggle_reset();
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
92 void z_update();
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
93 void erase_update();
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
94 double lprob( int m, int n, int l );
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
95 double prob( int m, int n, int l );
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
96 void copy_hist();
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
97 double hist_diff();
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
98 double score( int m, int n );
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
99 void t_update();
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
100 double ElogL();
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
101 double ratio_score();
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
102 double rel_entropy();
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
103 double col_rel_entropy( int j );
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
104 double chisq();
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
105 int motif_count();
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
106
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
107 int assign_gene( int m, long *seed );
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
108 void score_genes();
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
109 void choose_targets();
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
110
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
111 void print_peak( FILE *fp, int i );
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
112 void print_gene( FILE *fp, int i, int j );
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
113 void print_state( FILE *fp, char *filename );
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
114 //void print_targets( FILE *fp, char *filename );
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
115 void print_model( FILE *fp, char *filename );
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
116 //void print_annotpeak( FILE *fp, char *filename, int motif );
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
117 char *nice_d( double val );
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
118
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
119 int main(int argc, char *argv[]){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
120 int i,iter,r;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
121 int pfreq;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
122 double diff, conv = 1e-6;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
123 long seed;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
124 FILE *fout;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
125
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
126 if(argc == 1 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
127 print_usage();
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
128 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
129
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
130 /*
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
131 * read command line params
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
132 */
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
133 read_cmd_line( argc, argv );
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
134
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
135 /*
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
136 * allocate output names
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
137 */
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
138 // target_out = (char *) malloc((strlen(pref)+20)*sizeof(char));
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
139 // list_out = (char *) malloc((strlen(pref)+20)*sizeof(char));
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
140 // model_out = (char *) malloc((strlen(pref)+20)*sizeof(char));
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
141 // annot_out = (char *) malloc((strlen(pref)+20)*sizeof(char));
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
142
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
143 /*
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
144 * read in data, copy values from global to local variables
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
145 */
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
146 if( verbose != 0 ) printf("\nReading data and allocating\n");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
147 read_data( input );
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
148 read_bkgrnd( background );
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
149 nmots = mots;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
150 seed = iseedval;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
151
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
152 /*
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
153 * allocate memory for other arrays
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
154 */
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
155 hist = (double ***) alloc3d(sizeof(double),2,numclasses,5);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
156 oldhist = (double ***) alloc3d(sizeof(double),2,numclasses,5);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
157 beta = (double **) alloc2d(sizeof(double),numclasses,5);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
158 Beta = (double *) malloc(numclasses*sizeof(double));
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
159
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
160 /*
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
161 * find nmots motif models
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
162 */
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
163 for(i=1; i<= nmots; i++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
164 /*
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
165 * initialize, fill initial parameter values from z-guesses
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
166 */
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
167 if( verbose != 0 ) printf("Expression Pattern %i,\n Initializing\n", i);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
168 if( i==1 ) initialize( &seed );
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
169 else reinitialize( &seed );
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
170 hist_update();
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
171 lambda_update();
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
172
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
173 /*
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
174 * now sample for awhile, repeating repeat times
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
175 * each repeat starts at the previously refined search (?)
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
176 */
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
177 for(r=0; r<= repeat; r++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
178 if( verbose != 0 ) printf(" Sampling\n");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
179 if( verbose != 0 && repeat> 0 ) printf(" Repeat %i\n", r );
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
180 diff = 1;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
181 pfreq = 10;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
182 iter=1;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
183 while( diff > conv ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
184 z_update(); // E step
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
185 copy_hist();
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
186 hist_update(); // M step
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
187 bkhist_update(); // M step
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
188 lambda_update(); // M step
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
189 diff = hist_diff();
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
190 if( iter%pfreq == 0 && verbose != 0 ) printf(" iteration %i, (normalized) ElogL %.2f, change %.2e\n", iter, ElogL(), diff);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
191 iter++;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
192 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
193 if( repeat > 0 && r< repeat ) toggle_update();
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
194 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
195
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
196 /*
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
197 * score and choose final targets
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
198 */
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
199 t_update();
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
200 if( verbose != 0 ) printf(" Done, scoring and choosing targets on threshold %.3f\n\n", t);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
201 score_genes();
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
202 choose_targets();
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
203
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
204 /*
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
205 * print final targets
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
206 */
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
207 // sprintf( target_out,"%s-%i.targets", pref, i);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
208 // sprintf( list_out,"%s-%i.list", pref, i);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
209 // sprintf( model_out,"%s-%i.model", pref, i);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
210 // sprintf( annot_out,"%s-%i.xls", pref, i);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
211 print_state(fout, target_out);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
212 print_model(fout, model_out);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
213 // print_targets(fout, list_out);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
214 // print_annotpeak( fout, annot_out, i);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
215
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
216 /*
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
217 * erase found motifs (only if we're going around again)
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
218 */
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
219 if( i< nmots ) erase_update();
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
220 if( repeat > 0 ) toggle_reset();
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
221 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
222
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
223 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
224
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
225 /*==================== FUNCTIONS ======================*/
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
226
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
227 /*
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
228 * read command line inputs and options
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
229 */
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
230 void read_cmd_line( int argc, char **argv ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
231 int i, j;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
232 int found_reqs=0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
233
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
234 // default values
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
235 mots = 1;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
236 zoops = 0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
237 printall = 0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
238 mu = 0.1;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
239 tscale = 1.0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
240 iseedval = make_seed();
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
241 set_togs = 0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
242 verbose = 1;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
243 repeat = 0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
244 rsca = 1.0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
245
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
246 // parse options
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
247 for(i=1; i< argc; i++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
248 if( argv[i][0] != '-' ) print_usage();
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
249 i++;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
250 if( i >= argc || argv[i][0] == '-' ) print_usage();
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
251 switch( argv[i-1][1] ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
252 case 'i':
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
253 input = (char *) malloc((strlen(argv[i])+5)*sizeof(char));
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
254 strcpy(input, argv[i]);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
255 found_reqs++;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
256 break;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
257 case 'b':
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
258 background = (char *) malloc((strlen(argv[i])+5)*sizeof(char));
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
259 strcpy(background, argv[i]);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
260 found_reqs++;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
261 break;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
262 case 'o':
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
263 target_out = (char *) malloc((strlen(argv[i])+5)*sizeof(char));
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
264 strcpy(target_out, argv[i]);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
265 found_reqs++;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
266 break;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
267 case 'p':
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
268 model_out = (char *) malloc((strlen(argv[i])+5)*sizeof(char));
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
269 strcpy(model_out, argv[i]);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
270 found_reqs++;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
271 break;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
272 case 'n':
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
273 mots = atoi(argv[i]);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
274 if( mots <= 0 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
275 printf("\nError: choose number of expression patterns to be at least 1\n");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
276 print_usage();
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
277 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
278 break;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
279 case 'z':
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
280 if( argv[i][0] == 'y' ) zoops = 1;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
281 else if( argv[i][0] == 'n' ) zoops = 0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
282 else{
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
283 printf("\nError: if using -z option, choose y or n\n");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
284 print_usage();
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
285 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
286 break;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
287 case 'v':
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
288 if( argv[i][0] == 'y' ) verbose = 1;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
289 else if( argv[i][0] == 'n' ) verbose = 0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
290 else{
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
291 printf("\nError: if using -v option, choose y or n\n");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
292 print_usage();
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
293 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
294 break;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
295 case 'm':
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
296 mu = atof(argv[i]);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
297 if( mu<= 0 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
298 printf("\nError: choose mu to be positive\n");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
299 print_usage();
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
300 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
301 break;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
302 case 't':
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
303 tscale = atof(argv[i]);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
304 if( tscale<= 0 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
305 printf("\nError: choose threshold to be positive\n");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
306 print_usage();
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
307 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
308 break;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
309 case 's':
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
310 iseedval = -1*atol(argv[i]);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
311 if( iseedval>= 0 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
312 printf("\nError: choose seed to be a large positive integer\n");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
313 print_usage();
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
314 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
315 break;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
316 case 'c':
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
317 set_togs = 1;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
318 togs = (char *) malloc((strlen(argv[i])+5)*sizeof(char));
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
319 strcpy( togs, argv[i] );
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
320 break;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
321 case 'a':
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
322 if( argv[i][0] == 'y' ) printall = 1;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
323 else if( argv[i][0] == 'n' ) printall = 0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
324 else{
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
325 printf("\nError: if using -a option, choose y or n\n");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
326 print_usage();
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
327 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
328 break;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
329 case 'r':
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
330 repeat = atoi(argv[i]);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
331 if( repeat< 0 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
332 printf("\nError: choose repeat to be a non-negative integer\n");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
333 print_usage();
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
334 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
335 break;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
336 case 'R':
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
337 rsca = atof(argv[i]);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
338 if( rsca <= 0 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
339 printf("\nError: choose repeat scalar to be positive\n");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
340 print_usage();
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
341 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
342 break;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
343 default:
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
344 printf("\nError: unkown option: -%c\n", argv[i-1][1]);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
345 print_usage();
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
346 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
347 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
348 // double check that the correct number of required inputs has been read in
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
349 if( found_reqs != 4 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
350 printf("\nError: improper number of required inputs found\n");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
351 print_usage();
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
352 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
353 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
354
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
355 /*
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
356 * usage info
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
357 */
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
358 void print_usage(){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
359 printf("\nUsage: -i matched_data -b background_data -o target_output_name -p model_output_name [option value]\n");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
360 printf("Options:\n");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
361 printf(" -n number of expression patterns to find (positive integer, default 1)\n");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
362 printf(" -z enforce ZOOPS model (y or n, default n)\n");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
363 printf(" -m scalar for psuedocounts (positive real number, default 0.1)\n");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
364 printf(" -t scalar for threshold (positive real number, default 1.0)\n");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
365 printf(" -s seed (positive long integer, default chosen from date and time)\n");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
366 printf(" -c column toggle file, used to ignore certain classifications (default: all on)\n");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
367 printf(" -v verboseness, turn on/off printing to stdout (y or n, default: y)\n");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
368 printf(" -a print all genes (and scores), rather than only those above the threshold (y or n, default: n)\n");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
369 printf(" -r repeat search: after convergence of each expression pattern, turn off columns with lower relative entropy\n");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
370 printf(" than average*scalar and repeat expression pattern search (non-negative integer, default 0)\n");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
371 printf(" -R relative entropy scalar: if using the -r option, multiplies the average relative entropy\n");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
372 printf(" by this value to set the threshold for turning off columns (positive real number, default 1.0)\n");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
373 printf("\n");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
374 exit(0);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
375 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
376
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
377 /*
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
378 * function: read data from filename into the peaklist data structure
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
379 * notes on assumed format of data:
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
380 * peak lines start with "PEAK:" field
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
381 * gene lines start with "GENE:" field
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
382 * gene lines with only 1 classification are ignored
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
383 * assume all fields are <5000 characters
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
384 */
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
385 void read_data( char *filename ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
386 int j,m,n;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
387 char val[5000], other[5000];
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
388 FILE *fp;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
389
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
390 /*
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
391 * first count number of peaks, and allocate
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
392 */
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
393 fp = fopen(filename,"rt");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
394 if( fp == NULL ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
395 printf("Error: can't open file %s\n", filename);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
396 exit(0);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
397 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
398 numpeaks = 0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
399 while( fscanf(fp, "%s", &val ) != EOF ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
400 if( contains( val, "PEAK" ) == 1 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
401 numpeaks++;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
402 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
403 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
404 fclose(fp);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
405 peaklist = (peak *) malloc(numpeaks*sizeof(peak));
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
406
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
407 /*
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
408 * now count number of genes w/>1 classification, and allocate
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
409 * also count number of classifications, and allocate that at the end
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
410 * n tracks number of genes, j field; subtract genes that don't have full classifications
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
411 * m tracks peak index; record peak info
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
412 */
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
413 fp = fopen(filename,"rt");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
414 n=-1;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
415 m=-1;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
416 j=0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
417 sprintf(other,"NA");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
418 numclasses=0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
419 while( fscanf(fp, "%s", &val ) != EOF ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
420 if( contains( val, "PEAK" ) == 1 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
421 if( j==7 ) ;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
422 else{
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
423 if( j-6 > numclasses ) numclasses = j-6;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
424 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
425 if( m>=0 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
426 peaklist[m].numgenes = n;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
427 peaklist[m].genelist = (gene *) malloc( peaklist[m].numgenes*sizeof(gene));
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
428 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
429 n=0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
430 j=0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
431 if( strcmp(other, "NA") != 0 ) strcpy(peaklist[m].info, other);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
432 m++;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
433 sprintf(other,"NA");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
434 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
435 else if( contains( val, "GENE" ) == 1 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
436 n++;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
437 j=0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
438 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
439 else if( n==0 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
440 if( j==0 ) strcpy(peaklist[m].chrom, val);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
441 if( j==1 ) peaklist[m].pkposn = atol(val);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
442 if( j> 1 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
443 if( strcmp(other, "NA") == 0 ) strcpy(other, val);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
444 else sprintf(other,"%s %s", other, val);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
445 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
446 j++;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
447 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
448 else{
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
449 j++;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
450 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
451 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
452 peaklist[m].numgenes = n;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
453 peaklist[m].genelist = (gene *) malloc( peaklist[m].numgenes*sizeof(gene));
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
454 strcpy( peaklist[m].info, other);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
455 fclose(fp);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
456
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
457 for(m=0; m< numpeaks; m++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
458 for(n=0; n< peaklist[m].numgenes; n++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
459 peaklist[m].genelist[n].values = (int *) malloc(numclasses*sizeof(int));
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
460 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
461 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
462
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
463 /*
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
464 * now read in gene info
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
465 */
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
466 fp = fopen(filename,"rt");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
467 m=-1;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
468 j=0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
469 while( fscanf(fp, "%s", &val ) != EOF ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
470 if( contains( val, "PEAK" ) == 1 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
471 if( j==7 ) peaklist[m].genelist[n].used = 0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
472 if( j==6+numclasses) peaklist[m].genelist[n].used = 1;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
473 n=-1;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
474 j=0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
475 m++;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
476 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
477 else if( contains( val, "GENE" ) == 1 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
478 if( j==7 ) peaklist[m].genelist[n].used = 0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
479 if( j==6+numclasses ) peaklist[m].genelist[n].used = 1;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
480 n++;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
481 j=0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
482 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
483 else if( n==-1 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
484 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
485 else {
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
486 if( j==0 ) strcpy( peaklist[m].genelist[n].id, val );
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
487 if( j==1 ) strcpy( peaklist[m].genelist[n].name, val );
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
488 if( j==2 ) strcpy( peaklist[m].genelist[n].chrom, val );
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
489 if( j==3 ) peaklist[m].genelist[n].start = atol(val);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
490 if( j==4 ) peaklist[m].genelist[n].end = atol(val);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
491 if( j==5 ) peaklist[m].genelist[n].sense = atoi(val);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
492 if( j>=6 ) peaklist[m].genelist[n].values[j-6] = atoi(val);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
493 j++;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
494 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
495 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
496 if( j==7 ) peaklist[m].genelist[n].used = 0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
497 if( j==6+numclasses) peaklist[m].genelist[n].used = 1;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
498 fclose(fp);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
499
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
500 /*
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
501 * now count how many genes are to be used for each
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
502 */
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
503 for(m=0; m< numpeaks; m++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
504 peaklist[m].numused = 0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
505 for(n=0; n< peaklist[m].numgenes; n++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
506 peaklist[m].numused+= peaklist[m].genelist[n].used;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
507 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
508 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
509 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
510
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
511 void read_bkgrnd( char *filename ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
512 int j,n;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
513 char val[500];
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
514 FILE *fp;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
515
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
516 /*
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
517 * first count number of genes, and allocate
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
518 */
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
519 numbk = count_lines( fp, filename );;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
520 bklist = (gene *) malloc(numbk*sizeof(gene));
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
521
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
522 for(n=0; n< numbk; n++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
523 bklist[n].values = (int *) malloc(numclasses*sizeof(int));
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
524 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
525
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
526 /*
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
527 * now read in gene info
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
528 */
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
529 fp = fopen(filename,"rt");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
530 n=-1;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
531 j=0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
532 while( fscanf(fp, "%s", &val ) != EOF ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
533 if( contains( val, "_a" ) == 1 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
534 if( j==7 ) bklist[n].used = 0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
535 if( j==6+numclasses ) bklist[n].used = 1;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
536 j=1;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
537 n++;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
538 strcpy( bklist[n].id, val );
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
539 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
540 else{
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
541 if( j==1 ) strcpy( bklist[n].name, val );
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
542 if( j==2 ) strcpy( bklist[n].chrom, val );
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
543 if( j==3 ) bklist[n].start = atol(val);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
544 if( j==4 ) bklist[n].end = atol(val);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
545 if( j==5 ) bklist[n].sense = atoi(val);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
546 if( j>=6 ) bklist[n].values[j-6] = atoi(val);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
547 j++;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
548 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
549 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
550 if( j==7 ) bklist[n].used = 0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
551 if( j==6+numclasses) bklist[n].used = 1;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
552 fclose(fp);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
553
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
554 /*
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
555 * now count how many genes are to be used for each
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
556 */
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
557 bkused = 0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
558 for(n=0; n< numbk; n++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
559 bkused+= bklist[n].used;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
560 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
561 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
562
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
563 /*
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
564 * make initial guesses of z-values, set erase to 1
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
565 * read in column toggle file if provided
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
566 * also histogram background and scale into beta
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
567 */
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
568 void initialize( long *seed ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
569 int j,m,n;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
570 int val;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
571 FILE *fp;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
572
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
573 for(m=0; m< numpeaks; m++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
574 for(n=0; n< peaklist[m].numgenes; n++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
575 if( peaklist[m].genelist[n].used == 1 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
576 peaklist[m].genelist[n].erase = 1.0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
577 peaklist[m].genelist[n].z = 0.0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
578 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
579 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
580 if( peaklist[m].numused > 0 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
581 n = assign_gene( m, seed );
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
582 peaklist[m].genelist[n].z = 1.0; // pick one random gene from each set to read into motif model
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
583 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
584 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
585
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
586 col_tog = (int *) malloc(numclasses*sizeof(int));
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
587 col_tog_cpy = (int *) malloc(numclasses*sizeof(int));
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
588 if( set_togs == 0 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
589 for(j=0; j< numclasses; j++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
590 col_tog[j] = 1;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
591 col_tog_cpy[j] = 1;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
592 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
593 usedclasses = numclasses;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
594 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
595 else{
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
596 // first count number of fields
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
597 col_togl = 0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
598 fp = fopen(togs,"rt");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
599 if( fp == NULL ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
600 printf("Error: can't open file %s\n", togs);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
601 exit(0);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
602 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
603 j=0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
604 while( fscanf(fp,"%i", &val) != EOF ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
605 col_togl++;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
606 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
607 fclose(fp);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
608 if( col_togl != numclasses ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
609 printf("Error: number of fields in toggle file (%i) is different from number of classifications in data (%i)\n", col_togl, numclasses);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
610 exit(0);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
611 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
612 // now read in values
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
613 fp = fopen(togs,"rt");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
614 j=0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
615 usedclasses = 0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
616 while( fscanf(fp,"%i", &val) != EOF ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
617 col_tog[j] = val;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
618 col_tog_cpy[j] = val;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
619 j++;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
620 if( val == 1 ) usedclasses++;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
621 if( val != 1 && val != 0 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
622 printf("Error: choose the toggle values to be 0 or 1\n");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
623 exit(0);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
624 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
625 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
626 fclose(fp);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
627 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
628
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
629 for(n=0; n< numbk; n++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
630 bklist[n].z = 1.0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
631 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
632 beta_update();
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
633 if( verbose != 0 ) printf(" Indexing genes in first pass\n");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
634 index_genes();
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
635 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
636
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
637 /*
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
638 * make initial guesses of z-values for a second, etc motif
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
639 * recount number of used genes per peak to those with erase > 0.5
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
640 */
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
641 void reinitialize( long *seed ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
642 int m,n;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
643 for(m=0; m< numpeaks; m++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
644 peaklist[m].numused = 0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
645 for(n=0; n< peaklist[m].numgenes; n++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
646 peaklist[m].genelist[n].z = 0.0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
647 if( peaklist[m].genelist[n].used == 1 && peaklist[m].genelist[n].erase> 0.5 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
648 peaklist[m].numused++;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
649 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
650 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
651 if( peaklist[m].numused > 0 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
652 n = assign_gene( m, seed );
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
653 peaklist[m].genelist[n].z = 1.0; // pick one random gene from each set to read into motif model
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
654 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
655 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
656 for(n=0; n< numbk; n++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
657 bklist[n].z = 1.0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
658 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
659 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
660
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
661 /*
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
662 * histogram background into beta
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
663 */
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
664 void beta_update(){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
665 int j,k,n;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
666 double tot=0.0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
667 bkhist_update();
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
668 for(j=0; j< numclasses; j++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
669 // if( col_tog[j] == 1 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
670 Beta[j] = 0.0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
671 for(k=0; k< 5; k++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
672 beta[j][k] = hist[1][j][k]*mu;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
673 Beta[j]+= beta[j][k];
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
674 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
675 // }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
676 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
677 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
678
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
679 /*
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
680 * for all the genes in peaklist[*].genelist[*], record the index of that gene in bklist
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
681 */
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
682 void index_genes(){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
683 int m,n,nu;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
684 int found;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
685 for(m=0; m< numpeaks; m++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
686 for(n=0; n< peaklist[m].numgenes; n++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
687 if( peaklist[m].genelist[n].used == 1 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
688 found = 0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
689 nu = 0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
690 while(nu< numbk && found == 0){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
691 if( strcmp(peaklist[m].genelist[n].id, bklist[nu].id) == 0 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
692 peaklist[m].genelist[n].index = nu;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
693 found = 1;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
694 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
695 nu++;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
696 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
697 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
698 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
699 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
700 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
701
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
702 /*
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
703 * update motif model histogram
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
704 */
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
705 void hist_update(){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
706 int j,k,m,n;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
707 double tot=0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
708 for(j=0; j< numclasses; j++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
709 // if( col_tog[j] == 1 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
710 for(k=0; k< 5; k++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
711 hist[0][j][k] = 0.0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
712 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
713 // }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
714 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
715 for(m=0; m< numpeaks; m++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
716 for(n=0; n< peaklist[m].numgenes; n++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
717 if( peaklist[m].genelist[n].used == 1 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
718 tot+=peaklist[m].genelist[n].erase*peaklist[m].genelist[n].z;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
719 for(j=0; j< numclasses; j++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
720 // if( col_tog[j] == 1 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
721 k = peaklist[m].genelist[n].values[j]+2;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
722 hist[0][j][k]+= peaklist[m].genelist[n].erase*peaklist[m].genelist[n].z;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
723 // }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
724 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
725 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
726 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
727 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
728 for(j=0; j< numclasses; j++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
729 // if( col_tog[j] == 1 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
730 for(k=0; k< 5; k++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
731 hist[0][j][k]+= beta[j][k];
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
732 hist[0][j][k]/= (tot+Beta[j]);;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
733 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
734 // }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
735 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
736 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
737
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
738 /*
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
739 * update background hist using z values from bklist
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
740 */
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
741 void bkhist_update(){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
742 int j,k,n;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
743 double tot=0.0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
744 for(j=0; j< numclasses; j++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
745 // if( col_tog[j] == 1 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
746 for(k=0; k< 5; k++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
747 hist[1][j][k] = 0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
748 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
749 // }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
750 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
751 for(n=0; n< numbk; n++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
752 if( bklist[n].used == 1 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
753 tot+=bklist[n].z;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
754 for(j=0; j< numclasses; j++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
755 // if( col_tog[j] == 1 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
756 k = bklist[n].values[j]+2;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
757 hist[1][j][k]+= bklist[n].z;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
758 // }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
759 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
760 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
761 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
762 for(j=0; j< numclasses; j++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
763 // if( col_tog[j] == 1 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
764 for(k=0; k< 5; k++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
765 hist[1][j][k]/= tot;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
766 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
767 // }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
768 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
769 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
770
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
771 /*
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
772 * update lambda value
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
773 */
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
774 void lambda_update(){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
775 int m,n;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
776 double tot=0.0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
777 lambda=0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
778 for(m=0; m< numpeaks; m++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
779 for(n=0; n< peaklist[m].numgenes; n++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
780 if( peaklist[m].genelist[n].used == 1 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
781 lambda+= peaklist[m].genelist[n].z;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
782 tot++;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
783 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
784 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
785 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
786 lambda/=tot;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
787 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
788
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
789 /*
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
790 * update the toggle columns based on which columns have better than average relative entropy
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
791 */
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
792 void toggle_update(){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
793 int j;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
794 double avg=0.0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
795 avg = rsca*rel_entropy();
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
796 usedclasses=0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
797 for(j=0; j< numclasses; j++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
798 if( col_tog[j] == 1 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
799 if( col_rel_entropy(j) < avg ) col_tog[j] = 0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
800 if( col_tog[j] == 1 ) usedclasses++;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
801 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
802 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
803 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
804
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
805 /*
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
806 * reset col_tog values with their original ones
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
807 */
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
808 void toggle_reset(){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
809 int j;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
810 usedclasses=0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
811 for(j=0; j< numclasses; j++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
812 col_tog[j] = col_tog_cpy[j];
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
813 if( col_tog[j] == 1 ) usedclasses++;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
814 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
815 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
816
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
817 /*
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
818 * update z
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
819 */
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
820 void z_update(){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
821 int j,m,n;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
822 double a, c, denom;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
823 for(m=0; m< numpeaks; m++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
824 for(n=0; n< peaklist[m].numgenes; n++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
825 if( peaklist[m].genelist[n].used == 1 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
826 // new z value for motif model
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
827 a = lprob(m,n,0)+log(lambda);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
828 c = lprob(m,n,1)+log(1-lambda);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
829 if( a> c ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
830 denom = a + log(1+exp(c-a));
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
831 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
832 else{
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
833 denom = c + log(1+exp(a-c));
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
834 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
835 peaklist[m].genelist[n].z = exp( lprob(m,n,0)+log(lambda) - denom );
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
836 // peaklist[m].genelist[n].z = prob(m,n,0)*lambda/( prob(m,n,0)*lambda + prob(m,n,1)*(1-lambda) );
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
837 // new z value for corresponding gene in background model
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
838 bklist[peaklist[m].genelist[n].index].z = 1.0-peaklist[m].genelist[n].z;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
839 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
840 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
841 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
842 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
843
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
844 /*
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
845 * update erase parameters
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
846 */
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
847 void erase_update(){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
848 int j,m,n;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
849 for(m=0; m< numpeaks; m++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
850 for(n=0; n< peaklist[m].numgenes; n++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
851 if( peaklist[m].genelist[n].used == 1 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
852 peaklist[m].genelist[n].erase*= (1-peaklist[m].genelist[n].z);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
853 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
854 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
855 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
856 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
857
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
858 /*
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
859 * calculate (log) (multinomial) probability of sequence m,n in model l
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
860 */
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
861 double lprob( int m, int n, int l ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
862 int j,k;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
863 double val=0.0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
864 for(j=0; j< numclasses; j++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
865 if( col_tog[j] == 1 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
866 k = peaklist[m].genelist[n].values[j]+2;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
867 val+= log(hist[l][j][k]);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
868 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
869 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
870 return val;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
871 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
872 double prob( int m, int n, int l ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
873 int j,k;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
874 double val=1.0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
875 for(j=0; j< numclasses; j++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
876 if( col_tog[j] == 1 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
877 k = peaklist[m].genelist[n].values[j]+2;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
878 val*= hist[l][j][k];
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
879 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
880 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
881 return val;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
882 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
883
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
884 /*
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
885 * copy hist into oldhist before updating
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
886 */
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
887 void copy_hist(){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
888 int j,k,l;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
889 for(l=0; l< 2; l++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
890 for(j=0; j< numclasses; j++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
891 // if( col_tog[j] == 1 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
892 for(k=0; k< 5; k++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
893 oldhist[l][j][k] = hist[l][j][k];
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
894 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
895 // }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
896 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
897 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
898 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
899
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
900 /*
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
901 * compute change in hist
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
902 */
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
903 double hist_diff(){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
904 int j,k,l;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
905 double val=0.0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
906 for(l=0; l< 2; l++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
907 for(j=0; j< numclasses; j++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
908 if( col_tog[j] == 1 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
909 for(k=0; k< 5; k++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
910 val+= fabs( oldhist[l][j][k]-hist[l][j][k] );
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
911 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
912 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
913 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
914 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
915 return val;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
916 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
917
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
918 /*
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
919 * calculate the score of a sequence
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
920 */
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
921 double score( int m, int n ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
922 int j,k;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
923 double val=0.0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
924 for(j=0; j< numclasses; j++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
925 if( col_tog[j] == 1 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
926 k = peaklist[m].genelist[n].values[j]+2;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
927 val+= log( hist[0][j][k]/hist[1][j][k] );
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
928 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
929 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
930 return val;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
931 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
932
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
933 /*
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
934 * calculate value of t (threshold)
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
935 */
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
936 void t_update(){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
937 t = tscale*log( (1-lambda)/lambda );
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
938 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
939
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
940 /*
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
941 * quality scores of a motif, averaging over only those sequences picked as targets
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
942 * ElogL: log-likelihood of the combination of models, averaged by # genes
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
943 * ratio_score: log-likelihood ratio of motif vs background models
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
944 * rel_entropy: relative entropy of motif to background
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
945 * col_rel_entropy: same as rel_entropy, but for just one column (rel_entropy is average over all cols)
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
946 * chisq: chi-square statistic
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
947 */
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
948 double ElogL(){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
949 int m,n;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
950 double val=0.0, tot=0.0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
951 for(m=0; m< numpeaks; m++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
952 for(n=0; n< peaklist[m].numgenes; n++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
953 if( peaklist[m].genelist[n].used == 1 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
954 val+= peaklist[m].genelist[n].z*lprob(m, n, 0 ) \
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
955 + (1-peaklist[m].genelist[n].z)*lprob(m, n, 1 )\
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
956 + peaklist[m].genelist[n].z*log(lambda) \
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
957 + (1-peaklist[m].genelist[n].z)*log(1-lambda);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
958 tot++;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
959 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
960 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
961 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
962 return val/tot;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
963 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
964
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
965 double ratio_score(){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
966 int m,n;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
967 double val=0.0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
968 double tot=0.0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
969 for(m=0; m< numpeaks; m++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
970 for(n=0; n< peaklist[m].numgenes; n++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
971 if( peaklist[m].genelist[n].assigned == 1 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
972 val+= lprob(m,n,0)-lprob(m,n,1);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
973 tot++;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
974 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
975 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
976 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
977 return val/tot;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
978 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
979
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
980 double rel_entropy(){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
981 int j,k;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
982 double val=0.0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
983 for(j=0; j< numclasses; j++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
984 if( col_tog[j] == 1 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
985 val+= col_rel_entropy( j );
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
986 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
987 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
988 return val/usedclasses;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
989 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
990
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
991 double col_rel_entropy( int j ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
992 int k;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
993 double val=0.0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
994 for(k=0; k< 5; k++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
995 if( hist[1][j][j] > 0 && hist[0][j][k] > 0 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
996 // do in log base 2 for bits
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
997 val+= hist[0][j][k]*log(hist[0][j][k]/hist[1][j][k])/log(2);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
998 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
999 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1000 return val;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1001 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1002
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1003 double chisq(){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1004 int j, k;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1005 double val=0.0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1006 for(j=0; j< numclasses; j++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1007 if( col_tog[j] == 1 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1008 for(k=0; k< 5; k++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1009 val+= pow(hist[0][j][k] - hist[1][j][k],2)/hist[1][j][k];
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1010 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1011 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1012 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1013 return val;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1014 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1015
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1016 /*
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1017 * number of members of a motif model
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1018 */
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1019 int motif_count(){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1020 int m,n;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1021 int tot=0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1022 for(m=0; m< numpeaks; m++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1023 for(n=0; n< peaklist[m].numgenes; n++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1024 if( peaklist[m].genelist[n].assigned == 1 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1025 tot++;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1026 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1027 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1028 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1029 return tot;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1030 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1031
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1032 /*
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1033 * assign a random used gene: one that is used and hasn't been erased
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1034 */
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1035 int assign_gene( int m, long *seed ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1036 int n;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1037 n = floor( peaklist[m].numgenes*ran1(seed) );
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1038 while( peaklist[m].genelist[n].used == 0 || peaklist[m].genelist[n].erase < 0.5 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1039 n = floor( peaklist[m].numgenes*ran1(seed) );
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1040 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1041 return n;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1042 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1043
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1044 /*
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1045 * score all genes using the models
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1046 */
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1047 void score_genes(){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1048 int m,n;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1049 for(m=0; m< numpeaks; m++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1050 for(n=0; n< peaklist[m].numgenes; n++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1051 if( peaklist[m].genelist[n].used == 1 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1052 peaklist[m].genelist[n].score= score( m, n );
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1053 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1054 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1055 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1056 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1057
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1058 /*
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1059 * choose targets using the scores, and ZOOPS setting
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1060 * zoops = 1 -> 0 or 1 target/strand
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1061 * else -> all above threshold
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1062 */
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1063 void choose_targets(){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1064 int m,n;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1065 int best;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1066 for(m=0; m< numpeaks; m++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1067 peaklist[m].assgngene = 0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1068 best=-1;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1069 for(n=0; n< peaklist[m].numgenes; n++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1070 if( peaklist[m].genelist[n].used == 1 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1071 if( peaklist[m].genelist[n].score > t ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1072 peaklist[m].genelist[n].assigned = 1;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1073 peaklist[m].assgngene++;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1074 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1075 else peaklist[m].genelist[n].assigned = 0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1076 if( best == -1 ) best = n;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1077 else if( peaklist[m].genelist[n].score > peaklist[m].genelist[best].score ) best = n;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1078 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1079 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1080 if( zoops == 1 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1081 for(n=0; n< peaklist[m].numgenes; n++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1082 if( peaklist[m].genelist[n].used == 1 && n != best ) peaklist[m].genelist[n].assigned = 0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1083 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1084 if( peaklist[m].genelist[best].score > t ) peaklist[m].assgngene = 1;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1085 else peaklist[m].assgngene = 0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1086 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1087 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1088 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1089
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1090 /*
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1091 * functions to print to output (fp must already be opened for top two)
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1092 */
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1093 void print_peak( FILE *fp, int m ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1094 int n;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1095 fprintf(fp,"PEAK: %s %ld %s\n", peaklist[m].chrom, peaklist[m].pkposn, peaklist[m].info);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1096 for(n=0; n< peaklist[m].numgenes; n++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1097 if( printall == 0 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1098 if( peaklist[m].genelist[n].used == 1 && peaklist[m].genelist[n].assigned == 1 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1099 print_gene( fp, m, n );
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1100 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1101 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1102 if( printall == 1 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1103 if( peaklist[m].genelist[n].used == 1 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1104 print_gene( fp, m, n );
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1105 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1106 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1107 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1108 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1109
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1110 void print_gene( FILE *fp, int m, int n ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1111 int j;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1112 if( peaklist[m].genelist[n].used == 1 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1113 fprintf(fp, "TGENE: %0.3f %s %s %s %ld %ld %i", peaklist[m].genelist[n].score, peaklist[m].genelist[n].id, peaklist[m].genelist[n].name, peaklist[m].genelist[n].chrom, peaklist[m].genelist[n].start, peaklist[m].genelist[n].end, peaklist[m].genelist[n].sense);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1114 for(j=0; j< numclasses; j++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1115 if( col_tog[j] == 0 ) fprintf(fp," %i ", peaklist[m].genelist[n].values[j]);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1116 else fprintf(fp," %i", peaklist[m].genelist[n].values[j]);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1117 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1118 fprintf(fp,"\n");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1119 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1120 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1121
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1122 void print_state(FILE *fp, char *filename){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1123 int m;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1124 fp = fopen(filename,"w");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1125 for(m=0; m< numpeaks; m++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1126 print_peak(fp, m);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1127 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1128 fclose(fp);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1129 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1130
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1131 /*
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1132 * this prints a list of <region id> <name>
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1133 */
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1134 /****************
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1135 void print_targets( FILE *fp, char *filename ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1136 int m,n,nu;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1137 fp = fopen(filename,"w");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1138 for(m=0; m< numpeaks; m++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1139 if( peaklist[m].assgngene > 0 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1140 fprintf(fp,"%s ", peaklist[m].regid);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1141 n=0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1142 nu=0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1143 while( n< peaklist[m].numgenes ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1144 if( peaklist[m].genelist[n].used == 1 && peaklist[m].genelist[n].assigned == 1 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1145 if( nu< peaklist[m].assgngene-1 ) fprintf(fp,"%s,", peaklist[m].genelist[n].name);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1146 else fprintf(fp,"%s\n", peaklist[m].genelist[n].name);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1147 nu++;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1148 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1149 n++;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1150 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1151 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1152 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1153 fclose(fp);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1154 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1155 *************/
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1156
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1157 /*
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1158 * print the motif model and its score
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1159 */
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1160 void print_model( FILE *fp, char *filename ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1161 int j,k;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1162 fp = fopen(filename,"w");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1163 fprintf(fp,"comparison\\class; avg relative entropy %0.3f, %i genes, threshold %0.3f, quality %0.1f\n", rel_entropy(), motif_count(), t, rel_entropy()*motif_count() );
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1164 fprintf(fp," ++ + 0 - --\n");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1165 for(j=0; j< numclasses; j++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1166 if( j< 10 ) fprintf(fp," ");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1167 if( col_tog[j] == 0 ) fprintf(fp,"#");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1168 else fprintf(fp," ");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1169 fprintf(fp,"%i,%.3f",j+1, col_rel_entropy(j) );
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1170 for(k=4; k>=0; k--){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1171 if( hist[1][j][k] > 0 ) fprintf(fp,"%s", nice_d(log(hist[0][j][k]/hist[1][j][k])) );
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1172 else fprintf(fp," NA");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1173 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1174 fprintf(fp,"\n");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1175 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1176 fclose(fp);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1177 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1178
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1179 /*
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1180 * prints a double with 3 digits, aligned by spaces
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1181 * longest would be -999.000, so 8 characters
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1182 * shortest would be 1.000, 5 characters
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1183 * up to 3 spaces
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1184 */
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1185 char *nice_d( double val ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1186 char *returnval;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1187 char *dval;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1188 char *spaces;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1189 int i, numspaces=0;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1190
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1191 returnval = (char *) malloc(10*sizeof(char));
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1192 dval = (char *) malloc(10*sizeof(char));
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1193 spaces = (char *) malloc(10*sizeof(char));
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1194
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1195 sprintf(dval,"%.3f", val);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1196
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1197 if( val >= 0 ) numspaces++;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1198 if( fabs(val) < 100 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1199 if( fabs(val) < 10 ) numspaces+=2;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1200 else numspaces++;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1201 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1202 if( numspaces == 0 ) sprintf(spaces,"");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1203 if( numspaces == 1 ) sprintf(spaces," ");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1204 if( numspaces == 2 ) sprintf(spaces," ");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1205 if( numspaces == 3 ) sprintf(spaces," ");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1206 sprintf(returnval,"%s%s", spaces, dval);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1207 return returnval;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1208 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1209
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1210 /*
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1211 * print the peaks only in annotation format, for use in comppeaks, subpeaks, etc
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1212 */
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1213 /**************
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1214 void print_annotpeak( FILE *fp, char *filename, int motif ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1215 int m;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1216 fp = fopen(filename,"w");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1217 for(m=0; m< numpeaks; m++){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1218 if( peaklist[m].assgngene > 0 ){
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1219 fprintf(fp,"%s-%i %s %s %ld %ld %ld ", peaklist[m].filename, motif, peaklist[m].regid, peaklist[m].chrom, peaklist[m].pkposn, peaklist[m].start, peaklist[m].end );
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1220 fprintf(fp,"%f 1.0 1.0 1 1 1.0 ", peaklist[m].height );
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1221 fprintf(fp,"1 1.0 1.0 1.0 1 -1.0 poop 1\n");
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1222 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1223 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1224 close(fp);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1225 }
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1226 *************/
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1227