diff SNV/SNVMix2_source/SNVMix2-v0.12.1-rc1/SNVMix2.c @ 0:74f5ea818cea

Uploaded
author ryanmorin
date Wed, 12 Oct 2011 19:50:38 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SNV/SNVMix2_source/SNVMix2-v0.12.1-rc1/SNVMix2.c	Wed Oct 12 19:50:38 2011 -0400
@@ -0,0 +1,1828 @@
+/* The MIT License
+
+   Copyright (c) 2009, by Sohrab Shah <sshah@bccrc.ca> and Rodrigo Goya <rgoya@bcgsc.ca>
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+   SOFTWARE.
+*/
+
+/*
+C Implementation of SNVMix2
+*/
+
+#define VERSION "0.12.1-rc1"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include "SNVMix2.h"
+
+#include "bam.h"
+
+#define START_QNUM 1000
+
+int main(int argc, char **argv) {
+	
+	param_struct params;// = {NULL, NULL, NULL, NULL, NULL, 0, 0, 0, 0, 0};
+
+	initSNVMix(argc, argv, &params);
+
+	if(params.classify || params.filter) {
+		if(params.input_type == Q_PILEUP)
+			snvmixClassify_qualities(&params);
+		else if(params.input_type == M_PILEUP || params.input_type == S_PILEUP)
+			snvmixClassify_pileup(&params);
+		else if(params.input_type == BAM_FILE) {
+			fprintf(stderr,"Output is:\n");
+			fprintf(stderr,"\tchr:pos ref nref ref:ref_count,nref:nref_count,pAA,pAB,pBB,maxP");
+			fprintf(stderr,"\tref_fwd ref_rev");
+			fprintf(stderr,"\tnref_fwd nref_rev");
+			fprintf(stderr,"\tnref_center nref_edges");
+			fprintf(stderr,"\tindel_pos");
+			fprintf(stderr,"\tref_clean nref_clean");
+			fprintf(stderr,"\tref_bad_pair nref_bad_pair");
+			fprintf(stderr,"\tref_ins nref_ins");
+			fprintf(stderr,"\tref_del nref_del");
+			fprintf(stderr,"\tref_junc nref_junc");
+			fprintf(stderr,"\tnref_in_unique_pos");
+			fprintf(stderr,"\traw[A,C,G,T,N]");
+			fprintf(stderr,"\tthr[A,C,G,T,N]\n");
+			snvmixClassify_bamfile(&params);
+		}
+	} else if(params.train) {
+		if(params.input_type == M_PILEUP || params.input_type == S_PILEUP)
+			snvmixTrain_pileup(&params);
+		else if(params.input_type == Q_PILEUP) {
+			fprintf(stderr,"Sorry, Training with quality-type input is not yet implemented\n");
+			exit(1);
+		}
+	}
+	return(0);
+}
+
+/*
+	void snvmixClassify_qualities
+	Arguments:
+		*params : pointer to model parameters and command line options
+	Function:
+		Reads a pilep-style file that has been preprocessed to include the quality of
+		calls being the reference allele and the maping quality both in decimal values.
+
+		For each line it evaluates de model according to the parameters in *params and
+		outputs the corresponding SNV prediction values.
+
+		This function may come in handy when filtering of calls is done by a
+		different program or the base-calling and maping quality information is not
+		in a pileup/phred format.
+	
+		The file read is a tab-separated file where the columns are:
+			-  target sequence (e.g. chromosome)
+			-  sequence position
+			-  reference base
+			-  non-reference base
+			-  comma separated probabilities of the call being the reference
+			-  comma separated mapping qualities, one for each base call
+
+*/
+void snvmixClassify_qualities(param_struct *params) {
+	FILE *fptrIN, *fptrOUT;
+
+	char *line = NULL;
+	size_t line_len = 0, prev_len = 0;
+	int read_len = 0;
+
+	int qual_num = 0, col_num = 0;
+
+	char *col, *qual;
+	char *col_sep = "\t", *col_str, *col_ptr;
+	char *qual_sep = ",", *qual_str, *qual_ptr;
+	char *chr, *pos, *ref, *nref;
+
+	long double *bQ, *mQ, *tmpQ;
+	int pos_int, depth = 0, maxp;
+	long int depth_allocated = 0;
+
+	long double bsum = 0, msum = 0, *b, *mu, *notmu, *pi, *log_pi, z = 0;
+	int i, k, states;
+
+	states = params->states;
+
+	// Initialize local values of parameters
+	mu = params->mu;
+	pi = params->pi;
+
+	if( ( b = malloc(states * sizeof(long double)) ) == NULL) { perror("[snvmixClassify_qualities] Could not allocate b\n"); exit(1); }
+	if( ( notmu = malloc(states * sizeof(long double)) ) == NULL) { perror("[snvmixClassify_qualities] Could not allocate notmu\n"); exit(1); }
+	if( ( log_pi = malloc(states * sizeof(long double)) ) == NULL) { perror("[snvmixClassify_qualities] Could not allocate log_pi\n"); exit(1); }
+
+	for(k = 0; k < states; k++) {
+		notmu[k] = 1 - mu[k];
+		log_pi[k] = logl(pi[k]);
+	}
+	fptrIN = params->input;
+	fptrOUT = params->output;
+
+
+	if( (bQ = malloc(START_QNUM * sizeof (long double))) == NULL) {
+		perror("ERROR allocating initial space for bQ"); exit(1);
+	}
+	if( (mQ = malloc(START_QNUM * sizeof (long double))) == NULL) {
+		perror("ERROR allocating initial space for mQ"); exit(1);
+	}
+	depth_allocated = START_QNUM;
+#if defined __linux__ || defined _GNU_SOURCE
+	while( (read_len = getline(&line,&line_len,fptrIN)) != -1 )
+#endif
+#ifdef __APPLE__
+	while( (line = fgetln(fptrIN,&line_len)) != NULL )
+#endif
+	{
+		line[line_len-1] = '\0';
+		depth = 0;
+		col_ptr = NULL;
+		for(col_num = 0, col_str = line; ; col_num++, col_str = NULL) {
+			col = strtok_r(col_str, "\t", &col_ptr);
+			if(col == NULL) {
+				break;
+			}
+			if(col_num == 0) {
+				chr = col;
+			} else if(col_num == 1) {
+				pos = col;
+				pos_int = strtol(pos, NULL, 10);
+			} else if(col_num == 2) {
+				ref = col;
+			} else if(col_num == 3) {
+				nref = col;
+			} else if(col_num == 4) {
+				for(qual_num = 0, qual_str = col; ; qual_num++, qual_str = NULL) {
+					qual = strtok_r(qual_str, ",", &qual_ptr);
+					if(qual == NULL) {
+						break;
+					}
+					if(qual_num >= depth_allocated) {
+						if( (bQ = realloc(bQ, sizeof(long double) * (depth_allocated + START_QNUM))) == NULL) {
+							perror("ERROR allocating bQ"); exit(1);
+						}
+						if( (mQ = realloc(mQ, sizeof(long double) * (depth_allocated + START_QNUM))) == NULL) {
+							perror("ERROR allocating mQ"); exit(1);
+						}
+						depth_allocated = depth_allocated + START_QNUM;
+					}
+					bQ[qual_num] = atof(qual);
+				}
+				depth = qual_num;
+			} else if(col_num == 5) {
+				for(qual_num = 0, qual_str = col; ; qual_num++, qual_str = NULL) {
+					qual = strtok_r(qual_str, ",", &qual_ptr);
+					if(qual == NULL) {
+						break;
+					}
+					if(qual_num >= depth_allocated) {
+						fprintf(stderr, "FATAL ERROR: should not have more mapping qualities than base qualities\n");
+						exit(1);
+					}
+					mQ[qual_num] = atof(qual);
+				}
+				if(depth != qual_num) {
+					fprintf(stderr, "FATAL ERROR: should not have more base qualities than mapping qualities\n");
+					exit(1);
+				}
+			}
+		}
+
+
+		for(k = 0; k < states; k++)
+			b[k] = 0;
+		
+    		//b = sum(log(notm*0.5 + m.*(yr.*mur+notyr.*notmur)),2);
+		for(qual_num = 0; qual_num < depth; qual_num++)
+			for(k = 0; k < states; k++)
+				b[k] = b[k] + logl((1- mQ[qual_num])*0.5 + mQ[qual_num] * (bQ[qual_num]*mu[k]+(1-bQ[qual_num])*notmu[k]));
+
+		z = 0;
+		for(k = 0; k < states; k++) {
+			b[k] = expl(b[k] + log_pi[k]);
+			z += b[k];
+		}
+		if(!z) { z = 1; }
+		for(k = 0; k < states; k++)
+			b[k] = b[k] / z;
+		maxp = 0;
+		z = b[0];
+		for(k = 0; k < states; k++)
+			if( b[k] > z) {
+				maxp = k;
+				z = b[k];
+			}
+
+		fprintf(fptrOUT,"%s\t%s\t%s\t%s",chr, pos, ref, nref);
+		for(k = 0; k , states; k++)
+			fprintf(fptrOUT,"%0.10Lf,",b[k]);
+		fprintf(fptrOUT,"%d\n",maxp+1);
+	}
+	fclose(fptrIN);
+	fclose(fptrOUT);
+	free(line);
+
+	free(b); free(notmu); free(log_pi);
+}
+
+
+/*
+	void snvmixClassify_pileup
+	Arguments:
+		*params : pointer to model parameters and command line options
+	Function:
+		Reads a MAQ or Samtools pileup format. For required formatting we refer the user
+		to the corresponding websites:
+			http://maq.sourceforge.net/
+			http://samtools.sourceforge.net/
+
+		In general the format of both files can be described as a tab separated file
+		where columns are:
+			-  target sequence (e.g. chromosome)
+			-  sequence position
+			-  reference base
+			-  depth
+			-  stream of base calls
+			-  stream of base call PHRED-scaled qualities
+			-  stream of mapping PHRED-scaled qualities
+
+		Note: when handling pileup files, care should be taken when doing copy-paste
+		operations not to transform 'tabs' into streams of spaces.
+
+		For each line it evaluates de model according to the parameters in *params and
+		outputs the corresponding SNV prediction values.
+
+		Predictions can be output either only for positions where when non-reference values
+		are observed or, when run with '-f' flag, for all positions. The -f flag is useful
+		when the resulting calls are being compared or joined accros different datasets
+		and all predictions need to be present.
+*/
+void snvmixClassify_pileup(param_struct *params) {
+//MAQ:
+//	1       554484  C       1752    @,,.,.,, @xxx @xxx xxxxx
+//SAM:
+//	1       554484  C       1752    ,,.,.,,	xxx xxxx
+	FILE *fptrIN, *fptrOUT;
+
+	char *line = NULL;
+	size_t line_len = 0, prev_len = 0;
+	int read_len = 0;
+	int col_num = 0;
+	long int line_num = 0;
+
+	char *col, *qual;
+	char *col_sep = "\t", *col_str, *col_ptr;
+	char *qual_sep = ",", *qual_str, *qual_ptr;
+	char *chr, *pos, *ref, nref, call, nref_skip = 'N';
+
+	char *bQ, *mQ;
+	int *calls, *tmpQ;
+	int pos_int, depth = 0, qual_num,  maxp;
+	int depth_allocated = 0;
+
+	long double bsum = 0, msum = 0, *b, *mu, *notmu, *pi, *log_pi, z = 0;
+	int nref_count = 0, ref_count = 0;
+	long double Y, not_Y;
+	long double phred[PHRED_MAX + 1];
+	int i, call_i, k, states;
+
+	//initPhred(phred, PHRED_MAX+1);
+	initPhred(phred, PHRED_MAX);
+
+	states = params->states;
+
+	// Initialize local values of parameters
+	mu = params->mu;
+	pi = params->pi;
+
+	if( ( b = malloc(states * sizeof(long double)) ) == NULL) { perror("[snvmixClassify_pileup] Could not allocate b\n"); exit(1); }
+	if( ( notmu = malloc(states * sizeof(long double)) ) == NULL) { perror("[snvmixClassify_pileup] Could not allocate notmu\n"); exit(1); }
+	if( ( log_pi = malloc(states * sizeof(long double)) ) == NULL) { perror("[snvmixClassify_pileup] Could not allocate log_pi\n"); exit(1); }
+
+	for(k = 0; k < states; k++) {
+		notmu[k] = 1 - mu[k];
+		log_pi[k] = logl(pi[k]);
+	}
+	fptrIN = params->input;
+	fptrOUT = params->output;
+	if(params->full)
+		nref_skip = -2;
+
+
+	if( (calls = malloc(START_QNUM * sizeof (int))) == NULL) {
+		perror("ERROR allocating initial space for calls"); exit(1);
+	}
+	depth_allocated = START_QNUM;
+#if defined __linux__ || defined _GNU_SOURCE
+	while( (read_len = getline(&line,&line_len,fptrIN)) != -1 )
+#endif
+#ifdef __APPLE__
+	while( (line = fgetln(fptrIN,&line_len)) != NULL )
+#endif
+	{
+		line[line_len-1] = '\0';
+		line_num++;
+		depth = 0;
+		nref = 0;
+		col_ptr = NULL;
+		for(col_num = 0, col_str = line; nref != nref_skip ; col_num++, col_str = NULL) {
+			col = strtok_r(col_str, "\t", &col_ptr);
+			if(col == NULL) {
+				break;
+			}
+			if(col_num == 0) {
+				chr = col;
+			} else if(col_num == 1) {
+				pos = col;
+			} else if(col_num == 2) {
+				ref = col;
+			} else if(col_num == 3) {
+				depth = atoi(col);
+				if(depth > depth_allocated) {
+					if( (calls = realloc(calls, sizeof (int) * (depth + START_QNUM))) == NULL) {
+						perror("ERROR allocating space for calls"); exit(1);
+					}
+					depth_allocated = depth + START_QNUM;
+				}
+			} else if(col_num == 4) {
+				if(params->input_type == M_PILEUP)
+					col = col+sizeof(char);
+					snvmixGetCallString(col, calls, depth, &nref);
+			} else if(col_num == 5) {
+				bQ = col;
+				// If it's a MAQ pileup, we need to skip the @ sign
+				if(params->input_type == M_PILEUP)
+					bQ = bQ + sizeof(char);
+				for(call_i = 0; call_i < depth; call_i++)
+					bQ[call_i] = bQ[call_i]-33;
+			} else if(col_num == 6) {
+				mQ = col;
+				// If it's a MAQ pileup, we need to skip the @ sign
+				if(params->input_type == M_PILEUP)
+					mQ = mQ + sizeof(char);
+				for(call_i = 0; call_i < depth; call_i++)
+					mQ[call_i] = mQ[call_i]-33;
+			}
+		}
+		// If we quit the for because no nref was found, skip this too, next line
+	nref = snvmixFilterCalls(calls,depth,bQ,mQ,params);
+	nref_count = 0;
+	ref_count = 0;
+	for(qual_num = 0; qual_num < depth; qual_num++) {
+		//if( snvmixSkipCall(calls,qual_num,params,bQ,mQ) )
+		if(calls[qual_num] == -1)
+			continue;
+		if(calls[qual_num] == 1)
+			ref_count++;
+		if(calls[qual_num] == nref)
+			nref_count++;
+	}
+if(params->filter) {
+	fprintf(fptrOUT,"%s:%s\t%s:%d\t%c:%d\t", chr, pos, ref, ref_count, nref, nref_count);
+	for(qual_num = 0; qual_num < ref_count; qual_num++)
+		fprintf(fptrOUT,",");
+	for(qual_num = 0; qual_num < nref_count; qual_num++)
+		fprintf(fptrOUT,"%c",nref);
+	fprintf(fptrOUT,"\n");
+} else {
+		if(nref == nref_skip)
+			continue;
+		//nref = snvmixFilterCalls(calls,depth,bQ,mQ,params);
+		//if(nref == nref_skip)
+		//	continue;
+		for(k = 0; k < states; k++)
+			b[k] = 0;
+		nref_count = 0;
+		for(qual_num = 0; qual_num < depth; qual_num++) {
+			//if( snvmixSkipCall(calls,qual_num,params,bQ,mQ) )
+			if(calls[qual_num] == -1)
+				continue;
+		// from matlab:
+		// b = sum(log(notm*0.5 + m.*(yr.*mur+notyr.*notmur)),2);
+			if(calls[qual_num] == 1) {
+				// If REF then
+				Y = phred[(unsigned char) bQ[qual_num]];
+				not_Y = 1-phred[(unsigned char) bQ[qual_num]];
+			} else {
+				nref_count++;
+				// If NREF then
+				Y = (1-phred[(unsigned char) bQ[qual_num]])/3;
+				not_Y = phred[(unsigned char) bQ[qual_num]] + 2*(1-phred[(unsigned char)bQ[qual_num]])/3;
+			}
+			for(k = 0; k < states; k++)
+				b[k] = b[k] + logl((1-phred[(unsigned char) mQ[qual_num]])*0.5 + phred[(unsigned char) mQ[qual_num]] * (Y * mu[k] + not_Y * notmu[k]));
+		}
+		// Check if any non-references actually passed the filter
+		//if(!nref_count)
+		//	continue;
+		z = 0;
+		for(k = 0; k < states; k++) {
+			b[k] = expl(b[k] + log_pi[k]);
+			z += b[k];
+		}
+		if(!z) z = 1;
+		for(k = 0; k < states; k++)
+			b[k] = b[k] / z;
+		maxp = 0;
+		z = b[0];
+		for(k = 0; k < states; k++)
+			if( b[k] > z) {
+				maxp = k;
+				z = b[k];
+			}
+
+
+		fprintf(fptrOUT,"%s:%s\t%s\t%c\t%s:%d,%c:%d,",chr,pos, ref, nref, ref, ref_count,  nref, nref_count);
+		for(k = 0; k < states; k++)
+			fprintf(fptrOUT,"%0.10Lf,",b[k]);
+		fprintf(fptrOUT,"%d\n",maxp+1);
+}
+	}
+	fclose(fptrIN);
+	fclose(fptrOUT);
+	free(line);
+
+	free(b); free(notmu); free(log_pi);
+}
+
+int snvmixClassify_bamfile(param_struct *params) {
+	snvmix_pl_t snvmix_pl;
+	int states, i, k, bam_flag_mask;
+
+	snvmix_pl.params = params;
+	snvmix_pl.tid = -1;
+	// TODO: change this to read range
+	snvmix_pl.begin = 0;
+	snvmix_pl.end = 0x7fffffff;
+	snvmix_pl.in = samopen(params->inputfile, "rb", 0);  
+	if (snvmix_pl.in == 0) {
+		fprintf(stderr, "ERROR: Fail to open BAM file %s\n", params->bamfile);
+		return 1;
+	}
+	snvmix_pl.fai = fai_load(params->fastafile);
+	if (snvmix_pl.fai == 0) {
+		fprintf(stderr, "Fail to open BAM file %s\n", params->fastafile);
+		return 1;
+	}
+	if(params->listfile) snvmix_pl.hash = load_pos(params->listfile, snvmix_pl.in->header);
+	snvmix_pl.depth_allocated = 0;
+	snvmix_pl.ref = NULL;
+	snvmix_pl.calls = NULL;
+	//snvmix_pl.bQ = NULL;
+	//snvmix_pl.mQ = NULL;
+
+
+	/* This looks ugly... but we don't want to be allocating new memory for EVERY location. */
+	states = params->states;
+	if( ( snvmix_pl.notmu = malloc(states * sizeof(long double)) ) == NULL) { perror("[snvmixClassify_bamfile] Could not allocate snvmix_pl.notmu\n"); exit(1); }
+	if( ( snvmix_pl.log_pi = malloc(states * sizeof(long double)) ) == NULL) { perror("[snvmixClassify_bamfile] Could not allocate snvmix_pl.log_pi\n"); exit(1); }
+	if( ( snvmix_pl.b = malloc(states * sizeof(long double)) ) == NULL) { perror("[snvmixClassify_bamfile] Could not allocate snvmix_pl.b\n"); exit(1); }
+	for(k = 0; k < states; k++) {
+		snvmix_pl.notmu[k] = 1 - params->mu[k];
+		snvmix_pl.log_pi[k] = logl(params->pi[k]);
+	}
+	initPhred(snvmix_pl.phred, PHRED_MAX);
+
+	/* Init call buffer */
+	snvmix_pl.n_buffer = params->window;
+	if( (snvmix_pl.buffer = malloc(snvmix_pl.n_buffer * sizeof(snvmix_call_t)) ) == NULL) { perror("[snvmixClassify_bamfile] Could not allocate space for buffer\n"); exit(1); }
+	snvmix_pl.b_start = 0;
+	snvmix_pl.b_end = 0;
+	for(i = 0; i < snvmix_pl.n_buffer; i++) {
+		snvmix_pl.buffer[i].tid = 0;
+		snvmix_pl.buffer[i].pos = 0;
+		snvmix_pl.buffer[i].in_use = 0;
+		if( (snvmix_pl.buffer[i].p = malloc(states * sizeof(long double)) ) == NULL) {perror("[snvmixClassify_bamfile] Could not allocate space for buffer.p\n"); exit(1);}
+	}
+	
+	bam_flag_mask = (BAM_FUNMAP | BAM_FSECONDARY); // | BAM_FQCFAIL | BAM_FDUP);
+	if(params->filter_chast)
+		bam_flag_mask |= BAM_FQCFAIL;
+	if(params->filter_dup)
+		bam_flag_mask |= BAM_FDUP;
+
+	/* Call pileup with our function and bam_flag_mask */
+	sampileup(snvmix_pl.in, bam_flag_mask, snvmixClassify_pileup_func, &snvmix_pl);
+
+	free(snvmix_pl.notmu);
+	free(snvmix_pl.log_pi);
+	free(snvmix_pl.b);
+}
+
+int flush_buffer(uint32_t tid, uint32_t pos, snvmix_pl_t *snvmix_pl) {
+	snvmix_call_t *s;
+	int i, k, cnt, buff_id;
+	cnt = 0;
+	#define _fptrOUT snvmix_pl->params->output
+	//#define _fptrOUT stderr
+//fprintf(_fptrOUT, "ENTERING flush_buffer, b_start = %d\tb_end = %d\n",snvmix_pl->b_start, snvmix_pl->b_end);
+	for(i = 0, buff_id = snvmix_pl->b_start; i < snvmix_pl->n_buffer; i++, buff_id=(buff_id+1)%snvmix_pl->params->window) {
+//fprintf(stderr, "ITERATING in flush_buffer, buff_id = %d\n", buff_id);
+		s = snvmix_pl->buffer + buff_id;
+		if(pos+1 >= (s->pos + snvmix_pl->params->window) || tid != s->tid) {
+//fprintf(stderr, "PASSED if in flush_buffer, buff_id = %d\n", buff_id);
+			if(s->in_use) {
+				//fprintf(_fptrOUT,"%d\t",buff_id);
+				fprintf(_fptrOUT,"%s:%d\t%c\t%c\t%c:%d,%c:%d", snvmix_pl->in->header->target_name[s->tid], s->pos, s->ref, s->nref, s->ref, s->ref_count, s->nref, s->nref_count);
+
+				if(!snvmix_pl->params->filter) {
+					fprintf(_fptrOUT,",");
+					for(k = 0; k < snvmix_pl->params->states; k++)
+						fprintf(_fptrOUT,"%0.10Lf,",s->p[k]);
+					fprintf(_fptrOUT,"%d",s->maxP);
+				}
+
+				fprintf(_fptrOUT,"\t%d %d", s->forward[0], s->reverse[0]);
+				fprintf(_fptrOUT,"\t%d %d", s->forward[1], s->reverse[1]);
+				fprintf(_fptrOUT,"\t%d %d", s->nref_center, s->nref_edges);
+				fprintf(_fptrOUT,"\t%d", s->indel_pos);
+				fprintf(_fptrOUT,"\t%d %d", s->c_clean[0], s->c_clean[1]);
+				fprintf(_fptrOUT,"\t%d %d", s->bad_pair[0],  s->bad_pair[1]);
+				fprintf(_fptrOUT,"\t%d %d", s->c_ins[0], s->c_ins[1]);
+				fprintf(_fptrOUT,"\t%d %d", s->c_del[0], s->c_del[1]);
+				fprintf(_fptrOUT,"\t%d %d", s->c_junc[0], s->c_junc[1]);
+				fprintf(_fptrOUT,"\t%d", s->aln_unique_pos);
+				fprintf(_fptrOUT,"\t[%d,%d,%d,%d,%d]", s->raw_cvg[0],s->raw_cvg[1],s->raw_cvg[2],s->raw_cvg[3],s->raw_cvg[4]);
+				fprintf(_fptrOUT,"\t[%d,%d,%d,%d,%d]", s->thr_cvg[0],s->thr_cvg[1],s->thr_cvg[2],s->thr_cvg[3],s->thr_cvg[4]);
+				if(snvmix_pl->params->filter) {
+					fprintf(_fptrOUT,"\t");
+					for(k = 0; k < s->ref_count; k++)
+						fprintf(_fptrOUT,",");
+					for(k = 0; k < s->nref_count; k++)
+						fprintf(_fptrOUT,"%c",s->nref);
+				}
+				fprintf(_fptrOUT,"\n");
+				cnt++;
+				s->in_use = 0;
+				snvmix_pl->b_start = (buff_id+1)%snvmix_pl->params->window;
+			}
+		} else break;
+		if(buff_id == snvmix_pl->b_end)
+			break;
+	}
+//fprintf(_fptrOUT, "EXITING flush_buffer, b_start = %d\tb_end = %d\n",snvmix_pl->b_start, snvmix_pl->b_end);
+	return cnt;
+}
+
+
+static int snvmixClassify_pileup_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void *data) {
+
+#define _bQ(p, x) bam1_qual(p[x].b)[p[x].qpos]
+#define _mQ(p, x) p[x].b->core.qual
+
+	//FILE *fptrOUT;
+	char nref_skip = 'N';
+	//int *calls, ;
+	int depth = 0;
+	//long double *b, *notmu, *log_pi, *mu, *pi;
+	long double Y, not_Y, z;
+	//int states;
+	int i, k, qual_num, maxp;
+	//long double *phred;
+	int b_end_tmp = 0;
+	snvmix_call_t *s;
+	snvmix_pl_t *snvmix_pl = (snvmix_pl_t *)data;
+	param_struct *params = snvmix_pl->params;
+	// Avoid allocating new variables, use defines to make things readable
+#define	_calls snvmix_pl->calls
+#define _b snvmix_pl->b
+#define _notmu snvmix_pl->notmu
+#define _log_pi snvmix_pl->log_pi
+#define _mu params->mu
+#define _pi params->pi
+#define	_phred snvmix_pl->phred
+#define _states params->states
+	//fprintf(stderr,"DEBUG: snvmix_pl->buffer[snvmix_pl->b_start].in_use = %d\n", snvmix_pl->buffer[snvmix_pl->b_start].in_use);
+	//if(pos+1 == 148971)
+	//	printf("DEBUG: test 148971, start.in_use = %d\tb_start=%d\tb_end=%d\n",snvmix_pl->buffer[snvmix_pl->b_start].in_use, snvmix_pl->b_start,snvmix_pl->b_end);
+	//if(pos == 148971)
+	//	printf("DEBUG: test 148972, start.in_use = %d\tb_start=%d\tb_end=%d (pos+1 >= (snvmix_pl->buffer[snvmix_pl->b_start].pos + snvmix_pl->params->window) = %d >= %d\n", snvmix_pl->buffer[snvmix_pl->b_start].in_use, snvmix_pl->b_start,snvmix_pl->b_end, pos+1, snvmix_pl->buffer[snvmix_pl->b_start].pos + snvmix_pl->params->window);
+	// If we have gone further than window indicates, or changed tid, then flush buffer
+	if(snvmix_pl->buffer[snvmix_pl->b_start].in_use && (pos+1 >= (snvmix_pl->buffer[snvmix_pl->b_start].pos + snvmix_pl->params->window) || tid != snvmix_pl->buffer[snvmix_pl->b_start].tid ))
+			flush_buffer(tid, pos, snvmix_pl);
+	b_end_tmp = snvmix_pl->b_end;
+	s = snvmix_pl->buffer + snvmix_pl->b_end;
+	snvmix_pl->b_end = (snvmix_pl->b_end+1)%snvmix_pl->params->window;
+
+	if(snvmix_pl->hash) {
+		khint_t k_h = kh_get(64, snvmix_pl->hash, (uint64_t)tid<<32|pos);
+		if (k_h == kh_end(snvmix_pl->hash)) return 0;
+	}
+
+	s->in_use = 1;
+
+
+	if(params->full)
+		nref_skip = -2;
+
+	if (snvmix_pl->fai && (int)tid != snvmix_pl->tid) {
+		free(snvmix_pl->ref);
+		snvmix_pl->ref = fai_fetch(snvmix_pl->fai, snvmix_pl->in->header->target_name[tid], &snvmix_pl->len);
+		snvmix_pl->tid = tid;
+	}
+
+	//chr = snvmix_pl->in->header->target_name[tid],
+	s->tid = tid;
+	s->ref = toupper((snvmix_pl->ref && (int)pos < snvmix_pl->len)? snvmix_pl->ref[pos] : 'N');
+	s->pos = pos + 1;
+	depth = n;
+
+	if(depth > snvmix_pl->depth_allocated) {
+		if( (snvmix_pl->calls = realloc(snvmix_pl->calls, sizeof (int) * (depth + START_QNUM))) == NULL) {
+			perror("ERROR allocating space for snvmix_pl->calls"); exit(1);
+		}
+		//calls = snvmix_pl->calls;
+		snvmix_pl->depth_allocated = depth + START_QNUM;
+	}
+
+	//for(i = 0; i < depth; i++) {
+	// FIXME: watch out for deletions, do they get automatically an "n"? if so, ok
+//fprintf(stderr, "DEBUG: len = %d\tref = %c\tcalls = ", snvmix_pl->len, ref);
+	s->raw_cvg[0] = s->raw_cvg[1] = s->raw_cvg[2] = s->raw_cvg[3] = s->raw_cvg[4] = 0;
+	s->thr_cvg[0] = s->thr_cvg[1] = s->thr_cvg[2] = s->thr_cvg[3] = s->thr_cvg[4] = 0;
+	for(i = depth; i--; ) {
+		if(pl[i].is_del) {
+			_calls[i] = -1;
+			continue;
+		}
+		_calls[i] = "nACnGnnnTnnnnnnn"[bam1_seqi(bam1_seq(pl[i].b), pl[i].qpos)];
+//fprintf(stderr, "%c", _calls[i]);
+		switch(_calls[i]) {
+			case 'A': s->raw_cvg[0]++; break;
+			case 'C': s->raw_cvg[1]++; break;
+			case 'G': s->raw_cvg[2]++; break;
+			case 'T': s->raw_cvg[3]++; break;
+			case 'n': s->raw_cvg[4]++; break;
+	    		default: break;
+		}
+		// Mask calls according to quality
+		if( snvmixSkipCall_alt(params, _calls[i], (char) bam1_qual(pl[i].b)[pl[i].qpos], (char) pl[i].b->core.qual) )
+                        _calls[i] = -1;
+		else {
+			switch(_calls[i]) {
+				case 'A': s->thr_cvg[0]++; break;
+				case 'C': s->thr_cvg[1]++; break;
+				case 'G': s->thr_cvg[2]++; break;
+				case 'T': s->thr_cvg[3]++; break;
+				case 'n': s->thr_cvg[4]++; break;
+	    			default: break;
+	    		}
+			if(_calls[i] == s->ref)
+				_calls[i] = 1;
+			else if(_calls[i] == 'n')
+				 _calls[i] = -1;
+		}
+	}
+
+	s->nref = snvmixFilterCalls(_calls,depth,NULL,NULL,params);
+	s->nref_count = 0;
+	s->ref_count = 0;
+
+	// FIXME: review this section, extra flags
+	for(i = 0; i < 2; i++) {
+	s->forward[i] = 0;
+	s->reverse[i] = 0;
+	s->good_pair[i] = 0;
+	s->bad_pair[i] = 0;
+	s->c_clean[i] = 0;
+	s->c_ins[i] = 0;
+	s->c_del[i] = 0;
+	s->c_junc[i] = 0;
+	}
+	s->nref_edges = 0;    // FIXME: fixed 5 bp edges
+	s->nref_center = 0;    // FIXME: fixed 5 bp edges
+	s->indel_pos = 0;     // How many reads have a deletion at that site
+	s->indel_near = 0;    // How many reasd have a deletion overall
+	s->aln_unique_pos = 0;
+	uint8_t cigar_ops; // 0x1 INS, 0x2 DEL , 0x4 JUNCTION
+	//for(qual_num = 0; qual_num < depth; qual_num++) {
+	uint32_t debug_sort = 0x7fffffff;
+	if(s->nref != nref_skip) {
+		k = -1;
+		for(qual_num = depth; qual_num--; ) {
+			if(_calls[qual_num] == -1) {
+				if(pl[qual_num].is_del)
+					s->indel_pos++;
+				continue;
+			}
+			cigar_ops = 0;
+			for(i=0; i<pl[qual_num].b->core.n_cigar; i++) {
+				int op = bam1_cigar(pl[qual_num].b)[i] & BAM_CIGAR_MASK;
+				if(op == BAM_CINS)
+					cigar_ops |= 0x1;
+				if(op == BAM_CDEL)
+					cigar_ops |= 0x2;
+				if(op == BAM_CREF_SKIP)
+					cigar_ops |= 0x4;
+			}
+			if(pl[qual_num].indel) {
+				s->indel_pos++;
+			}
+			if(_calls[qual_num] == 1) {
+				s->ref_count++;
+				k = 0;
+			} else if(_calls[qual_num] == s->nref) {
+				s->nref_count++;
+				k = 1;
+				if(pl[qual_num].qpos > (pl[qual_num].b->core.l_qseq - 5) || pl[qual_num].qpos < 5)
+					s->nref_edges++;
+				else
+					s->nref_center++;
+				if(pl[qual_num].b->core.pos < debug_sort) { s->aln_unique_pos++; debug_sort = pl[qual_num].b->core.pos; }
+				else if(pl[qual_num].b->core.pos > debug_sort) {fprintf(stderr, "ERROR checking sort in read-position, prev = %Ld, new = %Ld\n", debug_sort, pl[qual_num].b->core.pos); exit(1);}
+			}
+
+			if(k != -1) {
+				if(cigar_ops) {
+					if(cigar_ops & 0x1) s->c_ins[k]++;
+					if(cigar_ops & 0x2) s->c_del[k]++;
+					if(cigar_ops & 0x4) s->c_junc[k]++;
+				} else s->c_clean[k]++;
+
+				if(pl[qual_num].b->core.tid != pl[qual_num].b->core.mtid) s->bad_pair[k]++;
+
+				if(pl[qual_num].b->core.flag & 0x10) s->reverse[k]++;
+				else s->forward[k]++;
+			}
+		}
+	}
+	if(!params->filter) {
+		if(s->nref == nref_skip) {
+			snvmix_pl->b_end = b_end_tmp;
+			s->in_use = 0;
+			return 0;
+		}
+		int block = (_states / 3) * 3;
+		//for(k = 0; k < _states; k++)
+		//for(k = _states; k--; )
+		//	_b[k] = 0;
+		k = 0;
+		while( k < block ) {
+			_b[k] = 0;
+			_b[k+1] = 0;
+			_b[k+2] = 0;
+			k += 3;
+		}
+		if( k < _states) {
+			switch( _states - k ) {
+				case 2: _b[k] = 0; k++;
+				case 1: _b[k] = 0;
+			}
+		}
+		//for(qual_num = 0; qual_num < depth; qual_num++) {
+		for(qual_num = depth; qual_num--; ) {
+			//if( snvmixSkipCall(calls,qual_num,params,bQ,mQ) )
+			if(_calls[qual_num] == -1)
+				continue;
+		// from matlab:
+		// b = sum(log(notm*0.5 + m.*(yr.*mur+notyr.*notmur)),2);
+			if(_calls[qual_num] == 1) {
+				// If REF then
+				Y = _phred[(unsigned char) _bQ(pl, qual_num)];
+				not_Y = 1-_phred[(unsigned char) _bQ(pl, qual_num)];
+			} else {
+				// If NREF then
+				Y = (1-_phred[(unsigned char) _bQ(pl, qual_num)])/3;
+				not_Y = _phred[(unsigned char) _bQ(pl, qual_num)] + 2*(1-_phred[(unsigned char)_bQ(pl, qual_num)])/3;
+			}
+			//for(k = 0; k < _states; k++)
+			//for(k = _states; k--; )
+			//	_b[k] = _b[k] + logl((1-_phred[(unsigned char) _mQ(pl, qual_num)])*0.5 + _phred[(unsigned char) _mQ(pl, qual_num)] * (Y * _mu[k] + not_Y * _notmu[k]));
+			k = 0;
+			while( k < block ) {
+				_b[k] = _b[k] + logl((1-_phred[(unsigned char) _mQ(pl, qual_num)])*0.5 + _phred[(unsigned char) _mQ(pl, qual_num)] * (Y * _mu[k] + not_Y * _notmu[k]));
+				_b[k+1] = _b[k+1] + logl((1-_phred[(unsigned char) _mQ(pl, qual_num)])*0.5 + _phred[(unsigned char) _mQ(pl, qual_num)] * (Y * _mu[k+1] + not_Y * _notmu[k+1]));
+				_b[k+2] = _b[k+2] + logl((1-_phred[(unsigned char) _mQ(pl, qual_num)])*0.5 + _phred[(unsigned char) _mQ(pl, qual_num)] * (Y * _mu[k+2] + not_Y * _notmu[k+2]));
+				k += 3;
+			}
+			if( k < _states) {
+				switch( _states - k ) {
+					case 2: _b[k] = _b[k] + logl((1-_phred[(unsigned char) _mQ(pl, qual_num)])*0.5 + _phred[(unsigned char) _mQ(pl, qual_num)] * (Y * _mu[k] + not_Y * _notmu[k])); k++;
+					case 1: _b[k] = _b[k] + logl((1-_phred[(unsigned char) _mQ(pl, qual_num)])*0.5 + _phred[(unsigned char) _mQ(pl, qual_num)] * (Y * _mu[k] + not_Y * _notmu[k]));
+				}
+			}
+		}
+		z = 0;
+		//for(k = 0; k < _states; k++) {
+		//for(k = _states; k--; ) {
+		//	_b[k] = expl(_b[k] + _log_pi[k]);
+		//	z += _b[k];
+		//}
+		k = 0;
+		while( k < block ) {
+			_b[k] = expl(_b[k] + _log_pi[k]); z += _b[k];
+			_b[k+1] = expl(_b[k+1] + _log_pi[k+1]); z += _b[k+1];
+			_b[k+2] = expl(_b[k+2] + _log_pi[k+2]); z += _b[k+2];
+			k += 3;
+		}
+		if( k < _states ) {
+			switch( _states - k ) {
+				case 2: _b[k] = expl(_b[k] + _log_pi[k]); z += _b[k]; k++;
+				case 1: _b[k] = expl(_b[k] + _log_pi[k]); z += _b[k];
+			}
+		}
+		if(!z) z = 1;
+		//for(k = 0; k < _states; k++)
+		//for(k = _states; k--; )
+		//	_b[k] = _b[k] / z;
+		k = 0;
+		while( k < block ) {
+			_b[k] = _b[k] / z;
+			_b[k+1] = _b[k+1] / z;
+			_b[k+2] = _b[k+2] / z;
+			k += 3;
+		}
+		if( k < _states ) {
+			switch( _states - k ) {
+				case 2: _b[k] = _b[k] / z; k++;
+				case 1: _b[k] = _b[k] / z;
+			}
+		}		
+		maxp = _states - 1;
+		z = _b[maxp];
+		//for(k = 0; k < _states; k++)
+		for(k = _states; k--; )
+			if( _b[k] > z) {
+				maxp = k;
+				z = _b[k];
+			}
+
+
+		for(k = 0; k < _states; k++)
+			s->p[k] = _b[k];
+		s->maxP = maxp+1;
+	}
+}
+
+/*
+	void snvmixGetCallString
+	Arguments:
+		*col : pointer to the current file-line in memory
+		*calls : pointer to array where we will store the final base-calls
+		depth : number of base reads for this position
+		*nref : the observed NREF value will be placed here (-1 if none was found)
+
+	Function:
+		This will parse column 5 of the pileup file, which contains the
+		base calls and will fill the array "calls" with:
+			1 : when a reference call was made
+		       -1 : when an invalid value is seen ('N', 'n', '*') 
+		   [ACTG] : when a non-reference call was made
+
+		
+
+*/
+inline void snvmixGetCallString(char *col, int *calls, int depth, char *nref) {
+	int i;
+	int call_i = 0, call_skip_num = 0;
+	char call_skip_char[10];
+	for(i = 0 ; call_i < depth; i++) {
+		switch(col[i]){
+		case ',':
+		case '.':
+			calls[call_i] = 1;
+			call_i++;
+			break;
+		case 'a': case 'A':
+		case 't': case 'T':
+		case 'g': case 'G':
+		case 'c': case 'C':
+			//calls[call_i] = 0;
+			calls[call_i] = toupper(col[i]);
+			call_i++;
+			//if(*nref == 0)
+				//*nref = toupper(*(col+sizeof(char)*i));
+			if(*nref == 0)
+				*nref = toupper(col[i]);
+				break;
+		case 'N':
+		case 'n':
+		case '*':
+			// Not comparable values, we ignore them, but need to
+			// keep count to compare against the number of mapping qualities
+			calls[call_i] = -1;
+			call_i++;
+			break;
+		case '$':
+			// End of a read, just ignore it
+			break;
+		case '^':
+			// Start of read, ignore it and skip the next value (not base-related)
+			i++;
+			break;
+		case '+':
+		case '-':
+			// Eliminate:
+			//		+2AA
+			//		-3AAA
+			// Start skiping the sign
+			i++;
+			for(call_skip_num = 0; col[i] <= '9' && col[i] >= '0'; call_skip_num++, i++) {
+				//call_skip_char[call_skip_num] = call;
+				call_skip_char[call_skip_num] = col[i];
+				//i++;
+			}
+			// we have the number string in call_skip_char, lets terminate it
+			call_skip_char[call_skip_num] = '\0';
+			// i has been updated to first letter, just need to jump the rest of them
+			i = i+atoi(call_skip_char)-1;
+			break;
+		default:
+			fprintf(stderr,"ERROR: problem reading pileup file calls (%c)\n",col[i]);
+			exit(1);
+			break;
+		}
+	}
+	// If no nref was found, don't bother parsing the other columns, make the for fail with -1
+	if(!*nref)
+		*nref = -1;
+}
+
+/*
+	int snvmixFilterCalls
+	Arguments:
+		*calls : array built by snvmixGetCallString containing
+			1 : when a reference call was made
+		       -1 : when an invalid value is seen ('N', 'n', '*') 
+		   [ACTG] : when a non-reference call was made
+		depth : number of calls for the current position
+		*bQ : base qualities observed for each call
+		*mQ : map quality for each call
+		*params : parameter structure, get thresholding data
+	Function:
+		For each valid call read in the position this function will apply
+		thresholding according to the type selected (-t flag) and the bQ (-q)
+		and mQ (-Q) thresholds provided.
+
+		Any base-call that does not pass thresholds will be switched from it's
+		current value in *calls to -1;
+
+		The most prevalent NREF after filtering will be determined and returned.
+	
+*/
+inline int snvmixFilterCalls(int *calls, int depth, char *bQ, char *mQ, param_struct *params) {
+	int qual_num, nref_counts[5];
+	nref_counts[0] = 0;
+	nref_counts[1] = 0;
+	nref_counts[2] = 0;
+	nref_counts[3] = 0;
+	nref_counts[4] = 0;
+	int max_nref = N;
+
+	char nref = 0;
+
+	//for(qual_num = 0; qual_num < depth; qual_num++) {
+	for(qual_num = depth; qual_num--; ) {
+		//if( snvmixSkipCall(calls,qual_num,params,bQ,mQ) ) {
+		if( bQ != NULL && mQ != NULL && snvmixSkipCall(calls,qual_num,params,bQ,mQ) ) {
+			calls[qual_num] = -1;
+		} else {
+			nref_counts[0]++;
+			switch(calls[qual_num]) {
+				case 'A':
+					nref_counts[A]++;
+					break;
+				case 'C':
+					nref_counts[C]++;
+					break;
+				case 'G':
+					nref_counts[G]++;
+					break;
+				case 'T':
+					nref_counts[T]++;
+					break;
+				case 1:
+				case -1:
+					nref_counts[0]--;
+					break;
+				default:
+					fprintf(stderr,"ERROR: unknown call base\n");
+					exit(1);
+			}
+		}
+	}
+	if(nref_counts[0]) {
+		max_nref = A;
+		if(nref_counts[max_nref] < nref_counts[C])
+			max_nref = C;
+		if(nref_counts[max_nref] < nref_counts[G])
+			max_nref = G;
+		if(nref_counts[max_nref] < nref_counts[T])
+			max_nref = T;
+	} else {
+		//return -1;
+	}
+	//for(qual_num = 0; qual_num < depth; qual_num++) {
+	for(qual_num = depth; qual_num--; ) {
+		if(calls[qual_num] == 1)
+			continue;
+		if(calls[qual_num] != base_code[max_nref])
+			calls[qual_num] = -1;
+	}
+	return base_code[max_nref];
+}
+
+/*
+	int snvmixSkipCall
+	Arguments:
+		*calls : array built by snvmixGetCallString containing
+			1 : when a reference call was made
+		       -1 : when an invalid value is seen ('N', 'n', '*') 
+		   [ACTG] : when a non-reference call was made
+		qual_num : call number that is being evaluated
+		*params : parameter structure, get thresholding data
+		*bQ : base qualities observed for each call
+		*mQ : map quality for each call
+	Function:
+		Evalates quality values in each of the filtering models
+
+		Returns 1 if the calls[qual_num] needs to be filtered out
+		Returns 0 otherwise
+*/
+inline int snvmixSkipCall(int *calls, int qual_num, param_struct *params, char *bQ, char *mQ) {
+	if(calls[qual_num] == -1)
+		return(1);
+	if(params->filter_type == TYPE_mb) {
+		if(bQ[qual_num] <= params->bQ || mQ[qual_num] <= params->mQ)
+			return(1);
+	} else if(params->filter_type == TYPE_b) {
+		if(bQ[qual_num] <= params->bQ)
+			return(1);
+	} else {
+		if(mQ[qual_num] <= params->mQ)
+			return(1);
+		if(params->filter_type == TYPE_m) {
+			// Use mapping as surrogate
+			bQ[qual_num] = mQ[qual_num];
+			// Make mapping one
+			mQ[qual_num] = (char) PHRED_MAX;
+		} else if(params->filter_type == TYPE_M) {
+			// Mapping passed, make it one
+			mQ[qual_num] = (char) PHRED_MAX;
+		} else if(params->filter_type == TYPE_Mb) {
+			// Nothing special here
+		} else if(params->filter_type == TYPE_MB || params->filter_type == TYPE_SNVMix1) {
+			if(bQ[qual_num] <= params->bQ)
+				return(1);
+			if(params->filter_type == TYPE_SNVMix1) {
+				bQ[qual_num] = (char) PHRED_MAX;
+				mQ[qual_num] = (char) PHRED_MAX;
+		}	}
+	}
+	return(0);
+}
+
+inline int snvmixSkipCall_alt(param_struct *params, int call, char bQ, char mQ) {
+	if(call == -1)
+		return(1);
+	if(params->filter_type != TYPE_b && mQ <= params->mQ)
+		return(1);
+	switch(params->filter_type) {
+	case TYPE_mb:
+		if(bQ <= params->bQ || mQ <= params->mQ)
+			return(1);
+		break;
+	case TYPE_b:
+		if(bQ <= params->bQ)
+			return(1);
+		break;
+	case TYPE_m:
+		// Use mapping as surrogate
+		bQ = mQ;
+		// Make mapping one
+		mQ = (char) PHRED_MAX;
+		break;
+	case TYPE_M:
+		// Mapping passed, make it one
+		mQ = (char) PHRED_MAX;
+		break;
+	case TYPE_Mb:
+		// Nothing special here
+		break;
+	case TYPE_MB:
+	case TYPE_SNVMix1:
+		if(bQ <= params->bQ)
+			return(1);
+		if(params->filter_type == TYPE_SNVMix1) {
+			bQ = (char) PHRED_MAX;
+			mQ = (char) PHRED_MAX;
+		}
+		break;
+	}
+	return(0);
+}
+
+
+void resetParams(param_struct *params) {
+	params->input = stdin;
+	params->output = stdout;
+	params->inputfile = NULL;
+	params->outputfile = NULL;
+	params->modelfile = NULL;
+	params->fastafile = NULL;
+	params->listfile = NULL;
+	params->window = 1;
+	params->filter_type = 0;
+	params->filter_dup = 1;
+	params->filter_chast = 1;
+	params->train = 0;
+	params->classify = 0;
+	params->filter = 0;
+	params->full = 0;
+	params->input_type = S_PILEUP; // 0 = processed, 1 = maq pileup, 2 = sam pileup, 3 = bam file
+	params->mu = NULL;
+	params->pi = NULL;
+	params->max_iter = 1000;
+	params->bQ = 19;
+	params->mQ = 19;
+	params->debug = 0;
+	params->states = 3;
+	params->trainP.alpha = NULL;
+	params->trainP.beta = NULL;
+	params->trainP.delta = NULL;
+	params->trainP.param_file = NULL;
+	params->trainP.max_iter = 100;
+	params->trainP.bQ = NULL;
+	params->trainP.mQ = NULL;
+	params->trainP.calls = NULL;
+	params->window = 1;
+}
+
+void initSNVMix(int argc, char **argv, param_struct *params) {
+	char c;
+	int i;
+	resetParams(params);
+	while ((c = getopt (argc, argv, "hDTCFfi:o:m:t:p:q:Q:a:b:d:M:S:r:l:w:cR")) != -1) {
+		switch (c)
+		{
+			case 'm': params->modelfile = optarg; break;
+			case 'i': params->inputfile = optarg; break;
+			case 'o': params->outputfile = optarg; break;
+			// Bam file opts
+			case 'r': params->fastafile = optarg; break;
+			case 'l': params->listfile = optarg; break;
+			case 'T': params->train = 1; break;
+			case 'C': params->classify = 1; break;
+			case 'F': params->filter = 1; break;
+			case 'f': params->full = 1; break;
+			case 'p':
+				if(*optarg == 'q') {
+					params->input_type = Q_PILEUP;
+				} else if(*optarg == 'm') {
+					params->input_type = M_PILEUP;
+				} else if(*optarg == 's') {
+					params->input_type = S_PILEUP;
+				} else if(*optarg == 'b') {
+					params->input_type = BAM_FILE;
+				} else {
+					fprintf(stderr,"ERROR: unknown pileup format %c\n",*optarg);
+					exit(1);
+				}
+				break;
+			case 't':
+				if(strcmp("mb",optarg) == 0)
+					params->filter_type = TYPE_mb;
+				else if(strcmp("m",optarg) == 0)
+					params->filter_type = TYPE_m;
+				else if(strcmp("b",optarg) == 0)
+					params->filter_type = TYPE_b;
+				else if(strcmp("M",optarg) == 0)
+					params->filter_type = TYPE_M;
+				else if(strcmp("Mb",optarg) == 0)
+					params->filter_type = TYPE_Mb;
+				else if(strcmp("MB",optarg) == 0)
+					params->filter_type = TYPE_MB;
+				else if(strcmp("SNVMix1",optarg) == 0)
+					params->filter_type = TYPE_SNVMix1;
+				else {
+					fprintf(stderr,"ERROR: filter type '%s' not recognized\n",optarg);
+					exit(1);
+				}
+				break;
+			case 'q':
+				params->bQ = atoi(optarg);
+				if( params->bQ < 0 || params->bQ >= PHRED_MAX )  {
+					fprintf(stderr,"ERROR: quality threshold value Q%d out of range\n",params->bQ);
+					exit(1);
+				}
+				break;
+			case 'Q':
+				params->mQ = atoi(optarg);
+				if( params->mQ < 0 || params->mQ >= PHRED_MAX )  {
+					fprintf(stderr,"ERROR: mapping threshold value Q%d out of range\n",params->mQ);
+					exit(1);
+				}
+				break;
+			case 'h':
+				usage(argv[0]);
+				break;
+			case 'D': params->debug = 1; break;
+			case 'M': params->trainP.param_file = optarg; break;
+			case 'S':
+				params->states = atoi(optarg);
+				if( params->states < 3 )  {
+					fprintf(stderr,"ERROR: state minimum is 3\n");
+					exit(1);
+				}
+				break;
+			case 'w':
+				params->window = atoi(optarg);
+				if(params->window < 1)
+					params->window = 1;
+				break;
+			case 'c':
+				params->filter_chast = 0;
+				break;
+			case 'R':
+				params->filter_dup = 0;
+				break;
+			default:
+				fprintf(stderr,"Unknown option\n");
+				usage(argv[0]);
+				break;
+			}
+	}
+	/* Decide if we're going to train or classify */
+	if(params->filter) {
+		params->train = 0;
+		params->classify = 0;
+	}
+	if(params->train && params->classify) {
+		fprintf(stderr,"ERROR: choose either train or classify\n");
+		exit(1);
+	} else if(!params->train && !params->classify && !params->filter) {
+		fprintf(stderr,"Train/Classify/Filter not selected, picking default: Classify\n");
+		params->classify = 1;
+	}
+
+	/* Set parameters */
+	allocateParameters(params);
+	if( params->train ) setTrainingParameters(params);
+	if( params->classify ) setClassificationParameters(params);
+
+	/* Open input and output streams */
+	if( params->input_type == BAM_FILE ) {
+		if( params->train ) {
+			fprintf(stderr, "BAM input for training not yet implemented\n");
+			exit(1);
+		}
+		if( params->inputfile == NULL || strcmp(params->inputfile, "-") == 0) {
+			fprintf(stderr, "ERROR: if '-p b' is chosen, input file has to be a bam file\n");
+			exit(1);
+		}
+		if( params->fastafile == NULL ) {
+			fprintf(stderr, "ERROR: if '-p b' is chosen, reference fasta file is required (-r <ref.fa>)\n");
+			exit(1);
+		}
+	} else if( params->inputfile != NULL) {
+			params->input = fopen(params->inputfile, "r");
+			if(params->input == NULL) {
+				perror("ERROR: could not open input file");
+				exit(1);
+			}
+	} else {
+		params->input = stdin;
+	}
+	if( params->outputfile != NULL ) {
+		params->output = fopen(params->outputfile, "w");
+		if(params->output == NULL) {
+			perror("ERROR: could not open output file");
+			exit(1);
+		}
+	} else {
+		params->output = stdout;
+	}
+}
+
+void allocateParameters(param_struct *params) {
+
+	/* Allocate alpha */
+	if( (params->trainP.alpha = malloc(params->states * sizeof(long double))) == NULL) {
+		perror("ERROR: could not allocate space for alpha\n"); exit(1);
+	}
+	/* Allocate beta */
+	if( (params->trainP.beta = malloc(params->states * sizeof(long double))) == NULL) {
+		perror("ERROR: could not allocate space for beta\n"); exit(1);
+	}
+	/* Allocate delta*/
+	if( (params->trainP.delta = malloc(params->states * sizeof(long double))) == NULL) {
+		perror("ERROR: could not allocate space for delta\n"); exit(1);
+	}
+	/* Allocate mu*/
+	if( (params->mu = malloc(params->states * sizeof(long double))) == NULL) {
+		perror("ERROR: could not allocate space for mu\n"); exit(1);
+	}
+	/* Allocate pi*/
+	if( (params->pi = malloc(params->states * sizeof(long double))) == NULL) {
+		perror("ERROR: could not allocate space for pi\n"); exit(1);
+	}
+}
+
+void setTrainingParameters(param_struct *params) {
+	if( params->trainP.param_file != NULL ) {
+		readParamFile(params,'t');
+	} else {
+		if(params->states != 3) {
+			fprintf(stderr, "ERROR: if state space larger than 3 requested, parameters must be provided\n");
+			exit(1);
+		}
+		fprintf(stderr,"Training parameter file not given, using defaults\n");
+		params->trainP.alpha[0] = 1000;
+		params->trainP.alpha[1] = 5000;
+		params->trainP.alpha[2] = 1;
+		params->trainP.beta[0] = 1;
+		params->trainP.beta[1] = 5000;
+		params->trainP.beta[2] = 1000;
+		params->trainP.delta[0] = 10;
+		params->trainP.delta[1] = 1;
+		params->trainP.delta[2] = 1;
+	}
+}
+
+void setClassificationParameters(param_struct *params) {
+	int k;
+	if( params->modelfile != NULL ) {
+		readParamFile(params,'c');
+	} else {
+		if(params->states != 3) {
+				fprintf(stderr, "ERROR: if state space larger than 3 requested, model file must be provided\n");
+				exit(1);
+		}
+		params->mu[0] = 0.995905287891696078261816182930;
+		params->mu[1] = 0.499569401000925172873223800707;
+		params->mu[2] = 0.001002216846753116409260431219;
+		params->pi[0] = 0.672519580755555956841362785781;
+		params->pi[1] = 0.139342327124010650907237618412;
+		params->pi[2] = 0.188138092120433392251399595807;
+		fprintf(stderr,"Classification parameter file not given, using for mQ35 bQ10\n");
+	}
+	fprintf(stderr,"Mu and pi values:\n");
+	for(k = 0; k < params->states; k++)
+		fprintf(stderr,"\tmu[%d] = %Lf\tpi[%d] = %Lf\n", k, params->mu[k], k, params->pi[k]);
+	fprintf(stderr,"\n");
+}
+
+void readParamFile(param_struct *params, char type) {
+	FILE *fptrPARAM;
+	char *line = NULL, *param_name, *param, *param_ptr, *filename;
+	size_t line_len = 0, read_len = 0;
+	long double *target;
+	int i;
+
+	if(type == 't') {
+		filename=params->trainP.param_file;
+	} else if(type == 'c') {
+		filename=params->modelfile;
+	}
+	fptrPARAM=fopen(filename, "r");
+	if(fptrPARAM == NULL) {
+		fprintf(stderr, "ERROR: could not open parameter file %s\n", filename);
+		exit(1);
+	}
+
+#if defined __linux__ || defined _GNU_SOURCE
+	while( (read_len = getline(&line,&line_len,fptrPARAM)) != -1 )
+#endif
+#ifdef __APPLE__
+	while( (line = fgetln(fptrPARAM,&line_len)) != NULL )
+#endif
+	{
+		line[line_len-1] = '\0';
+		target = NULL;
+		param_name = strtok_r(line, " ", &param_ptr);
+		if(type == 't') {
+			     if(strcmp("alpha", param_name) == 0) target = params->trainP.alpha;
+			else if(strcmp("beta", param_name) == 0)  target = params->trainP.beta;
+			else if(strcmp("delta", param_name) == 0) target = params->trainP.delta;
+		} else if(type == 'c') {
+			     if(strcmp("mu", param_name) == 0) target = params->mu;
+			else if(strcmp("pi", param_name) == 0) target = params->pi;
+
+		}
+		if(target == NULL) { fprintf(stderr, "Unknown parameter %s, skipping\n", param_name); continue; }
+
+		for(i = 0; i < params->states; i++) {
+			param = strtok_r(NULL, " ", &param_ptr);
+			if(param == NULL) {
+				fprintf(stderr,"ERROR: missing value #%d for %s\n", i, param_name);
+				exit(1);
+			}
+			target[i] = atof(param);
+fprintf(stderr, "DEBUG: %s[%d] = %Lf\n", param_name, i, target[i]);
+		}
+	}
+	fclose(fptrPARAM);
+	free(line);
+}
+
+void initPhred(long double *phredTable, int elem) {
+	int i;
+	for(i = 0; i < elem; i++) {
+		phredTable[i] = 1-powl(10,(-(long double)i/10));
+	}
+	phredTable[i] = 1;
+}
+
+
+void usage(char *selfname) {
+	param_struct default_params;
+	resetParams(&default_params);
+	fprintf(stderr,"Syntax:\n\t%s\t-m <modelfile> [-i <infile>] [-o <outfile>] [-T | -C | -F] [-p < q | m | s >] [-t < mb | m | b | M | Mb | MB | SNVMix1>] [-q <#>] [-Q <#>] [-M <trainP file>] [-h]\n",selfname);
+	fprintf(stderr,"\tRequired:\n");
+	fprintf(stderr,"\t-m <modelfile>\tmodel file, a line for mu and a line for pi, three\n");
+	fprintf(stderr,"\t\t\tspace-separated values each, like:\n");
+	fprintf(stderr,"\t\t\tmu 0.xxxxxxxx 0.xxxxxxxx 0.xxxxxxxx\n");
+	fprintf(stderr,"\t\t\tpi 0.xxxxxxxx 0.xxxxxxxx 0.xxxxxxxx\n");
+	fprintf(stderr,"\tOptional:\n");
+	fprintf(stderr,"\t-i <infile>\tquality pileup, from pileup2matlab.pl script (def: STDIN)\n");
+	fprintf(stderr,"\t-o <outfile>\twhere to put the results (def: STDOUT)\n");
+	fprintf(stderr,"\t-T | -C | -F\tTrain, Classify or Filter (def: Classify)\n");
+	fprintf(stderr,"\t-p q|m|s\tInput pileup format (def: s)\n\t\t\tq = quality\n\t\t\tm = MAQ\n\t\t\ts = SAMtools\n");
+	fprintf(stderr,"\t-t mb|m|b|M|Mb|MB|SNVMix1\n\t\t\tFilter type (def: mb)\n");
+	fprintf(stderr,"\t\t\tmb\tLowest between map and base quality\n");
+	fprintf(stderr,"\t\t\tm\tFilter on map, and use as surrogate for base quality\n");
+	fprintf(stderr,"\t\t\tb\tFilter on base quality, take map quality as 1\n");
+	fprintf(stderr,"\t\t\tM\tFilter on map quality but use only base quality\n");
+	fprintf(stderr,"\t\t\tMb\tFilter on map quality and use both map and base qualities\n");
+	fprintf(stderr,"\t\t\tMB\tFilter on map quality AND base quality\n");
+	fprintf(stderr,"\t\t\tSNVMix1\tFilter on base quality and map quality, afterwards consider them perfect\n");
+	fprintf(stderr,"\t-q #\t\tCutoff Phred value for Base Quality, anything <= this value is ignored (def: Q%d)\n",default_params.bQ);
+	fprintf(stderr,"\t-Q #\t\tCutoff Phred value for Map Quality, anything <= this value is ignored (def: Q%d)\n",default_params.mQ);
+	fprintf(stderr,"\n\tTraining parameters:\n");
+	fprintf(stderr,"\t-M <file>\tProvide a file containing training parameters\n");
+	fprintf(stderr,"\t\t\tValues are space-separated:\n");
+	fprintf(stderr,"\t\t\talpha # # #\n");
+	fprintf(stderr,"\t\t\tbeta # # #\n");
+	fprintf(stderr,"\t\t\tdelta # # #\n");
+	fprintf(stderr,"\n\t-h\t\tthis message\n");
+	fprintf(stderr,"\nImplementation: Rodrigo Goya, 2009. Version %s\n",VERSION);
+	exit(0);
+}
+
+// Based on classify pileup, but reads the entire file to memory for training purposes.
+void snvmixGetTrainSet_pileup(param_struct *params) {
+//MAQ:
+//	1       554484  C       1752    @,,.,.,, @xxx @xxx xxxxx
+//SAM:
+//	1       554484  C       1752    ,,.,.,,	xxx xxxx
+
+	FILE *fptrIN;
+
+	char *line = NULL;
+	size_t line_len = 0, prev_len = 0;
+	int read_len = 0;
+	int col_num = 0;
+	long int line_num = 0;
+
+	char *col, *qual;
+	char *col_sep = "\t", *col_str, *col_ptr;
+	char *qual_sep = ",", *qual_str, *qual_ptr;
+	char *chr, *pos, *ref, nref, call;	
+
+	char *bQ, *mQ;
+	int *calls, *tmpQ, pos_int;
+	int depth = 0, qual_num,  maxp;
+	int depth_allocated = 0;
+
+	int trainset = 0;
+	int trainset_allocated = 0;
+
+	int nref_count = 0, ref_count = 0;
+	int i, call_i;
+
+	fptrIN = params->input;
+
+	if( (params->trainP.bQ = malloc(START_QNUM * sizeof (unsigned char **))) == NULL) {
+		perror("ERROR allocating initial space for train.bQ"); exit(1);
+	}
+	if( (params->trainP.mQ = malloc(START_QNUM * sizeof (unsigned char **))) == NULL) {
+		perror("ERROR allocating initial space for train.mQ"); exit(1);
+	}
+	if( (params->trainP.calls = malloc(START_QNUM * sizeof (signed char **))) == NULL) {
+		perror("ERROR allocating initial space for train.calls"); exit(1);
+	}
+	if( (params->trainP.depth = malloc(START_QNUM * sizeof (int *))) == NULL) {
+		perror("ERROR allocating initial space for train.depth"); exit(1);
+	}
+	trainset_allocated = START_QNUM;
+
+	if( (calls = malloc(START_QNUM * sizeof (int))) == NULL) {
+		perror("ERROR allocating initial space for train.depth"); exit(1);
+	}
+	depth_allocated = START_QNUM;
+#if defined __linux__ || defined _GNU_SOURCE
+	while( (read_len = getline(&line,&line_len,fptrIN)) != -1 )
+#endif
+#ifdef __APPLE__
+	while( (line = fgetln(fptrIN,&line_len)) != NULL )
+#endif
+	{
+		line[line_len-1] = '\0';
+		depth = 0;
+		nref = 0;
+		col_ptr = NULL;
+		for(col_num = 0, col_str = line;  ; col_num++, col_str = NULL) {
+			col = strtok_r(col_str, "\t", &col_ptr);
+			if(col == NULL) {
+				break;
+			}
+			if(col_num == 0) {
+				chr = col;
+			} else if(col_num == 1) {
+				pos = col;
+				pos_int = strtol(pos, NULL, 10);
+			} else if(col_num == 2) {
+				ref = col;
+			} else if(col_num == 3) {
+				depth = atoi(col);
+				if(depth > depth_allocated) {
+					if( (calls = realloc(calls, sizeof (int) * (depth + START_QNUM))) == NULL) {
+						perror("ERROR allocating space for calls"); exit(1);
+					}
+					depth_allocated = depth + START_QNUM;
+				}
+			} else if(col_num == 4) {
+				if(params->input_type == M_PILEUP)
+					col = col+sizeof(char);
+					snvmixGetCallString(col, calls, depth, &nref);
+			} else if(col_num == 5) {
+				bQ = col;
+				// If it's a MAQ pileup, we need to skip the @ sign
+				if(params->input_type == M_PILEUP)
+					bQ = bQ + sizeof(char);
+				for(call_i = 0; call_i < depth; call_i++)
+					bQ[call_i] = bQ[call_i]-33;
+			} else if(col_num == 6) {
+				mQ = col;
+				// If it's a MAQ pileup, we need to skip the @ sign
+				if(params->input_type == M_PILEUP)
+					mQ = mQ + sizeof(char);
+				for(call_i = 0; call_i < depth; call_i++)
+					mQ[call_i] = mQ[call_i]-33;
+			}
+		}
+		if(line_num >= trainset_allocated) {
+			if( ( params->trainP.bQ = realloc(params->trainP.bQ, (line_num + START_QNUM) * sizeof (unsigned char *)) ) == NULL ) {
+				perror("ERROR reallocating space for trainP.bQ"); exit(1);
+			}
+			if( ( params->trainP.mQ = realloc(params->trainP.mQ, (line_num + START_QNUM) * sizeof (unsigned char *)) ) == NULL ) {
+				perror("ERROR reallocating space for trainP.mQ"); exit(1);
+			}
+			if( ( params->trainP.calls = realloc(params->trainP.calls, (line_num + START_QNUM) * sizeof (signed char *)) ) == NULL) {
+				perror("ERROR reallocating space for trainP.calls"); exit(1);
+			}
+			if( ( params->trainP.depth = realloc(params->trainP.depth, (line_num + START_QNUM) * sizeof (int)) ) == NULL) {
+				perror("ERROR reallocating space for trainP.depth"); exit(1);
+			}
+			trainset_allocated = line_num + START_QNUM;
+		}
+
+		nref = snvmixFilterCalls(calls,depth,bQ,mQ,params);
+		nref_count = 0;
+		ref_count = 0;
+		for(qual_num = 0; qual_num < depth; qual_num++) {
+			if(calls[qual_num] == -1)
+				continue;
+			if(calls[qual_num] == 1)
+				ref_count++;
+			if(calls[qual_num] == nref)
+				nref_count++;
+		}
+		params->trainP.depth[line_num] = ref_count + nref_count;
+
+		if( (params->trainP.bQ[line_num] = malloc(sizeof(unsigned char) * params->trainP.depth[line_num])) == NULL ) {
+			perror("ERROR allocating space for trainP.bQ"); exit(1);
+		}
+		if( (params->trainP.mQ[line_num] = malloc(sizeof(unsigned char) * params->trainP.depth[line_num])) == NULL ) {
+			perror("ERROR allocating space for trainP.mQ"); exit(1);
+		}
+		if( (params->trainP.calls[line_num] = malloc(sizeof(signed char) * params->trainP.depth[line_num])) == NULL ) {
+			perror("ERROR allocating space for trainP.calls"); exit(1);
+		}
+
+		call_i = 0;
+		for(qual_num = 0; qual_num < depth; qual_num++) {
+			if(calls[qual_num] == -1)
+				continue;
+	
+			params->trainP.calls[line_num][call_i] = (signed char) calls[qual_num];
+			params->trainP.bQ[line_num][call_i] = (unsigned char) bQ[qual_num];
+			params->trainP.mQ[line_num][call_i] = (unsigned char) mQ[qual_num];
+			call_i++;
+		}
+		if( params->trainP.depth[line_num] != call_i ) {
+			fprintf(stderr, "ERROR: mismatch between trainP.depth and call_i\n"); exit(1);
+		}
+		line_num++;
+	}
+	params->trainP.len = line_num;
+	params->trainP.len = line_num;
+	fclose(fptrIN);
+	free(line); free(calls);
+}
+
+// Use EM algorithm on a memory-located data set to train the parameters
+void snvmixTrain_pileup(param_struct *params) {
+	int line_num = 0, call_i = 0, k = 0, states;
+	int iter = 0;
+	FILE *fptrOUT;
+
+	long double phred[PHRED_MAX + 1];
+	long double bsum = 0, msum = 0, *b, *mu, *notmu, *pi, *log_pi, z = 0;
+	long double **pG, **xr;
+	long double *xrhat, *sum_pG;
+	long double Y, not_Y, M;
+	int N_sum;
+	int strength;
+
+	long double ll, prev_ll = 0;
+	//initPhred(phred, PHRED_MAX+1);
+	initPhred(phred, PHRED_MAX);
+
+	states = params->states;
+
+	// Initialize local values of parameters
+	mu = params->mu;
+	pi = params->pi;
+
+	if( ( b = malloc(states * sizeof(long double)) ) == NULL) { perror("[snvmixClassify_pileup] Could not allocate b\n"); exit(1); }
+	if( ( notmu = malloc(states * sizeof(long double)) ) == NULL) { perror("[snvmixClassify_pileup] Could not allocate notmu\n"); exit(1); }
+	if( ( log_pi = malloc(states * sizeof(long double)) ) == NULL) { perror("[snvmixClassify_pileup] Could not allocate log_pi\n"); exit(1); }
+	if( ( xrhat = malloc(states * sizeof(long double)) ) == NULL) { perror("[snvmixClassify_pileup] Could not allocate xrhat\n"); exit(1); }
+	if( ( sum_pG = malloc(states * sizeof(long double)) ) == NULL) { perror("[snvmixClassify_pileup] Could not allocate sum_pG\n"); exit(1); }
+
+	snvmixGetTrainSet_pileup(params);
+
+	if(params->debug) {
+	for(line_num = 0; line_num < params->trainP.len; line_num++) {
+		fprintf(stderr, "line_num: %d", line_num);
+		for(call_i = 0; call_i < params->trainP.depth[line_num]; call_i++) {
+			fprintf(stderr, "\t(%d,bQ%d,mQ%d)", params->trainP.calls[line_num][call_i], params->trainP.bQ[line_num][call_i], params->trainP.mQ[line_num][call_i]);
+		}
+		fprintf(stderr, "\n\n");
+	}
+	}
+	// Initialize mu and pi
+
+	fprintf(stderr, "Initializing mu\n");
+	for(k = 0; k < states; k++) {
+		params->mu[k] = (long double) params->trainP.alpha[k] / (params->trainP.alpha[k] + params->trainP.beta[k]);
+		fprintf(stderr,"\talpha[%d] = %Lf,\tbeta[%d] = %Lf,\tmu[%d] = %Lf\n", k, params->trainP.alpha[k], k, params->trainP.beta[k], k, params->mu[k]);
+	}
+
+	fprintf(stderr, "Initializing pi\n");
+	z = (long double) params->trainP.delta[0] + params->trainP.delta[1] + params->trainP.delta[2];
+	if(!z) { z = 1; }
+	for(k = 0; k < states; k++) {
+		params->pi[k] = (long double) params->trainP.delta[k]  / z;
+		fprintf(stderr,"\tdelta[%d] = %Lf,\tpi[%d] = %Lf\n", k, params->trainP.delta[k], k, params->pi[k]);
+	}
+
+	strength = params->trainP.len;
+
+	// Initialize matrices
+	if( (pG = malloc(sizeof(long double *) * params->trainP.len)) == NULL) {
+		perror("ERROR allocating initial space for pG"); exit(1);
+	}
+	if( (xr = malloc(sizeof(long double *) * params->trainP.len)) == NULL) {
+		perror("ERROR allocating initial space for xr"); exit(1);
+	}
+	for(line_num = 0; line_num < params->trainP.len; line_num++) {
+		// [states] cells for each line_num
+		if( (pG[line_num] = malloc(sizeof(long double) * states)) == NULL) {
+			perror("ERROR allocating space for pG"); exit(1);
+		}
+		if( (xr[line_num] = malloc(sizeof(long double) * states)) == NULL) {
+			perror("ERROR allocating space for xr"); exit(1);
+		}
+	}
+
+	for(iter = 0; iter < params->trainP.max_iter; iter++) {
+		// Reset values for this iteration
+		for(k = 0; k < states; k++) {
+			notmu[k] = 1 - mu[k];
+			log_pi[k] = logl(pi[k]);
+
+			sum_pG[k] = 0;
+			xrhat[k] = 0;
+
+		}
+		N_sum = 0;
+		ll = 0;
+
+		// E step
+		for(line_num = 0; line_num < params->trainP.len; line_num++) {
+			if(params->trainP.depth == 0)
+				continue;
+			for(k = 0; k < states; k++) {
+				xr[line_num][k] = 0;
+				b[k] = 0;
+			}
+
+			for(call_i = 0; call_i < params->trainP.depth[line_num]; call_i++) {
+				if(params->trainP.calls[line_num][call_i] == 1) {
+					Y = phred[params->trainP.bQ[line_num][call_i]];
+					not_Y = 1-phred[params->trainP.bQ[line_num][call_i]];
+				} else {
+					Y = (1-phred[params->trainP.bQ[line_num][call_i]])/3;
+					not_Y = phred[params->trainP.bQ[line_num][call_i]] + 2*(1-phred[params->trainP.bQ[line_num][call_i]])/3;
+				}
+				M =  phred[params->trainP.mQ[line_num][call_i]];
+				for(k = 0; k < states; k++) {
+					b[k] = b[k] + logl( (1 - M) * 0.5 + M * (Y * mu[k] + not_Y * notmu[k]) );
+					xr[line_num][k] = xr[line_num][k] + Y;
+				}
+			}
+			z = 0;
+			for(k = 0; k < states; k++) {
+				b[k] = expl(b[k] + log_pi[k]);
+				z = z + b[k];
+			}
+			if(!z) { z=1; }
+			for(k = 0; k < states; k++) {
+				pG[line_num][k] = b[k] / z;
+			}
+
+			ll = ll + logl(z);
+
+			N_sum = N_sum + params->trainP.depth[line_num];
+
+			for(k = 0; k < states; k++) {
+				sum_pG[k] = sum_pG[k] + pG[line_num][k];
+				xrhat[k] = xrhat[k] + xr[line_num][k] * pG[line_num][k];
+			}
+		}
+
+		// Check log likelihood
+		if(iter == 0)
+			prev_ll = ll;
+		else if(ll <= prev_ll)
+			break;
+		prev_ll = ll;
+
+		// M step
+		z = 0;
+		for(k = 0; k < states; k++) {
+			z = z + sum_pG[k] + params->trainP.delta[k];
+		}
+		if(!z) { z=1; }
+		for(k = 0; k < states; k++) {
+			// Update pi
+			params->pi[k] = (sum_pG[k] + params->trainP.delta[k]) / z;
+			// Update mu
+			params->mu[k] = (xrhat[k] + params->trainP.alpha[k]*strength-1) / (N_sum + params->trainP.alpha[k]*strength + params->trainP.beta[k]*strength-2);
+		}
+	}
+
+	if(params->modelfile == NULL) {
+		fptrOUT = stdout;
+	} else {
+		fptrOUT = fopen(params->modelfile, "w");
+		if(fptrOUT == NULL) {
+			perror("ERROR: could not open modelfile for writing, using STDOUT");
+			fptrOUT = stdout;
+		}
+	}
+	fprintf(fptrOUT,"mu");
+	for(k = 0; k < states; k++) {
+		fprintf(fptrOUT, " %0.30Lf", params->mu[k]);
+	}
+	fprintf(fptrOUT,"\n");
+	fprintf(fptrOUT,"pi");
+	for(k = 0; k < states; k++) {
+		fprintf(fptrOUT, " %0.30Lf", params->pi[k]);
+	}
+	fprintf(fptrOUT,"\n");
+
+	/* free unused memory */
+	free(b); free(notmu); free(log_pi);
+	free(xrhat); free(sum_pG);
+	for(line_num = 0; line_num < params->trainP.len; line_num++) {
+		// free [states] cells for each line_num
+		free(pG[line_num]);
+		free(xr[line_num]);
+	}
+	free(pG); free(xr);
+}
+