Mercurial > repos > miller-lab > genome_diversity
changeset 29:fb944979bf35
Update to Miller Lab devshed revision 5f0be4d1db30
author | Richard Burhans <burhans@bx.psu.edu> |
---|---|
date | Thu, 25 Jul 2013 12:01:47 -0400 |
parents | 184d14e4270d |
children | 4188853b940b |
files | genome_diversity/src/dpmix.c genome_diversity/src/sweep.c |
diffstat | 2 files changed, 47 insertions(+), 35 deletions(-) [+] |
line wrap: on
line diff
--- a/genome_diversity/src/dpmix.c Wed Jul 17 12:46:46 2013 -0400 +++ b/genome_diversity/src/dpmix.c Thu Jul 25 12:01:47 2013 -0400 @@ -53,8 +53,6 @@ #include "lib.h" #include <math.h> -//#define ABT - // maximum length of a line from the table #define MOST 50000 @@ -86,11 +84,14 @@ int b, e; } H[MOST]; +#define MAX_CHR_NAME 1000000 + // global variables int *B[7], // backpointer to state at the previous SNP (or event) *P; // chromosome position -int nH, nI, nG, genotypes, nsnp, debug, chr_col, logs, last_snp_pos, pop3; -char this_chr[100]; +int nH, nI, nG, genotypes, nsnp, debug, chr_col, logs, last_snp_pos, pop3, + nchr_name; +char this_chr[100], *chr_name[MAX_CHR_NAME]; double switch_penalty; char buf[MOST], *status; FILE *fp, *out; @@ -220,10 +221,6 @@ B[state][i] = state; continue; } -#ifdef ABT -fprintf(stderr, "%d: ABT genotype=%d, KHO %f,%f YRI %f,%f\n", - p->pos, p->g[a], p->F1, 1.0-p->F1, p->F2, 1.0-p->F2); -#endif for (state = 0; state < 7; ++state) { // find highest path-score from this event onward @@ -237,21 +234,8 @@ B[state][i] = m; ff[state] = f[m]; if (state > 0 && p->type == 0) -{ ff[state] += score(p->F1, p->F2, p->F3, p->g[a], state); -#ifdef ABT -if (state == 2) - fprintf(stderr, " YRI:"); -if (state == 4) - fprintf(stderr, " hetero:"); -if (state == 1) - fprintf(stderr, " KHO:"); -if (pop3 || state == 1 || state == 2 || state == 4) -fprintf(stderr, " score=%f ff=%f B = %d\n", - score(p->F1, p->F2, p->F3, p->g[a], state), ff[state], m); -#endif -} } if (p->type == 1) { // start of heterochomatic interval. Force paths @@ -269,7 +253,7 @@ for (j = 0; j < 7; ++j) { if (pop3 || j == 3 || j == 5 || j == 6) fprintf(stderr, " %f(%d)", from[j], B[j][i]); - } + } putc('\n', stderr); } } @@ -279,10 +263,6 @@ if (from[j] > from[state]) state = j; -#ifdef ABT -fprintf(stderr, "start backtrace in state %d\n", state); -#endif - // trace back to find the switch points // A[a].x[state] records the total length of intervals in each state for (prev_pos = i = 0; i < nsnp; ++i) { @@ -323,14 +303,15 @@ */ void one_chr() { char *s, *z = "\t\n"; - int X[MOST], n, i, g, A1, B1, A2, B2, A3, B3, a, do_read, p, pos, het; - // int j, k, tied; + int X[MOST], n, i, g, A1, B1, A2, B2, A3, B3, a, do_read, p, pos, het, + old_pos; struct snp *new; double F1, F2, F3; - // doublewinner, try; strcpy(this_chr, get_chr_name()); - nsnp = 0; + if (nchr_name == 0) + chr_name[nchr_name++] = copy_string(this_chr); + old_pos = nsnp = 0; last = NULL; // advance to this chromosome in the list of heterochromatic intervals for (het = 0; het < nH && !same_string(this_chr, H[het].chr); ++het) @@ -339,8 +320,17 @@ for (do_read = 0; ; do_read = 1) { if (do_read && (status = fgets(buf, MOST, fp)) == NULL) break; - if (!same_string(get_chr_name(), this_chr)) + if (!same_string(s = get_chr_name(), this_chr)) { + if (nchr_name >= MAX_CHR_NAME) + fatal("Too many chromosome names"); + for (i = 0; + i < nchr_name && !same_string(s, chr_name[i]); ++i) + ; + if (i < nchr_name) + fatalf("SNVs on %s aren't together", s); + chr_name[nchr_name++] = copy_string(s); break; + } // set X[i] = atoi(i-th word of buf), i is base 1 for (i = 1, s = strtok(buf, z); s != NULL; @@ -350,6 +340,9 @@ // insert events (pseudo-SNPs) for heterochomatin intervals // coming before the SNP pos = X[chr_col+1]; + if (pos <= old_pos) + fatalf("SNP at %s %d is out of order", this_chr, pos); + old_pos = pos; while (het < nH && same_string(this_chr, H[het].chr) && H[het].b < pos) { add_het(H[het].b, 1); @@ -373,7 +366,7 @@ if (g == -1) continue; if (g < 0 || g > 2) - fatalf("invalid genotype %d", g); + fatalf("invalid genotype %d in column %d, pos %d", g, n+2, X[2]); if (p == 1) { A1 += g; B1 += (2 - g);
--- a/genome_diversity/src/sweep.c Wed Jul 17 12:46:46 2013 -0400 +++ b/genome_diversity/src/sweep.c Thu Jul 25 12:01:47 2013 -0400 @@ -56,6 +56,10 @@ } W[MAX_WINDOW]; int nW; +#define MAX_CHR_NAME 1000000 +char *chr_name[MAX_CHR_NAME]; +int nchr_name; + // return the linked list of SNPs in positions b to e struct snp *add_snps(int b, int e) { struct snp *first = NULL, *last = NULL, *new; @@ -95,7 +99,8 @@ int get_chr(FILE *fp, int chr_col, int pos_col, int score_col, char *chr) { static char buf[BUF_SIZE]; static int init = 1; - char *status; + int old_pos = 0, p, i; + char *status, *s; if (init) { while ((status = fgets(buf, BUF_SIZE, fp)) != NULL && @@ -111,6 +116,9 @@ if (buf[0] == '#') fatal("cannot happen"); strcpy(chr, get_col(buf, chr_col)); + if (nchr_name == 0) + chr_name[nchr_name++] = copy_string(chr); + S[0].pos = atoi(get_col(buf, pos_col)); if (S[0].pos < 0) fatalf("remove unmapped SNPs (address = -1)"); @@ -120,13 +128,24 @@ buf[0] = '\0'; return 1; } - if (!same_string(chr, get_col(buf, chr_col))) + if (!same_string(chr, s = get_col(buf, chr_col))) break; - S[nS].pos = atoi(get_col(buf, pos_col)); + S[nS].pos = p = atoi(get_col(buf, pos_col)); + if (p <= old_pos) + fatalf("SNV at %s %d is out of order", chr, p); + old_pos = p; if (S[nS].pos < 0) fatalf("remove unmapped SNPs (address = -1)"); S[nS].x = atof(get_col(buf, score_col)); } + if (nchr_name >= MAX_CHR_NAME) + fatal("Too many chromosome names"); + for (i = 0; i < nchr_name && !same_string(s, chr_name[i]); ++i) + ; + if (i < nchr_name) + fatalf("SNVs on %s aren't together", s); + chr_name[nchr_name++] = copy_string(s); + return 1; }