Mercurial > repos > clustalomega > clustalomega
annotate clustalomega/clustal-omega-0.2.0/src/squid/weight.c @ 0:ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
author | clustalomega |
---|---|
date | Tue, 07 Jun 2011 17:04:25 -0400 |
parents | |
children |
rev | line source |
---|---|
0
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1 /***************************************************************** |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
2 * SQUID - a library of functions for biological sequence analysis |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
3 * Copyright (C) 1992-2002 Washington University School of Medicine |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
4 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
5 * This source code is freely distributed under the terms of the |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
6 * GNU General Public License. See the files COPYRIGHT and LICENSE |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
7 * for details. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
8 *****************************************************************/ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
9 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
10 /* weight.c |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
11 * SRE, Thu Mar 3 07:56:01 1994 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
12 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
13 * Calculate weights for sequences in an alignment. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
14 * RCS $Id: weight.c 217 2011-03-19 10:27:10Z andreas $ (Original squid RCS Id: weight.c,v 1.9 2002/10/09 14:26:09 eddy Exp) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
15 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
16 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
17 #include <ctype.h> |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
18 #include <string.h> |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
19 #include "squid.h" |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
20 #include "sre_random.h" |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
21 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
22 static void upweight(struct phylo_s *tree, int nseq, float *lwt, float *rwt, int node); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
23 static void downweight(struct phylo_s *tree, int nseq, float *lwt, float *rwt, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
24 float *fwt, int node); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
25 static float simple_distance(char *s1, char *s2); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
26 static int simple_diffmx(char **aseqs,int num, float ***ret_dmx); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
27 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
28 /* Function: GSCWeights() |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
29 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
30 * Purpose: Use Erik's tree-based algorithm to set weights for |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
31 * sequences in an alignment. upweight() and downweight() |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
32 * are derived from Graeme Mitchison's code. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
33 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
34 * Args: aseq - array of (0..nseq-1) aligned sequences |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
35 * nseq - number of seqs in alignment |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
36 * alen - length of alignment |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
37 * wgt - allocated [0..nseq-1] array of weights to be returned |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
38 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
39 * Return: (void) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
40 * wgt is filled in. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
41 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
42 void |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
43 GSCWeights(char **aseq, int nseq, int alen, float *wgt) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
44 { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
45 float **dmx; /* distance (difference) matrix */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
46 struct phylo_s *tree; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
47 float *lwt, *rwt; /* weight on left, right of this tree node */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
48 float *fwt; /* final weight assigned to this node */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
49 int i; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
50 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
51 /* Sanity check first |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
52 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
53 if (nseq == 1) { wgt[0] = 1.0; return; } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
54 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
55 /* I use a simple fractional difference matrix derived by |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
56 * pairwise identity. Perhaps I should include a Poisson |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
57 * distance correction. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
58 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
59 MakeDiffMx(aseq, nseq, &dmx); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
60 if (! Cluster(dmx, nseq, CLUSTER_MIN, &tree)) Die("Cluster() failed"); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
61 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
62 /* Allocations |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
63 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
64 lwt = MallocOrDie (sizeof(float) * (2 * nseq - 1)); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
65 rwt = MallocOrDie (sizeof(float) * (2 * nseq - 1)); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
66 fwt = MallocOrDie (sizeof(float) * (2 * nseq - 1)); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
67 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
68 /* lwt and rwt are the total branch weight to the left and |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
69 * right of a node or sequence. They are 0..2N-2. 0..N-1 are |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
70 * the sequences; these have weight 0. N..2N-2 are the actual |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
71 * tree nodes. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
72 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
73 for (i = 0; i < nseq; i++) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
74 lwt[i] = rwt[i] = 0.0; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
75 /* recursively calculate rwt, lwt, starting |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
76 at node nseq (the root) */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
77 upweight(tree, nseq, lwt, rwt, nseq); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
78 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
79 /* recursively distribute weight across the |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
80 tree */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
81 fwt[nseq] = nseq; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
82 downweight(tree, nseq, lwt, rwt, fwt, nseq); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
83 /* collect the weights */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
84 for (i = 0; i < nseq; i++) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
85 wgt[i] = fwt[i]; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
86 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
87 FMX2Free(dmx); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
88 FreePhylo(tree, nseq); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
89 free(lwt); free(rwt); free(fwt); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
90 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
91 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
92 static void |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
93 upweight(struct phylo_s *tree, int nseq, float *lwt, float *rwt, int node) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
94 { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
95 int ld,rd; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
96 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
97 ld = tree[node-nseq].left; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
98 if (ld >= nseq) upweight(tree, nseq, lwt, rwt, ld); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
99 rd = tree[node-nseq].right; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
100 if (rd >= nseq) upweight(tree, nseq, lwt, rwt, rd); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
101 lwt[node] = lwt[ld] + rwt[ld] + tree[node-nseq].lblen; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
102 rwt[node] = lwt[rd] + rwt[rd] + tree[node-nseq].rblen; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
103 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
104 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
105 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
106 static void |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
107 downweight(struct phylo_s *tree, int nseq, float *lwt, float *rwt, float *fwt, int node) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
108 { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
109 int ld,rd; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
110 float lnum, rnum; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
111 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
112 ld = tree[node-nseq].left; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
113 rd = tree[node-nseq].right; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
114 if (lwt[node] + rwt[node] > 0.0) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
115 { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
116 fwt[ld] = fwt[node] * (lwt[node] / (lwt[node] + rwt[node])); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
117 fwt[rd] = fwt[node] * (rwt[node] / (lwt[node] + rwt[node])); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
118 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
119 else |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
120 { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
121 lnum = (ld >= nseq) ? tree[ld-nseq].incnum : 1.0; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
122 rnum = (rd >= nseq) ? tree[rd-nseq].incnum : 1.0; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
123 fwt[ld] = fwt[node] * lnum / (lnum + rnum); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
124 fwt[rd] = fwt[node] * rnum / (lnum + rnum); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
125 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
126 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
127 if (ld >= nseq) downweight(tree, nseq, lwt, rwt, fwt, ld); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
128 if (rd >= nseq) downweight(tree, nseq, lwt, rwt, fwt, rd); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
129 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
130 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
131 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
132 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
133 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
134 /* Function: VoronoiWeights() |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
135 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
136 * Purpose: Calculate weights using the scheme of Sibbald & |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
137 * Argos (JMB 216:813-818 1990). The scheme is |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
138 * slightly modified because the original algorithm |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
139 * actually doesn't work on gapped alignments. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
140 * The sequences are assumed to be protein. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
141 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
142 * Args: aseq - array of (0..nseq-1) aligned sequences |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
143 * nseq - number of sequences |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
144 * alen - length of alignment |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
145 * wgt - allocated [0..nseq-1] array of weights to be returned |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
146 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
147 * Return: void |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
148 * wgt is filled in. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
149 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
150 void |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
151 VoronoiWeights(char **aseq, int nseq, int alen, float *wgt) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
152 { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
153 float **dmx; /* distance (difference) matrix */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
154 float *halfmin; /* 1/2 minimum distance to other seqs */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
155 char **psym; /* symbols seen in each column */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
156 int *nsym; /* # syms seen in each column */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
157 int symseen[27]; /* flags for observed syms */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
158 char *randseq; /* randomly generated sequence */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
159 int acol; /* pos in aligned columns */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
160 int idx; /* index in sequences */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
161 int symidx; /* 0..25 index for symbol */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
162 int i; /* generic counter */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
163 float min; /* minimum distance */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
164 float dist; /* distance between random and real */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
165 float challenge, champion; /* for resolving ties */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
166 int itscale; /* how many iterations per seq */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
167 int iteration; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
168 int best; /* index of nearest real sequence */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
169 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
170 /* Sanity check first |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
171 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
172 if (nseq == 1) { wgt[0] = 1.0; return; } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
173 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
174 itscale = 50; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
175 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
176 /* Precalculate 1/2 minimum distance to other |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
177 * sequences for each sequence |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
178 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
179 if (! simple_diffmx(aseq, nseq, &dmx)) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
180 Die("simple_diffmx() failed"); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
181 halfmin = MallocOrDie (sizeof(float) * nseq); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
182 for (idx = 0; idx < nseq; idx++) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
183 { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
184 for (min = 1.0, i = 0; i < nseq; i++) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
185 { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
186 if (i == idx) continue; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
187 if (dmx[idx][i] < min) min = dmx[idx][i]; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
188 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
189 halfmin[idx] = min / 2.0; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
190 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
191 Free2DArray((void **) dmx, nseq); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
192 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
193 /* Set up the random sequence generating model. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
194 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
195 psym = MallocOrDie (alen * sizeof(char *)); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
196 nsym = MallocOrDie (alen * sizeof(int)); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
197 for (acol = 0; acol < alen; acol++) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
198 psym[acol] = MallocOrDie (27 * sizeof(char)); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
199 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
200 /* #ifdef ORIGINAL_SIBBALD_ALGORITHM_IS_BROKEN */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
201 for (acol = 0; acol < alen; acol++) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
202 { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
203 memset(symseen, 0, sizeof(int) * 27); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
204 for (idx = 0; idx < nseq; idx++) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
205 if (! isgap(aseq[idx][acol])) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
206 { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
207 if (isupper((int) aseq[idx][acol])) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
208 symidx = aseq[idx][acol] - 'A'; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
209 else |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
210 symidx = aseq[idx][acol] - 'a'; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
211 if (symidx >= 0 && symidx < 26) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
212 symseen[symidx] = 1; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
213 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
214 else |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
215 symseen[26] = 1; /* a gap */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
216 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
217 for (nsym[acol] = 0, i = 0; i < 26; i++) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
218 if (symseen[i]) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
219 { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
220 psym[acol][nsym[acol]] = 'A'+i; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
221 nsym[acol]++; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
222 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
223 if (symseen[26]) { psym[acol][nsym[acol]] = ' '; nsym[acol]++; } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
224 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
225 /* #endif ORIGINAL_SIBBALD_ALGORITHM_IS_BROKEN */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
226 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
227 /* Note: the original Sibbald&Argos algorithm calls for |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
228 * bounding the sampled space using a template-like random |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
229 * sequence generator. However, this leads to one minor |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
230 * and one major problem. The minor problem is that |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
231 * exceptional amino acids in a column can have a |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
232 * significant effect by altering the amount of sampled |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
233 * sequence space; the larger the data set, the worse |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
234 * this problem becomes. The major problem is that |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
235 * there is no reasonable way to deal with gaps. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
236 * Gapped sequences simply inhabit a different dimensionality |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
237 * and it's pretty painful to imagine calculating Voronoi |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
238 * volumes when the N in your N-space is varying. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
239 * Note that all the examples shown by Sibbald and Argos |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
240 * are *ungapped* examples. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
241 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
242 * The best way I've found to circumvent this problem is |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
243 * just not to bound the sampled space; count gaps as |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
244 * symbols and generate completely random sequences. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
245 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
246 #ifdef ALL_SEQUENCE_SPACE |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
247 for (acol = 0; acol < alen; acol++) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
248 { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
249 strcpy(psym[acol], "ACDEFGHIKLMNPQRSTVWY "); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
250 nsym[acol] = 21; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
251 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
252 #endif |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
253 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
254 /* Sibbald and Argos algorithm: |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
255 * 1) assign all seqs weight 0. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
256 * 2) generate a "random" sequence |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
257 * 3) calculate distance to every other sequence |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
258 * (if we get a distance < 1/2 minimum distance |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
259 * to other real seqs, we can stop) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
260 * 4) if unique closest sequence, increment its weight 1. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
261 * if multiple closest seq, choose one randomly |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
262 * 5) repeat 2-4 for lots of iterations |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
263 * 6) normalize all weights to sum to nseq. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
264 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
265 randseq = MallocOrDie ((alen+1) * sizeof(char)); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
266 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
267 best = 42.; /* solely to silence GCC uninit warnings. */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
268 FSet(wgt, nseq, 0.0); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
269 for (iteration = 0; iteration < itscale * nseq; iteration++) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
270 { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
271 for (acol = 0; acol < alen; acol++) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
272 randseq[acol] = (nsym[acol] == 0) ? ' ' : psym[acol][CHOOSE(nsym[acol])]; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
273 randseq[acol] = '\0'; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
274 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
275 champion = sre_random(); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
276 for (min = 1.0, idx = 0; idx < nseq; idx++) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
277 { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
278 dist = simple_distance(aseq[idx], randseq); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
279 if (dist < halfmin[idx]) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
280 { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
281 best = idx; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
282 break; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
283 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
284 if (dist < min) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
285 { champion = sre_random(); best = idx; min = dist; } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
286 else if (dist == min) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
287 { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
288 challenge = sre_random(); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
289 if (challenge > champion) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
290 { champion = challenge; best = idx; min = dist; } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
291 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
292 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
293 wgt[best] += 1.0; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
294 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
295 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
296 for (idx = 0; idx < nseq; idx++) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
297 wgt[idx] = wgt[idx] / (float) itscale; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
298 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
299 free(randseq); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
300 free(nsym); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
301 free(halfmin); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
302 Free2DArray((void **) psym, alen); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
303 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
304 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
305 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
306 /* Function: simple_distance() |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
307 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
308 * Purpose: For two identical-length null-terminated strings, return |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
309 * the fractional difference between them. (0..1) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
310 * (Gaps don't count toward anything.) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
311 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
312 static float |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
313 simple_distance(char *s1, char *s2) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
314 { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
315 int diff = 0; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
316 int valid = 0; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
317 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
318 for (; *s1 != '\0'; s1++, s2++) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
319 { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
320 if (isgap(*s1) || isgap(*s2)) continue; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
321 if (*s1 != *s2) diff++; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
322 valid++; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
323 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
324 return (valid > 0 ? ((float) diff / (float) valid) : 0.0); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
325 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
326 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
327 /* Function: simple_diffmx() |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
328 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
329 * Purpose: Given a set of flushed, aligned sequences, construct |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
330 * an NxN fractional difference matrix using the |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
331 * simple_distance rule. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
332 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
333 * Args: aseqs - flushed, aligned sequences |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
334 * num - number of aseqs |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
335 * ret_dmx - RETURN: difference matrix (caller must free) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
336 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
337 * Return: 1 on success, 0 on failure. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
338 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
339 static int |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
340 simple_diffmx(char **aseqs, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
341 int num, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
342 float ***ret_dmx) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
343 { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
344 float **dmx; /* RETURN: distance matrix */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
345 int i,j; /* counters over sequences */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
346 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
347 /* Allocate |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
348 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
349 if ((dmx = (float **) malloc (sizeof(float *) * num)) == NULL) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
350 Die("malloc failed"); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
351 for (i = 0; i < num; i++) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
352 if ((dmx[i] = (float *) malloc (sizeof(float) * num)) == NULL) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
353 Die("malloc failed"); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
354 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
355 /* Calculate distances, symmetric matrix |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
356 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
357 for (i = 0; i < num; i++) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
358 for (j = i; j < num; j++) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
359 dmx[i][j] = dmx[j][i] = simple_distance(aseqs[i], aseqs[j]); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
360 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
361 /* Return |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
362 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
363 *ret_dmx = dmx; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
364 return 1; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
365 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
366 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
367 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
368 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
369 /* Function: BlosumWeights() |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
370 * Date: SRE, Fri Jul 16 17:33:59 1999 (St. Louis) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
371 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
372 * Purpose: Assign weights to a set of aligned sequences |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
373 * using the BLOSUM rule: |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
374 * - do single linkage clustering at some pairwise identity |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
375 * - in each cluster, give each sequence 1/clustsize |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
376 * total weight. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
377 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
378 * The clusters have no pairwise link >= maxid. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
379 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
380 * O(N) in memory. Probably ~O(NlogN) in time; O(N^2) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
381 * in worst case, which is no links between sequences |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
382 * (e.g., values of maxid near 1.0). |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
383 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
384 * Args: aseqs - alignment |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
385 * nseq - number of seqs in alignment |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
386 * alen - # of columns in alignment |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
387 * maxid - fractional identity (e.g. 0.62 for BLOSUM62) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
388 * wgt - [0..nseq-1] array of weights to be returned |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
389 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
390 void |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
391 BlosumWeights(char **aseqs, int nseq, int alen, float maxid, float *wgt) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
392 { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
393 int *c, nc; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
394 int *nmem; /* number of seqs in each cluster */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
395 int i; /* loop counter */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
396 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
397 SingleLinkCluster(aseqs, nseq, alen, maxid, &c, &nc); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
398 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
399 FSet(wgt, nseq, 1.0); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
400 nmem = MallocOrDie(sizeof(int) * nc); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
401 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
402 for (i = 0; i < nc; i++) nmem[i] = 0; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
403 for (i = 0; i < nseq; i++) nmem[c[i]]++; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
404 for (i = 0; i < nseq; i++) wgt[i] = 1. / (float) nmem[c[i]]; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
405 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
406 free(nmem); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
407 free(c); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
408 return; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
409 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
410 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
411 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
412 /* Function: PositionBasedWeights() |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
413 * Date: SRE, Fri Jul 16 17:47:22 1999 [St. Louis] |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
414 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
415 * Purpose: Implementation of Henikoff and Henikoff position-based |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
416 * weights (JMB 243:574-578, 1994) [Henikoff94b]. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
417 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
418 * A significant advantage of this approach that Steve and Jorja |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
419 * don't point out is that it is O(N) in memory, unlike |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
420 * many other approaches like GSC weights or Voronoi. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
421 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
422 * A potential disadvantage that they don't point out |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
423 * is that in the theoretical limit of infinite sequences |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
424 * in the alignment, weights go flat: eventually every |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
425 * column has at least one representative of each of 20 aa (or 4 nt) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
426 * in it. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
427 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
428 * They also don't give a rule for how to handle gaps. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
429 * The rule used here seems the obvious and sensible one |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
430 * (ignore them). This means that longer sequences |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
431 * initially get more weight; hence a "double |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
432 * normalization" in which the weights are first divided |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
433 * by sequence length (to compensate for that effect), |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
434 * then normalized to sum to nseq. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
435 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
436 * Limitations: |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
437 * Implemented in a way that's alphabet-independent: |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
438 * it uses the 26 upper case letters as "residues". |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
439 * Any alphabetic character in aseq is interpreted as |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
440 * a unique "residue" (case insensitively; lower case |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
441 * mapped to upper case). All other characters are |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
442 * interpreted as gaps. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
443 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
444 * This way, we don't have to pass around any alphabet |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
445 * type info (DNA vs. RNA vs. protein) and don't have |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
446 * to deal with remapping IUPAC degenerate codes |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
447 * probabilistically. However, on the down side, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
448 * a sequence with a lot of degenerate IUPAC characters |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
449 * will get an artifactually high PB weight. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
450 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
451 * Args: aseq - sequence alignment to weight |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
452 * nseq - number of sequences in alignment |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
453 * alen - length of alignment |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
454 * wgt - RETURN: weights filled in (pre-allocated 0..nseq-1) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
455 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
456 * Returns: (void) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
457 * wgt is allocated (0..nseq-1) by caller, and filled in here. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
458 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
459 void |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
460 PositionBasedWeights(char **aseq, int nseq, int alen, float *wgt) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
461 { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
462 int rescount[26]; /* count of A-Z residues in a column */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
463 int nres; /* number of different residues in col */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
464 int idx, pos; /* indices into aseq */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
465 int x; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
466 float norm; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
467 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
468 FSet(wgt, nseq, 0.0); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
469 for (pos = 0; pos < alen; pos++) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
470 { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
471 for (x = 0; x < 26; x++) rescount[x] = 0; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
472 for (idx = 0; idx < nseq; idx++) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
473 if (isalpha(aseq[idx][pos])) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
474 rescount[toupper(aseq[idx][pos]) - 'A'] ++; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
475 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
476 nres = 0; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
477 for (x = 0; x < 26; x++) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
478 if (rescount[x] > 0) nres++; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
479 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
480 for (idx = 0; idx < nseq; idx++) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
481 if (isalpha(aseq[idx][pos])) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
482 wgt[idx] += 1. / (float) (nres * rescount[toupper(aseq[idx][pos]) - 'A']); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
483 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
484 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
485 for (idx = 0; idx < nseq; idx++) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
486 wgt[idx] /= (float) DealignedLength(aseq[idx]); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
487 norm = (float) nseq / FSum(wgt, nseq); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
488 FScale(wgt, nseq, norm); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
489 return; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
490 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
491 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
492 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
493 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
494 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
495 /* Function: FilterAlignment() |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
496 * Date: SRE, Wed Jun 30 09:19:30 1999 [St. Louis] |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
497 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
498 * Purpose: Constructs a new alignment by removing near-identical |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
499 * sequences from a given alignment (where identity is |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
500 * calculated *based on the alignment*). |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
501 * Does not affect the given alignment. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
502 * Keeps earlier sequence, discards later one. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
503 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
504 * Usually called as an ad hoc sequence "weighting" mechanism. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
505 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
506 * Limitations: |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
507 * Unparsed Stockholm markup is not propagated into the |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
508 * new alignment. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
509 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
510 * Args: msa -- original alignment |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
511 * cutoff -- fraction identity cutoff. 0.8 removes sequences > 80% id. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
512 * ret_new -- RETURN: new MSA, usually w/ fewer sequences |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
513 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
514 * Return: (void) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
515 * ret_new must be free'd by caller: MSAFree(). |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
516 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
517 void |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
518 FilterAlignment(MSA *msa, float cutoff, MSA **ret_new) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
519 { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
520 int nnew; /* number of seqs in new alignment */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
521 int *list; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
522 int *useme; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
523 float ident; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
524 int i,j; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
525 int remove; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
526 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
527 /* find which seqs to keep (list) */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
528 /* diff matrix; allow ragged ends */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
529 list = MallocOrDie (sizeof(int) * msa->nseq); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
530 useme = MallocOrDie (sizeof(int) * msa->nseq); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
531 for (i = 0; i < msa->nseq; i++) useme[i] = FALSE; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
532 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
533 nnew = 0; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
534 for (i = 0; i < msa->nseq; i++) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
535 { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
536 remove = FALSE; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
537 for (j = 0; j < nnew; j++) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
538 { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
539 ident = PairwiseIdentity(msa->aseq[i], msa->aseq[list[j]]); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
540 if (ident > cutoff) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
541 { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
542 remove = TRUE; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
543 printf("removing %12s -- fractional identity %.2f to %s\n", |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
544 msa->sqname[i], ident, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
545 msa->sqname[list[j]]); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
546 break; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
547 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
548 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
549 if (remove == FALSE) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
550 list[nnew++] = i; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
551 useme[i] = TRUE; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
552 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
553 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
554 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
555 MSASmallerAlignment(msa, useme, ret_new); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
556 free(list); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
557 free(useme); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
558 return; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
559 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
560 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
561 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
562 /* Function: SampleAlignment() |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
563 * Date: SRE, Wed Jun 30 10:13:56 1999 [St. Louis] |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
564 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
565 * Purpose: Constructs a new, smaller alignment by sampling a given |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
566 * number of sequences at random. Does not change the |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
567 * alignment nor the order of the sequences. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
568 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
569 * If you ask for a sample that is larger than nseqs, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
570 * it silently returns the original alignment. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
571 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
572 * Not really a weighting method, but this is as good |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
573 * a place as any to keep it, since it's similar in |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
574 * construction to FilterAlignment(). |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
575 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
576 * Args: msa -- original alignment |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
577 * sample -- number of sequences in new alignment (0 < sample <= nseq) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
578 * ret_new -- RETURN: new MSA |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
579 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
580 * Return: (void) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
581 * ret_new must be free'd by caller: MSAFree(). |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
582 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
583 void |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
584 SampleAlignment(MSA *msa, int sample, MSA **ret_new) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
585 { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
586 int *list; /* array for random selection w/o replace */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
587 int *useme; /* array of flags 0..nseq-1: TRUE to use */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
588 int i, idx; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
589 int len; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
590 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
591 /* Allocations |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
592 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
593 list = (int *) MallocOrDie (sizeof(int) * msa->nseq); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
594 useme = (int *) MallocOrDie (sizeof(int) * msa->nseq); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
595 for (i = 0; i < msa->nseq; i++) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
596 { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
597 list[i] = i; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
598 useme[i] = FALSE; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
599 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
600 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
601 /* Sanity check. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
602 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
603 if (sample >= msa->nseq) sample = msa->nseq; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
604 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
605 /* random selection w/o replacement */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
606 for (len = msa->nseq, i = 0; i < sample; i++) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
607 { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
608 idx = CHOOSE(len); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
609 printf("chose %d: %s\n", list[idx], msa->sqname[list[idx]]); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
610 useme[list[idx]] = TRUE; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
611 list[idx] = list[--len]; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
612 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
613 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
614 MSASmallerAlignment(msa, useme, ret_new); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
615 free(list); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
616 free(useme); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
617 return; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
618 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
619 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
620 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
621 /* Function: SingleLinkCluster() |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
622 * Date: SRE, Fri Jul 16 15:02:57 1999 [St. Louis] |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
623 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
624 * Purpose: Perform simple single link clustering of seqs in a |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
625 * sequence alignment. A pairwise identity threshold |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
626 * defines whether two sequences are linked or not. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
627 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
628 * Important: runs in O(N) memory, unlike standard |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
629 * graph decomposition algorithms that use O(N^2) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
630 * adjacency matrices or adjacency lists. Requires |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
631 * O(N^2) time in worst case (which is when you have |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
632 * no links at all), O(NlogN) in "average" |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
633 * case, and O(N) in best case (when there is just |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
634 * one cluster in a completely connected graph. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
635 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
636 * (Developed because hmmbuild could no longer deal |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
637 * with GP120, a 16,013 sequence alignment.) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
638 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
639 * Limitations: |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
640 * CASE-SENSITIVE. Assumes aseq have been put into |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
641 * either all lower or all upper case; or at least, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
642 * within a column, there's no mixed case. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
643 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
644 * Algorithm: |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
645 * I don't know if this algorithm is published. I |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
646 * haven't seen it in graph theory books, but that might |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
647 * be because it's so obvious that nobody's bothered. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
648 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
649 * In brief, we're going to do a breadth-first search |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
650 * of the graph, and we're going to calculate links |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
651 * on the fly rather than precalculating them into |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
652 * some sort of standard adjacency structure. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
653 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
654 * While working, we keep two stacks of maximum length N: |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
655 * a : list of vertices that are still unconnected. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
656 * b : list of vertices that we've connected to |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
657 * in our current breadth level, but we haven't |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
658 * yet tested for other connections to a. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
659 * The current length (number of elements in) a and b are |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
660 * kept in na, nb. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
661 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
662 * We store our results in an array of length N: |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
663 * c : assigns each vertex to a component. for example |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
664 * c[4] = 1 means that vertex 4 is in component 1. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
665 * nc is the number of components. Components |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
666 * are numbered from 0 to nc-1. We return c and nc |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
667 * to our caller. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
668 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
669 * The algorithm is: |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
670 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
671 * Initialisation: |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
672 * a <-- all the vertices |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
673 * na <-- N |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
674 * b <-- empty set |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
675 * nb <-- 0 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
676 * nc <-- 0 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
677 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
678 * Then: |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
679 * while (a is not empty) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
680 * pop a vertex off a, push onto b |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
681 * while (b is not empty) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
682 * pop vertex v off b |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
683 * assign c[v] = nc |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
684 * for each vertex w in a: |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
685 * compare v,w. If w is linked to v, remove w |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
686 * from a, push onto b. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
687 * nc++ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
688 * q.e.d. :) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
689 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
690 * Args: aseq - aligned sequences |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
691 * nseq - number of sequences in aseq |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
692 * alen - alignment length |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
693 * maxid - fractional identity threshold 0..1. if id >= maxid, seqs linked |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
694 * ret_c - RETURN: 0..nseq-1 assignments of seqs to components (clusters) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
695 * ret_nc - RETURN: number of components |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
696 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
697 * Returns: void. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
698 * ret_c is allocated here. Caller free's with free(*ret_c) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
699 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
700 void |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
701 SingleLinkCluster(char **aseq, int nseq, int alen, float maxid, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
702 int **ret_c, int *ret_nc) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
703 { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
704 int *a, na; /* stack of available vertices */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
705 int *b, nb; /* stack of working vertices */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
706 int *c; /* array of results */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
707 int nc; /* total number of components */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
708 int v,w; /* index of a working vertices */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
709 int i; /* loop counter */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
710 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
711 /* allocations and initializations |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
712 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
713 a = MallocOrDie (sizeof(int) * nseq); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
714 b = MallocOrDie (sizeof(int) * nseq); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
715 c = MallocOrDie (sizeof(int) * nseq); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
716 for (i = 0; i < nseq; i++) a[i] = i; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
717 na = nseq; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
718 nb = 0; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
719 nc = 0; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
720 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
721 /* Main algorithm |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
722 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
723 while (na > 0) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
724 { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
725 v = a[na-1]; na--; /* pop a vertex off a, */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
726 b[nb] = v; nb++; /* and push onto b */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
727 while (nb > 0) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
728 { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
729 v = b[nb-1]; nb--; /* pop vertex off b */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
730 c[v] = nc; /* assign it to component nc */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
731 for (i = na-1; i >= 0; i--)/* backwards, becase of deletion/swapping we do*/ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
732 if (simple_distance(aseq[v], aseq[a[i]]) <= 1. - maxid) /* linked? */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
733 { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
734 w = a[i]; a[i] = a[na-1]; na--; /* delete w from a (note swap) */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
735 b[nb] = w; nb++; /* push w onto b */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
736 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
737 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
738 nc++; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
739 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
740 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
741 /* Cleanup and return |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
742 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
743 free(a); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
744 free(b); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
745 *ret_c = c; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
746 *ret_nc = nc; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
747 return; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
748 } |