forked from mothur/mothur
-
Notifications
You must be signed in to change notification settings - Fork 0
/
clearcut.h
285 lines (221 loc) · 7.37 KB
/
clearcut.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
/*
* clearcut.h
*
* $Id$
*
*****************************************************************************
*
* Copyright (c) 2004, Luke Sheneman
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* + Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* + Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* + The names of its contributors may not be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*****************************************************************************
*
* AUTHOR:
*
* Luke Sheneman
*
*/
#ifndef _INC_CLEARCUT_H_
#define _INC_CLEARCUT_H_ 1
extern "C" {
#include "common.h"
#include "cmdargs.h"
#define NJ_VERSION "1.0.9"
#define NJ_INTERNAL_NODE -1
#define NJ_LAST 101
#define NJ_INPUT_MODE_UNKNOWN 0
#define NJ_INPUT_MODE_DISTANCE 100
#define NJ_INPUT_MODE_UNALIGNED_SEQUENCES 101
#define NJ_INPUT_MODE_ALIGNED_SEQUENCES 102
#define NJ_MODEL_NONE 100
#define NJ_MODEL_JUKES 101
#define NJ_MODEL_KIMURA 102
/*
* DMAT - Distance Matrix
*
* This is arguably the most important structure in the
* program. This is the distance matrix, and it is used
* by many functions throughout the application.
*
* The matrix is architected as a contiguously allocated
* upper-diagonal matrix of floats which include the
* diagonal.
*
* Example:
*
* 0 1 2 3 4 5
* 0 0.0 1.0 0.3 0.2 0.1 0.3
* 1 0.0 0.3 0.2 0.1 0.8
* 2 0.0 0.1 0.3 0.5
* 3 0.0 0.2 0.1
* 4 0.0 0.2
* 5 0.0
*
* The distance matrix shrinks with every join operation,
* so I track the original and working size of the matrix
* inside the matrix.
*
* One fast optimization to shrink the distance matrix
* involves incrementing the "val" pointer. Thus, in
* addition to tracking the pointer to the distances,
* I also track the original pointer to that I can
* free the memory associated with the working distance
* matrix.
*
* This also applies to the r and r2 vectors which are
* used to compute the transformed distances in the
* matrix.
*
*/
typedef struct _STRUCT_DMAT {
long int ntaxa; /* the original size of the distance matrix */
long int size; /* the current/effective size of the distance matrix */
char **taxaname; /* a pointer to an array of taxa name strings */
float *val; /* the distances */
float *valhandle; /* to track the orig. pointer to free memory */
float *r, *r2; /* r and r2 vectors (used to compute transformed dists) */
float *rhandle, *r2handle; /* track orig. pointers to free memory */
} DMAT;
/*
* NJ_TREE - The Tree Data Structure
*
*
* The tree is represented internally as a rooted
* binary tree. Each internal node has a left and a right child.
*
* Additionally, I track the distance between the current node
* and that node's parent (i.e. the branch length).
*
* Finally, I track the index of the taxa for leaf nodes.
*
*/
typedef struct _STRUCT_NJ_TREE {
struct _STRUCT_NJ_TREE *left; /* left child */
struct _STRUCT_NJ_TREE *right; /* right child */
float dist; /* branch length. i.e. dist from node to parent */
long int taxa_index; /* for terminal nodes, track the taxon index */
} NJ_TREE;
/*
* NJ_VERTEX
*
* This structure is used for building trees. It is a vector
* which, represents the center of the star when building the RNJ/NJ
* tree through star-decomposition.
*
* It contains a vector of tree (node) pointers. These pointers
* get joined together by a new internal node, and the new internal
* node is placed back into the vector of nodes (which is now smaller).
*
* To keep this vector in sync. with the shrinking matrix, parts of
* the vector are shuffled around, and so a pointer to the originally
* allocated vector is stored such that it can be freed from memory
* later.
*
* The original and working sizes of the vector are also tracked.
*
*/
typedef struct _STRUCT_NJ_VERTEX {
NJ_TREE **nodes;
NJ_TREE **nodes_handle; /* original memory handle for freeing */
long int nactive; /* number of active nodes in the list */
long int size; /* the total size of the vertex */
} NJ_VERTEX;
/* some function prototypes */
int clearcut_main(int, char**);
/* core function for performing Relaxed Neighbor Joining */
NJ_TREE *
NJ_relaxed_nj(NJ_ARGS *nj_args, DMAT *dmat);
/* function for performing traditional Neighbor-Joining */
NJ_TREE *
NJ_neighbor_joining(NJ_ARGS *nj_args, DMAT *dmat);
/* print the distance matrix (for debugging) */
void
NJ_print_distance_matrix(DMAT *dmat);
/* output the computed tree to stdout or to the specified file */
void
NJ_output_tree(NJ_ARGS *nj_args,
NJ_TREE *tree,
DMAT *dmat,
long int count);
/* the recursive function for outputting trees */
void
NJ_output_tree2(FILE *fp,
NJ_ARGS *nj_args,
NJ_TREE *tree,
NJ_TREE *root,
DMAT *dmat);
/* initialize vertex */
NJ_VERTEX *
NJ_init_vertex(DMAT *dmat);
/* used to decompose the star topology and build the tree */
NJ_TREE *
NJ_decompose(DMAT *dmat,
NJ_VERTEX *vertex,
long int x,
long int y,
int last_flag);
/* print the vertex vector (for debugging) */
void
NJ_print_vertex(NJ_VERTEX *vertex);
/* print taxa names (for debugging) */
void
NJ_print_taxanames(DMAT *dmat);
/* initialize r-vector prior to RNJ/NJ */
void
NJ_init_r(DMAT *dmat);
/* print the r-vector (for debugging) */
void
NJ_print_r(DMAT *dmat);
/* shuffle the distance matrix, usually after reading in input */
void
NJ_shuffle_distance_matrix(DMAT *dmat);
/* free memory from the tree */
void
NJ_free_tree(NJ_TREE *node);
/* print permutations (for debugging) */
void
NJ_print_permutation(long int *perm,
long int size);
/* duplicate a distance matrix for multiple iterations */
DMAT *
NJ_dup_dmat(DMAT *src);
/* free the distance matrix */
void
NJ_free_dmat(DMAT *dmat);
/* free the vertex vector */
void
NJ_free_vertex(NJ_VERTEX *vertex);
/* for computing the global minimum transformed distance in traditional NJ */
float
NJ_min_transform(DMAT *dmat,
long int *ret_i,
long int *ret_j);
}
#endif /* _INC_CLEARCUT_H_ */