-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcladogram.ml
145 lines (126 loc) · 4.16 KB
/
cladogram.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
(* virt-similarity
* Copyright (C) 2013 Red Hat Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*)
open Printf
open Utils
type t =
| Leaf of int (* A single disk image (index of). *)
| Node of t list * int (* An interior node in the tree. *)
let rec images_in_subtree = function
| Leaf i -> [i]
| Node (xs, _) -> List.concat (List.map images_in_subtree xs)
let max_list = List.fold_left max min_int
let mapi f xs =
let rec loop i = function
| [] -> []
| x :: xs -> let r = f i x in r :: loop (i+1) xs
in
loop 0 xs
(* Compute the minimum distance between subtrees t1 and t2. 'matrix'
* is the distance matrix between leaves.
*)
let min_distance_between_subtrees matrix t1 t2 =
let min_distance = ref max_int in
let xs = images_in_subtree t1 in
let ys = images_in_subtree t2 in
List.iter (
fun (i, j) ->
let d = matrix.(i).(j) in
if d < !min_distance then min_distance := d
) (pairs xs ys);
!min_distance
(* Find the closest subtrees and combine them. *)
let combine_closest_subtrees matrix trees =
let trees = Array.of_list trees in
let n = Array.length trees in
(* Find the minimum distance between any two subtrees. *)
let min_distance = ref max_int in
List.iter (
fun (i, j) ->
let d = min_distance_between_subtrees matrix trees.(i) trees.(j) in
if d < !min_distance then min_distance := d
) (pairs_of_ints n);
let min_distance = !min_distance in
(* For each subtree that differs from another by exactly the
* minimum distance, group them together into a single subtree.
*)
let in_group = Array.make n false in
List.iter (
fun (i, j) ->
let d = min_distance_between_subtrees matrix trees.(i) trees.(j) in
if d = min_distance then (
in_group.(i) <- true;
in_group.(j) <- true
)
) (pairs_of_ints n);
let group = ref [] and rest = ref [] in
Array.iteri (
fun i in_group ->
if in_group then
group := trees.(i) :: !group
else
rest := trees.(i) :: !rest
) in_group;
!rest @ [Node (!group, min_distance)]
let construct_cladogram matrix n =
(* At the bottom level, every disk image is in its own leaf. *)
let leaves =
let rec loop i = if i < n then Leaf i :: loop (i+1) else [] in
loop 0 in
(* Work up the tree, combining subtrees together, until we end
* up with one tree (ie. the top node of the final tree).
*)
let rec loop trees =
match trees with
| [] -> assert false
| [x] -> x (* finished *)
| xs -> loop (combine_closest_subtrees matrix xs)
in
loop leaves
let format_cladogram ?format_leaf t =
let format_leaf = match format_leaf with
| None -> string_of_int
| Some f -> f
in
let rec format = function
| Leaf i ->
let s = "--- " ^ format_leaf i in
[s; ""], String.length s
| Node (xs, _) ->
let xs = List.map format xs in
let n = List.length xs in
let w = 7 + max_list (List.map snd xs) in
let xs = mapi (
fun row (ss, _) ->
let s, ss = match ss with
| s :: ss -> s, ss
| [] -> assert false in
if row = 0 then (
("---+---" ^ s) ::
List.map (fun s -> " | " ^ s) ss
) else if row < n-1 then (
(" +---" ^ s) ::
List.map (fun s -> " | " ^ s) ss
) else (
(" +---" ^ s) ::
List.map (fun s -> " " ^ s) ss
)
) xs in
List.concat xs, w
in
let strs, _ = format t in
strs