Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master'
Browse files (browse the repository at this point in the history)
# Conflicts:
#	.idea/codeStyleSettings.xml
  • Loading branch information
lmcinnes committed Jan 5, 2017
2 parents 1d2a9e0 + f47af2f commit eb1993b
Show file tree
Hide file tree
Showing 13 changed files with 18,786 additions and 16,529 deletions.
6 changes: 1 addition & 5 deletions .idea/codeStyleSettings.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion .idea/hdbscan.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions hdbscan/__init__.py
Original file line number | Diff line number | Diff line change
@@ -1,2 +1,4 @@
from .hdbscan_ import HDBSCAN, hdbscan
from .robust_single_linkage_ import RobustSingleLinkage, robust_single_linkage
from .validity import validity_index

13,559 changes: 7,175 additions & 6,384 deletions hdbscan/_hdbscan_boruvka.c

Large diffs are not rendered by default.

6,954 changes: 3,625 additions & 3,329 deletions hdbscan/_hdbscan_linkage.c

Large diffs are not rendered by default.

1,554 changes: 781 additions & 773 deletions hdbscan/_hdbscan_reachability.c

Large diffs are not rendered by default.

9,844 changes: 5,294 additions & 4,550 deletions hdbscan/_hdbscan_tree.c

Large diffs are not rendered by default.

27 changes: 20 additions & 7 deletions hdbscan/_hdbscan_tree.pyx
Original file line number | Diff line number | Diff line change
Expand Up @@ -373,12 +373,14 @@ cdef np.ndarray[np.intp_t, ndim=1] do_labelling(
np.ndarray tree,
set clusters,
dict cluster_label_map,
np.intp_t allow_single_cluster):
np.intp_t allow_single_cluster,
np.intp_t match_reference_implementation):

cdef np.intp_t root_cluster
cdef np.ndarray[np.intp_t, ndim=1] result_arr
cdef np.ndarray[np.intp_t, ndim=1] parent_array
cdef np.ndarray[np.intp_t, ndim=1] child_array
cdef np.ndarray[np.double_t, ndim=1] lambda_array
cdef np.intp_t *result
cdef TreeUnionFind union_find
cdef np.intp_t parent
Expand All @@ -388,6 +390,7 @@ cdef np.ndarray[np.intp_t, ndim=1] do_labelling(

child_array = tree['child']
parent_array = tree['parent']
lambda_array = tree['lambda_val']

root_cluster = parent_array.min()
result_arr = np.empty(root_cluster, dtype=np.intp)
Expand All @@ -406,14 +409,22 @@ cdef np.ndarray[np.intp_t, ndim=1] do_labelling(
if cluster < root_cluster:
result[n] = -1
elif cluster == root_cluster:
if len(clusters) == 1 and \
if len(clusters) == 1 and allow_single_cluster and \
tree['lambda_val'][tree['child'] == n] >= \
tree['lambda_val'][tree['parent'] == cluster].max():
result[n] = cluster_label_map[cluster]
else:
result[n] = -1
else:
result[n] = cluster_label_map[cluster]
if match_reference_implementation:
point_lambda = lambda_array[child_array == n][0]
cluster_lambda = lambda_array[child_array == cluster][0]
if point_lambda > cluster_lambda:
result[n] = cluster_label_map[cluster]
else:
result[n] = -1
else:
result[n] = cluster_label_map[cluster]

return result_arr

Expand Down Expand Up @@ -525,7 +536,8 @@ cpdef np.ndarray get_stability_scores(np.ndarray labels, set clusters,


cpdef tuple get_clusters(np.ndarray tree, dict stability,
allow_single_cluster=False):
allow_single_cluster=False,
match_reference_implementation=False):
"""
The tree is assumed to have numeric node ids such that a reverse numeric
sort is equivalent to a topological sort.
Expand Down Expand Up @@ -571,10 +583,11 @@ cpdef tuple get_clusters(np.ndarray tree, dict stability,
is_cluster[sub_node] = False

clusters = set([c for c in is_cluster if is_cluster[c]])
cluster_map = {c: n for n, c in enumerate(clusters)}
reverse_cluster_map = {n: c for n, c in enumerate(clusters)}
cluster_map = {c: n for n, c in enumerate(sorted(list(clusters)))}
reverse_cluster_map = {n: c for c, n in cluster_map.items()}

labels = do_labelling(tree, clusters, cluster_map, allow_single_cluster)
labels = do_labelling(tree, clusters, cluster_map,
allow_single_cluster, match_reference_implementation)
probs = get_probabilities(tree, reverse_cluster_map, labels)
stabilities = get_stability_scores(labels, clusters, stability, max_lambda)

Expand Down
Loading

0 comments on commit eb1993b

Please sign in to comment.