diff --git a/README.md b/README.md index db1e1b6..e7e26b9 100644 --- a/README.md +++ b/README.md @@ -4,4 +4,8 @@ Hierarchical Navigable Small World - demonstration of concept implementation in Implementation mainly referenced the paper [Efficient and robust approximate nearest neighbor search using Hierarchical Navigable Small World graphs](https://arxiv.org/abs/1603.09320) however I made simplifications. -I don't care about performance. That said, we can still compare the relative running time? \ No newline at end of file +I don't care about performance. That said, we can still compare the relative running time? + +## Update: + +Fixed code to avoid using greedy routing. Previous results are in result-greedy.csv. We have not yet seen a difference in results, but the logic is different. \ No newline at end of file diff --git a/hnsw.py b/hnsw.py index af030ea..62c0edc 100644 --- a/hnsw.py +++ b/hnsw.py @@ -77,10 +77,10 @@ class HNSW: self.level_count[l] += 1 elem = HNSWTower(coord, l) L = self.ep.level - for level in range(L, l, -1): - ep = self.route_layer(coord, ep, level) # enter points (multiple, sorted by distance from near to far) eps = [(distance(coord, ep.coord), ep)] + for level in range(L, l, -1): + eps = self.search_layer(coord, eps, 1, level) for level in range(min(l,L), -1, -1): W = self.search_layer(coord, eps, self.ef_construction, level) neighbors = [entry[1] for entry in sorted(W)[:self.M]] @@ -94,7 +94,7 @@ class HNSW: if l > L: self.ep = elem def route_layer(self, coord, ep, level): - # returns node that is closes to coord at level, starting from "ep" + # greedy routing starting at "ep". This method shouldn't be used. while True: best = None min_d = distance(coord, ep.coord) @@ -142,8 +142,8 @@ class HNSW: def k_nn_search(self, coord, K, ef): ep = self.ep L = ep.level - for level in range(L, 0, -1): - ep = self.route_layer(coord, ep, level) eps = [(distance(coord, ep.coord), ep)] + for level in range(L, 0, -1): + eps = self.search_layer(coord, eps, 1, level) W = self.search_layer(coord, eps, ef, 0) return sorted(W)[:K] diff --git a/result-greedy.csv b/result-greedy.csv new file mode 100644 index 0000000..0f07d1e --- /dev/null +++ b/result-greedy.csv @@ -0,0 +1,21 @@ +N_corpus,Dim,M,ef_construction,build_time,K,ef,recall,query_ms,N_query,seed +1000,10,10,50,1.2735,5,10,0.8280,0.4636,100,43 +1000,20,10,50,1.3208,5,10,0.6200,0.5730,100,43 +1000,30,10,50,1.2967,5,10,0.6540,0.5083,100,43 +1000,40,10,50,1.3569,5,10,0.6140,0.4980,100,43 +1000,50,10,50,1.3414,5,10,0.5340,0.5513,100,43 +1000,70,10,50,1.3196,5,10,0.5560,0.5107,100,43 +1000,100,10,50,1.3419,5,10,0.5160,0.5179,100,43 +1000,100,12,50,1.6164,5,10,0.5760,0.5652,100,43 +1000,100,14,50,1.8366,5,10,0.6000,0.6434,100,43 +1000,100,16,50,2.1186,5,10,0.6420,0.6895,100,43 +1000,100,18,50,2.4118,5,10,0.6880,0.7652,100,43 +1000,100,20,50,2.7659,5,10,0.6940,0.8060,100,43 +2000,100,20,50,6.2879,5,10,0.5740,0.9183,100,43 +3000,100,20,50,10.1791,5,10,0.5280,1.0457,100,43 +4000,100,20,50,14.1797,5,10,0.4720,1.1173,100,43 +5000,100,20,50,18.9001,5,10,0.4240,1.1408,100,43 +5000,100,10,50,9.2855,5,10,0.3220,0.6739,100,43 +5000,100,10,100,13.6119,5,10,0.3160,0.6867,100,43 +5000,100,10,20,6.0684,5,10,0.3080,0.6659,100,43 +5000,100,10,10,4.9333,5,10,0.2520,0.6278,100,43 diff --git a/result.csv b/result.csv index 0f07d1e..40ad6f8 100644 --- a/result.csv +++ b/result.csv @@ -1,21 +1,2 @@ N_corpus,Dim,M,ef_construction,build_time,K,ef,recall,query_ms,N_query,seed -1000,10,10,50,1.2735,5,10,0.8280,0.4636,100,43 -1000,20,10,50,1.3208,5,10,0.6200,0.5730,100,43 -1000,30,10,50,1.2967,5,10,0.6540,0.5083,100,43 -1000,40,10,50,1.3569,5,10,0.6140,0.4980,100,43 -1000,50,10,50,1.3414,5,10,0.5340,0.5513,100,43 -1000,70,10,50,1.3196,5,10,0.5560,0.5107,100,43 -1000,100,10,50,1.3419,5,10,0.5160,0.5179,100,43 -1000,100,12,50,1.6164,5,10,0.5760,0.5652,100,43 -1000,100,14,50,1.8366,5,10,0.6000,0.6434,100,43 -1000,100,16,50,2.1186,5,10,0.6420,0.6895,100,43 -1000,100,18,50,2.4118,5,10,0.6880,0.7652,100,43 -1000,100,20,50,2.7659,5,10,0.6940,0.8060,100,43 -2000,100,20,50,6.2879,5,10,0.5740,0.9183,100,43 -3000,100,20,50,10.1791,5,10,0.5280,1.0457,100,43 -4000,100,20,50,14.1797,5,10,0.4720,1.1173,100,43 -5000,100,20,50,18.9001,5,10,0.4240,1.1408,100,43 -5000,100,10,50,9.2855,5,10,0.3220,0.6739,100,43 -5000,100,10,100,13.6119,5,10,0.3160,0.6867,100,43 -5000,100,10,20,6.0684,5,10,0.3080,0.6659,100,43 -5000,100,10,10,4.9333,5,10,0.2520,0.6278,100,43 +5000,100,10,10,5.0158,5,10,0.2520,0.6253,100,43