use best first search, not greedy routing
This commit is contained in:
parent
346c7beb05
commit
7b00e272d7
@ -5,3 +5,7 @@ Hierarchical Navigable Small World - demonstration of concept implementation in
|
|||||||
The implementation mainly follows the paper [Efficient and robust approximate nearest neighbor search using Hierarchical Navigable Small World graphs](https://arxiv.org/abs/1603.09320); however, I made some simplifications.
|
The implementation mainly follows the paper [Efficient and robust approximate nearest neighbor search using Hierarchical Navigable Small World graphs](https://arxiv.org/abs/1603.09320); however, I made some simplifications.
|
||||||
|
|
||||||
I don't care about absolute performance. That said, we can still compare relative running times.
|
I don't care about absolute performance. That said, we can still compare relative running times.
|
||||||
|
|
||||||
|
## Update:
|
||||||
|
|
||||||
|
Fixed the code to use best-first search instead of greedy routing. The previous results are kept in result-greedy.csv. We have not yet seen a difference in the results, but the search logic is different.
|
10
hnsw.py
10
hnsw.py
@ -77,10 +77,10 @@ class HNSW:
|
|||||||
self.level_count[l] += 1
|
self.level_count[l] += 1
|
||||||
elem = HNSWTower(coord, l)
|
elem = HNSWTower(coord, l)
|
||||||
L = self.ep.level
|
L = self.ep.level
|
||||||
for level in range(L, l, -1):
|
|
||||||
ep = self.route_layer(coord, ep, level)
|
|
||||||
# enter points (multiple, sorted by distance from near to far)
|
# enter points (multiple, sorted by distance from near to far)
|
||||||
eps = [(distance(coord, ep.coord), ep)]
|
eps = [(distance(coord, ep.coord), ep)]
|
||||||
|
for level in range(L, l, -1):
|
||||||
|
eps = self.search_layer(coord, eps, 1, level)
|
||||||
for level in range(min(l,L), -1, -1):
|
for level in range(min(l,L), -1, -1):
|
||||||
W = self.search_layer(coord, eps, self.ef_construction, level)
|
W = self.search_layer(coord, eps, self.ef_construction, level)
|
||||||
neighbors = [entry[1] for entry in sorted(W)[:self.M]]
|
neighbors = [entry[1] for entry in sorted(W)[:self.M]]
|
||||||
@ -94,7 +94,7 @@ class HNSW:
|
|||||||
if l > L:
|
if l > L:
|
||||||
self.ep = elem
|
self.ep = elem
|
||||||
def route_layer(self, coord, ep, level):
|
def route_layer(self, coord, ep, level):
|
||||||
# returns node that is closes to coord at level, starting from "ep"
|
# greedy routing starting at "ep". This method shouldn't be used.
|
||||||
while True:
|
while True:
|
||||||
best = None
|
best = None
|
||||||
min_d = distance(coord, ep.coord)
|
min_d = distance(coord, ep.coord)
|
||||||
@ -142,8 +142,8 @@ class HNSW:
|
|||||||
def k_nn_search(self, coord, K, ef):
|
def k_nn_search(self, coord, K, ef):
|
||||||
ep = self.ep
|
ep = self.ep
|
||||||
L = ep.level
|
L = ep.level
|
||||||
for level in range(L, 0, -1):
|
|
||||||
ep = self.route_layer(coord, ep, level)
|
|
||||||
eps = [(distance(coord, ep.coord), ep)]
|
eps = [(distance(coord, ep.coord), ep)]
|
||||||
|
for level in range(L, 0, -1):
|
||||||
|
eps = self.search_layer(coord, eps, 1, level)
|
||||||
W = self.search_layer(coord, eps, ef, 0)
|
W = self.search_layer(coord, eps, ef, 0)
|
||||||
return sorted(W)[:K]
|
return sorted(W)[:K]
|
||||||
|
21
result-greedy.csv
Normal file
21
result-greedy.csv
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
N_corpus,Dim,M,ef_construction,build_time,K,ef,recall,query_ms,N_query,seed
|
||||||
|
1000,10,10,50,1.2735,5,10,0.8280,0.4636,100,43
|
||||||
|
1000,20,10,50,1.3208,5,10,0.6200,0.5730,100,43
|
||||||
|
1000,30,10,50,1.2967,5,10,0.6540,0.5083,100,43
|
||||||
|
1000,40,10,50,1.3569,5,10,0.6140,0.4980,100,43
|
||||||
|
1000,50,10,50,1.3414,5,10,0.5340,0.5513,100,43
|
||||||
|
1000,70,10,50,1.3196,5,10,0.5560,0.5107,100,43
|
||||||
|
1000,100,10,50,1.3419,5,10,0.5160,0.5179,100,43
|
||||||
|
1000,100,12,50,1.6164,5,10,0.5760,0.5652,100,43
|
||||||
|
1000,100,14,50,1.8366,5,10,0.6000,0.6434,100,43
|
||||||
|
1000,100,16,50,2.1186,5,10,0.6420,0.6895,100,43
|
||||||
|
1000,100,18,50,2.4118,5,10,0.6880,0.7652,100,43
|
||||||
|
1000,100,20,50,2.7659,5,10,0.6940,0.8060,100,43
|
||||||
|
2000,100,20,50,6.2879,5,10,0.5740,0.9183,100,43
|
||||||
|
3000,100,20,50,10.1791,5,10,0.5280,1.0457,100,43
|
||||||
|
4000,100,20,50,14.1797,5,10,0.4720,1.1173,100,43
|
||||||
|
5000,100,20,50,18.9001,5,10,0.4240,1.1408,100,43
|
||||||
|
5000,100,10,50,9.2855,5,10,0.3220,0.6739,100,43
|
||||||
|
5000,100,10,100,13.6119,5,10,0.3160,0.6867,100,43
|
||||||
|
5000,100,10,20,6.0684,5,10,0.3080,0.6659,100,43
|
||||||
|
5000,100,10,10,4.9333,5,10,0.2520,0.6278,100,43
|
|
21
result.csv
21
result.csv
@ -1,21 +1,2 @@
|
|||||||
N_corpus,Dim,M,ef_construction,build_time,K,ef,recall,query_ms,N_query,seed
|
N_corpus,Dim,M,ef_construction,build_time,K,ef,recall,query_ms,N_query,seed
|
||||||
1000,10,10,50,1.2735,5,10,0.8280,0.4636,100,43
|
5000,100,10,10,5.0158,5,10,0.2520,0.6253,100,43
|
||||||
1000,20,10,50,1.3208,5,10,0.6200,0.5730,100,43
|
|
||||||
1000,30,10,50,1.2967,5,10,0.6540,0.5083,100,43
|
|
||||||
1000,40,10,50,1.3569,5,10,0.6140,0.4980,100,43
|
|
||||||
1000,50,10,50,1.3414,5,10,0.5340,0.5513,100,43
|
|
||||||
1000,70,10,50,1.3196,5,10,0.5560,0.5107,100,43
|
|
||||||
1000,100,10,50,1.3419,5,10,0.5160,0.5179,100,43
|
|
||||||
1000,100,12,50,1.6164,5,10,0.5760,0.5652,100,43
|
|
||||||
1000,100,14,50,1.8366,5,10,0.6000,0.6434,100,43
|
|
||||||
1000,100,16,50,2.1186,5,10,0.6420,0.6895,100,43
|
|
||||||
1000,100,18,50,2.4118,5,10,0.6880,0.7652,100,43
|
|
||||||
1000,100,20,50,2.7659,5,10,0.6940,0.8060,100,43
|
|
||||||
2000,100,20,50,6.2879,5,10,0.5740,0.9183,100,43
|
|
||||||
3000,100,20,50,10.1791,5,10,0.5280,1.0457,100,43
|
|
||||||
4000,100,20,50,14.1797,5,10,0.4720,1.1173,100,43
|
|
||||||
5000,100,20,50,18.9001,5,10,0.4240,1.1408,100,43
|
|
||||||
5000,100,10,50,9.2855,5,10,0.3220,0.6739,100,43
|
|
||||||
5000,100,10,100,13.6119,5,10,0.3160,0.6867,100,43
|
|
||||||
5000,100,10,20,6.0684,5,10,0.3080,0.6659,100,43
|
|
||||||
5000,100,10,10,4.9333,5,10,0.2520,0.6278,100,43
|
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user