This may not be a direct solution but more of a reference. I tried using the haversine formula previously but using it to compute the set of the nearest n neighbours became intolerably long when used for thousands of points.
So I created a hash class (or mapping?) that could be put into a binary tree to allow quick searching. It doesn't work on distances but angles (lat, long).
The find function works by finding the closest point in the tree and then walking back up the tree until n nodes are found.
geocode.py:
units = [(31-q, 180.0/(2 ** q)) for q in range(32)]
def bit_sum(bits):
return sum([2**bit for bit in bits])
class gpshash(object):
def __init__(self, longitude = None, latitude = None, **kwargs):
if(kwargs):
if(kwargs.has_key("longitude ") and kwargs.has_key("latitude ")):
self.longitude = geohash(degrees=kwargs["degrees"])
self.latitude = geohash(degrees=kwargs["hash"])
else:
if(longitude == None or latitude == None):
self.longitude = geohash(degrees=0)
self.latitude = geohash(degrees=0)
else:
self.longitude = geohash(degrees=longitude)
self.latitude = geohash(degrees=latitude)
long_hash = self.longitude.bin_hash
lat_hash = self.latitude.bin_hash
hash_str = ""
if(len(long_hash) == len(lat_hash)):
for i in range(len(long_hash)):
hash_str += (long_hash[i]+lat_hash[i])
self.bin_hash = hash_str
def __str__(self):
return "%s, %s" % (str(self.longitude.hash), str(self.latitude.hash))
def __repr__(self):
return str("<gpshash long: %f lat: %f>" % (self.longitude.degrees, self.latitude.degrees))
def __eq__(self, other):
if(isinstance(self, gpshash) and isinstance(other, gpshash)):
return (((self.longitude._hash ^ other.longitude._hash) == 0) and ((self.latitude._hash ^ other.latitude._hash) == 0))
else:
return False
class geohash(object):
def __init__(self, degrees = 0, **kwargs):
if(kwargs):
if(kwargs.has_key("degrees")):
self.degrees = kwargs["degrees"] % 360
self._hash = self.encode()
elif(kwargs.has_key("hash")):
self._hash = kwargs["hash"] % ((2 << 31) - 1)
self.degrees = self.decode()
else:
self.degrees = degrees % 360
self._hash = self.encode()
def __str__(self):
return str(self.hash)
def __repr__(self):
return str("<geohash degrees: %f hash: %s>" % (self.degrees, self.hash))
def __add__(self, other):
return geohash(hash=(self._hash + other._hash))
def __sub__(self, other):
return geohash(hash=(self._hash - other._hash))
def __xor__(self, other):
return geohash(hash=(self._hash ^ other._hash))
def __eq__(self, other):
if(isinstance(self, geohash) and isinstance(other, geohash)):
return ((self._hash ^ other._hash) == 0)
else:
return False
def encode(self):
lesser = filter(lambda (bit, angle): self.degrees >= angle, units)
combined = reduce(lambda (bits, angles), (bit, angle): (bits+[bit], angles + angle) if((angles + angle) <= self.degrees) else (bits, angles), lesser, ([], 0))
return bit_sum(combined[0])
def decode(self):
lesser = filter(lambda (bit, angle): self._hash>= (2 ** bit), units)
combined = reduce(lambda (bits, angles), (bit, angle): (bits+[bit], angles + angle) if((bit_sum(bits+[bit])) <= self._hash) else (bits, angles), lesser, ([], 0))
return combined[1]
@property
def hash(self):
self._hash = self.encode()
return "%08x" % self._hash
@property
def inv_hash(self):
self._inv_hash = self.decode()
return self._inv_hash
@property
def bin_hash(self):
self._bin_hash = bin(self._hash)[2:].zfill(32)
return self._bin_hash
class gdict(object):
'''
Base Geo Dictionary
Critical Components taken from Python26\Lib\UserDict.py
'''
__slots__ = ["parent", "left", "right", "hash_type"]
hash_type = None
def __init__(self, ihash=None, iparent=None):
def set_helper(iparent, cur_hex, hex_list):
ret_bin_tree = self.__class__(None, iparent)
if(hex_list):
ret_bin_tree.set_child(cur_hex, set_helper(ret_bin_tree, hex_list[0], hex_list[1:]))
return ret_bin_tree
elif(cur_hex):
ret_bin_tree.set_child(cur_hex, ihash)
return ret_bin_tree
self.parent = self
self.left = None
self.right = None
if(iparent != None):
self.parent = iparent
if(isinstance(ihash, self.hash_type)):
ilist = list(ihash.bin_hash)
if(len(ilist) > 1):
ret_bin_tree = set_helper(self, ilist[1], ilist[2:])
if(ret_bin_tree):
self.set_child(ilist[0], ret_bin_tree)
def set_child(self, istr, ichild):
if(istr == "0"):
self.left = ichild
elif(istr == "1"):
self.right = ichild
def get_child(self, istr):
if(istr == "0"):
return self.left
elif(istr == "1"):
return self.right
else:
return ""
def __repr__(self):
def repr_print_helper(ibin_tree, fmt_str = "", depth = 1):
if(not isinstance(ibin_tree, self.__class__)):
fmt_str += "\n"
fmt_str += ("%%%ds" % (depth)) % ""
fmt_str += ibin_tree.__repr__()
else:
if((ibin_tree.left != None and ibin_tree.right == None) or (ibin_tree.left == None and ibin_tree.right != None)):
if(ibin_tree.left != None):
fmt_str += "0"
fmt_str = repr_print_helper(ibin_tree.left, fmt_str, depth + 1)
elif(ibin_tree.right != None):
fmt_str += "1"
fmt_str = repr_print_helper(ibin_tree.right, fmt_str, depth + 1)
else:
if(ibin_tree.left != None):
fmt_str += "\n"
fmt_str += ("%%%ds" % (depth - 1)) % ""
fmt_str += "0"
fmt_str = repr_print_helper(ibin_tree.left, fmt_str, depth + 1)
if(ibin_tree.right != None):
fmt_str += "\n"
fmt_str += ("%%%ds" % (depth - 1)) % ""
fmt_str += "1"
fmt_str = repr_print_helper(ibin_tree.right, fmt_str, depth + 1)
return fmt_str
return repr_print_helper(self)
def find(self, ihash, itarget = 1):
class flat(list):
pass
def find_helper_base(iparent, ibin_tree, ihash):
ret_find = None
if(isinstance(ibin_tree, self.hash_type)):
ret_find = iparent
elif(len(ihash) > 0):
if(ibin_tree.get_child(ihash[0])):
ret_find = find_helper_base(ibin_tree, ibin_tree.get_child(ihash[0]), ihash[1:])
else:
ret_find = ibin_tree
return ret_find
def up_find(iflat, iparent, ibin_tree, ibias = "0"):
if((ibin_tree != iparent) and (len(iflat) < itarget)):
if(iparent.left):
if(len(iflat) >= itarget):
return
if(iparent.left != ibin_tree):
down_find(iflat, iparent.left, ibias)
if(iparent.right):
if(len(iflat) >= itarget):
return
if(iparent.right != ibin_tree):
down_find(iflat, iparent.right, ibias)
up_find(iflat, ibin_tree.parent.parent, ibin_tree.parent, ibias)
def down_find(iflat, ibin_tree, ibias = "0"):
if(len(iflat) >= itarget):
return
elif(isinstance(ibin_tree, self.hash_type)):
iflat += [ibin_tree]
else:
if(ibias == "1"):
if(ibin_tree.left):
down_find(iflat, ibin_tree.left, ibias)
if(ibin_tree.right):
down_find(iflat, ibin_tree.right, ibias)
else:
if(ibin_tree.right):
down_find(iflat, ibin_tree.right, ibias)
if(ibin_tree.left):
down_find(iflat, ibin_tree.left, ibias)
if(isinstance(ihash, self.hash_type)):
flatter = flat()
hasher = ihash.bin_hash
base = find_helper_base(self, self.get_child(hasher[0]), hasher[1:])
if(base):
down_find(flatter, base)
bias = flatter[0].bin_hash[0]
up_find(flatter, base.parent, base, bias)
return list(flatter)
def merge(self, from_bin_tree):
def merge_helper(to_bin_tree, from_bin_tree):
if(isinstance(from_bin_tree, self.__class__)):
if(from_bin_tree.left != None):
if(to_bin_tree.left != None):
merge_helper(to_bin_tree.left, from_bin_tree.left)
else:
from_bin_tree.left.parent = to_bin_tree
to_bin_tree.left = from_bin_tree.left
elif(from_bin_tree.right != None):
if(to_bin_tree.right != None):
merge_helper(to_bin_tree.right, from_bin_tree.right)
else:
from_bin_tree.right.parent = to_bin_tree
to_bin_tree.right = from_bin_tree.right
merge_helper(self, from_bin_tree)
class geodict(gdict):
'''
Geo Dictionary
'''
hash_type = geohash
def __init__(self, ihash=None, iparent=None):
gdict.__init__(self, ihash, iparent)
class gpsdict(gdict):
'''
GPS Dictionary
'''
hash_type = gpshash
def __init__(self, ihash=None, iparent=None):
gdict.__init__(self, ihash, iparent)
if(__name__ == "__main__"):
gpses = \
[
gpshash(90, 90),
gpshash(68, 24),
gpshash(144, 60),
gpshash(48, 91),
gpshash(32, 105),
gpshash(32, 150),
gpshash(167, 20),
gpshash(49, 116),
gpshash(81, 82),
gpshash(63, 79),
gpshash(129, 76)
]
base_dict = gpsdict()
for cur_hash in gpses:
base_dict.merge(gpsdict(cur_hash ))
print "All locations added:"
print base_dict
print ""
print "Trying to find 3 nearest points to:"
to_find = gpshash(60, 20)
print to_find.__repr__()
found = base_dict.find(to_find, 3)
print ""
print "Found the following:"
for x in found:
print x.__repr__()