Question

Using scipy.KDTree to do some quick nearest neighbour searches. I'm using KDTree.query_ball_point(pnt, r=some_distance) to do the search.

As my points are lat,long the radius value to search (some_distance) has to be in decimal degrees (I believe). If I wanted to make this accessible to a user, I would expect that distance to be given in Kilometres, meters, miles etc.

What is the best way, with python libs, to convert a distance in km to a decimal degrees value? I'm using numpy, scipy and played a bit with PySAL.

Help appreciated, Louis

Was it helpful?

Solution

Classic Calculation from here:

Distance

This uses the ‘haversine’ formula to calculate the great-circle distance between two points – that is, the shortest distance over the earth’s surface – giving an ‘as-the-crow-flies’ distance between the points (ignoring any hills, of course!).

Haversine formula:

a = sin²(Δφ/2) + cos(φ1).cos(φ2).sin²(Δλ/2)
c = 2.atan2(√a, √(1−a))
d = R.c

where φ is latitude, λ is longitude, R is earth’s radius (mean radius = 6,371km) note that angles need to be in radians to pass to trig functions!

You can of course do a very rough and ready approximation from the definitions of Nautical Mile and kilometre:

The nautical mile (symbol M, NM or nmi) is a unit of length that is about one minute of arc of latitude measured along any meridian (at sea level), or about one minute of arc of longitude at the equator. By international agreement it has been set at 1,852 metres exactly (about 6,076 feet).

OTHER TIPS

This may not be a direct solution but more of a reference. I tried using the haversine formula previously but using it to compute the set of the nearest n neighbours became intolerably long when used for thousands of points.

So I created a hash class (or mapping?) that could be put into a binary tree to allow quick searching. It doesn't work on distances but angles (lat, long).

The find function works by finding the closest point in the tree and then walking back up the tree until n nodes are found.

geocode.py:

units = [(31-q, 180.0/(2 ** q)) for q in range(32)]

def bit_sum(bits):
    return sum([2**bit for bit in bits])

class gpshash(object):
    def __init__(self, longitude = None, latitude = None, **kwargs):
        if(kwargs):
            if(kwargs.has_key("longitude ") and kwargs.has_key("latitude ")):
                self.longitude = geohash(degrees=kwargs["degrees"])
                self.latitude = geohash(degrees=kwargs["hash"])
        else:
            if(longitude == None or latitude == None):
                self.longitude = geohash(degrees=0)
                self.latitude = geohash(degrees=0)
            else:
                self.longitude = geohash(degrees=longitude)
                self.latitude = geohash(degrees=latitude)
        long_hash = self.longitude.bin_hash
        lat_hash = self.latitude.bin_hash
        hash_str = ""
        if(len(long_hash) == len(lat_hash)):
            for i in range(len(long_hash)):
                hash_str += (long_hash[i]+lat_hash[i])
        self.bin_hash = hash_str
    def __str__(self):
        return "%s, %s" % (str(self.longitude.hash), str(self.latitude.hash))
    def __repr__(self):
        return str("<gpshash long: %f lat: %f>" % (self.longitude.degrees, self.latitude.degrees))
    def __eq__(self, other):
        if(isinstance(self, gpshash) and isinstance(other, gpshash)):
            return (((self.longitude._hash ^ other.longitude._hash) == 0) and ((self.latitude._hash ^ other.latitude._hash) == 0))
        else:
            return False

class geohash(object):
    def __init__(self, degrees = 0, **kwargs):
        if(kwargs):
            if(kwargs.has_key("degrees")):
                self.degrees = kwargs["degrees"] % 360
                self._hash = self.encode()
            elif(kwargs.has_key("hash")):
                self._hash = kwargs["hash"] % ((2 << 31) - 1)
                self.degrees = self.decode()
        else:
            self.degrees = degrees % 360
            self._hash = self.encode()
    def __str__(self):
        return str(self.hash)
    def __repr__(self):
        return str("<geohash degrees: %f hash: %s>" % (self.degrees, self.hash))
    def __add__(self, other):
        return geohash(hash=(self._hash + other._hash))
    def __sub__(self, other):
        return geohash(hash=(self._hash - other._hash))
    def __xor__(self, other):
        return geohash(hash=(self._hash ^ other._hash))
    def __eq__(self, other):
        if(isinstance(self, geohash) and isinstance(other, geohash)):
            return ((self._hash ^ other._hash) == 0)
        else:
            return False
    def encode(self):
        lesser = filter(lambda (bit, angle): self.degrees >= angle, units)
        combined = reduce(lambda (bits, angles), (bit, angle): (bits+[bit], angles + angle) if((angles + angle) <= self.degrees) else (bits, angles), lesser, ([], 0))
        return bit_sum(combined[0])
    def decode(self):
        lesser = filter(lambda (bit, angle): self._hash>= (2 ** bit), units)
        combined = reduce(lambda (bits, angles), (bit, angle): (bits+[bit], angles + angle) if((bit_sum(bits+[bit])) <= self._hash) else (bits, angles), lesser, ([], 0))
        return combined[1]
    @property
    def hash(self):
        self._hash = self.encode()
        return "%08x" % self._hash
    @property
    def inv_hash(self):
        self._inv_hash = self.decode()
        return self._inv_hash
    @property
    def bin_hash(self):
        self._bin_hash = bin(self._hash)[2:].zfill(32)
        return self._bin_hash

class gdict(object):
    '''
    Base Geo Dictionary
    Critical Components taken from Python26\Lib\UserDict.py
    '''
    __slots__ = ["parent", "left", "right", "hash_type"]
    hash_type = None
    def __init__(self, ihash=None, iparent=None):
        def set_helper(iparent, cur_hex, hex_list):
            ret_bin_tree = self.__class__(None, iparent)
            if(hex_list):
                ret_bin_tree.set_child(cur_hex, set_helper(ret_bin_tree, hex_list[0], hex_list[1:]))
                return ret_bin_tree
            elif(cur_hex):
                ret_bin_tree.set_child(cur_hex, ihash)
                return ret_bin_tree
        self.parent = self
        self.left = None
        self.right = None
        if(iparent != None):
            self.parent = iparent
        if(isinstance(ihash, self.hash_type)):
            ilist = list(ihash.bin_hash)
            if(len(ilist) > 1):
                ret_bin_tree = set_helper(self, ilist[1], ilist[2:])
            if(ret_bin_tree):
                self.set_child(ilist[0], ret_bin_tree)
    def set_child(self, istr, ichild):
        if(istr == "0"):
            self.left = ichild
        elif(istr == "1"):
            self.right = ichild
    def get_child(self, istr):
        if(istr == "0"):
            return self.left
        elif(istr == "1"):
            return self.right
        else:
            return ""
    def __repr__(self):
        def repr_print_helper(ibin_tree, fmt_str = "", depth = 1):
            if(not isinstance(ibin_tree, self.__class__)):
                fmt_str += "\n"
                fmt_str += ("%%%ds" % (depth)) % ""
                fmt_str += ibin_tree.__repr__()
            else:
                if((ibin_tree.left != None and ibin_tree.right == None) or (ibin_tree.left == None and ibin_tree.right != None)):
                    if(ibin_tree.left != None):
                        fmt_str += "0"
                        fmt_str = repr_print_helper(ibin_tree.left, fmt_str, depth + 1)
                    elif(ibin_tree.right != None):
                        fmt_str += "1"
                        fmt_str = repr_print_helper(ibin_tree.right, fmt_str, depth + 1)
                else:
                    if(ibin_tree.left != None):
                        fmt_str += "\n"
                        fmt_str += ("%%%ds" % (depth - 1)) % ""
                        fmt_str += "0"
                        fmt_str = repr_print_helper(ibin_tree.left, fmt_str, depth + 1)
                    if(ibin_tree.right != None):
                        fmt_str += "\n"
                        fmt_str += ("%%%ds" % (depth - 1)) % ""
                        fmt_str += "1"
                        fmt_str = repr_print_helper(ibin_tree.right, fmt_str, depth + 1)
            return fmt_str
        return repr_print_helper(self)
    def find(self, ihash, itarget = 1):
        class flat(list):
            pass
        def find_helper_base(iparent, ibin_tree, ihash):
            ret_find = None
            if(isinstance(ibin_tree, self.hash_type)):
                ret_find = iparent
            elif(len(ihash) > 0):
                if(ibin_tree.get_child(ihash[0])):
                    ret_find = find_helper_base(ibin_tree, ibin_tree.get_child(ihash[0]), ihash[1:])
                else:
                    ret_find = ibin_tree
            return ret_find
        def up_find(iflat, iparent, ibin_tree, ibias = "0"):
            if((ibin_tree != iparent) and (len(iflat) < itarget)):
                if(iparent.left):
                    if(len(iflat) >= itarget):
                        return
                    if(iparent.left != ibin_tree):
                        down_find(iflat, iparent.left, ibias)
                if(iparent.right):
                    if(len(iflat) >= itarget):
                        return
                    if(iparent.right != ibin_tree):
                        down_find(iflat, iparent.right, ibias)
                up_find(iflat, ibin_tree.parent.parent, ibin_tree.parent, ibias)
        def down_find(iflat, ibin_tree, ibias = "0"):
            if(len(iflat) >= itarget):
                return
            elif(isinstance(ibin_tree, self.hash_type)):
                iflat += [ibin_tree]
            else:
                if(ibias == "1"):
                    if(ibin_tree.left):
                        down_find(iflat, ibin_tree.left, ibias)
                    if(ibin_tree.right):
                        down_find(iflat, ibin_tree.right, ibias)
                else:
                    if(ibin_tree.right):
                        down_find(iflat, ibin_tree.right, ibias)
                    if(ibin_tree.left):
                        down_find(iflat, ibin_tree.left, ibias)
        if(isinstance(ihash, self.hash_type)):
            flatter = flat()
            hasher = ihash.bin_hash
            base = find_helper_base(self, self.get_child(hasher[0]), hasher[1:])
            if(base):
                down_find(flatter, base)
                bias = flatter[0].bin_hash[0]
                up_find(flatter, base.parent, base, bias)
            return list(flatter)
    def merge(self, from_bin_tree):
        def merge_helper(to_bin_tree, from_bin_tree):
            if(isinstance(from_bin_tree, self.__class__)):
                if(from_bin_tree.left != None):
                    if(to_bin_tree.left != None):
                        merge_helper(to_bin_tree.left, from_bin_tree.left)
                    else:
                        from_bin_tree.left.parent = to_bin_tree
                        to_bin_tree.left = from_bin_tree.left
                elif(from_bin_tree.right != None):
                    if(to_bin_tree.right != None):
                        merge_helper(to_bin_tree.right, from_bin_tree.right)
                    else:
                        from_bin_tree.right.parent = to_bin_tree
                        to_bin_tree.right = from_bin_tree.right
        merge_helper(self, from_bin_tree)

class geodict(gdict):
    '''
    Geo Dictionary
    '''
    hash_type = geohash
    def __init__(self, ihash=None, iparent=None):
        gdict.__init__(self, ihash, iparent)

class gpsdict(gdict):
    '''
    GPS Dictionary
    '''
    hash_type = gpshash
    def __init__(self, ihash=None, iparent=None):
        gdict.__init__(self, ihash, iparent)

if(__name__ == "__main__"):
    gpses = \
    [
        gpshash(90, 90),
        gpshash(68, 24),
        gpshash(144, 60),
        gpshash(48, 91),
        gpshash(32, 105),
        gpshash(32, 150),
        gpshash(167, 20),
        gpshash(49, 116),
        gpshash(81, 82),
        gpshash(63, 79),
        gpshash(129, 76)
    ]

    base_dict = gpsdict()
    for cur_hash in gpses:
        base_dict.merge(gpsdict(cur_hash ))

    print "All locations added:"
    print base_dict
    print ""
    print "Trying to find 3 nearest points to:"
    to_find = gpshash(60, 20)
    print to_find.__repr__()
    found = base_dict.find(to_find, 3)
    print ""
    print "Found the following:"
    for x in found:
        print x.__repr__()
Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top