Question

I have a set of ~36,000 polygons which represent a partition (~counties) of the country. My python script receives a lot of points: pointId, longitude, latitude.

For each point, I want to send back pointId, polygonId. For each point, looping into all the polygons and use myPoint.within(myPolygon) is quite inefficient.

I suppose the shapely library offers a better way to prepare the polygon so that finding the polygon for a point becomes a tree path (country, region, sub region, ...)

Here is my code so far:

import sys
import os
import json
import time
import string
import uuid

py_id = str(uuid.uuid4())

sys.stderr.write(py_id + '\n')
sys.stderr.write('point_in_polygon.py V131130a.\n')
sys.stderr.flush()

from shapely.geometry import Point
from shapely.geometry import Polygon
import sys
import json
import string
import uuid
import time

jsonpath='.\cantons.json'
jsonfile = json.loads(open(jsonpath).read())

def find(id, obj):
    results = []

    def _find_values(id, obj):
        try:
            for key, value in obj.iteritems():
                if key == id:
                    results.append(value)
                elif not isinstance(value, basestring):
                    _find_values(id, value)
        except AttributeError:
            pass

        try:
            for item in obj:
                if not isinstance(item, basestring):
                    _find_values(id, item)
        except TypeError:
            pass

    if not isinstance(obj, basestring):
        _find_values(id, obj)
    return results


#1-Load polygons from json  
r=find('rings',jsonfile)
len_r = len(r)

#2-Load attributes from json
a=find('attributes',jsonfile)

def insidePolygon(point,json):
        x=0
    while x < len_r :
            y=0
            while y < len(r[x]) :
        p=Polygon(r[x][y])
        if(point.within(p)):
            return a[y]['OBJECTID'],a[y]['NAME_5']
        y=y+1
        x=x+1
    return None,None


while True:
    line = sys.stdin.readline()
    if not line:
        break
    try:
        args, tobedropped = string.split(line, "\n", 2)

        #input: vehicleId, longitude, latitude
        vehicleId, longitude, latitude = string.split(args, "\t")

        point = Point(float(longitude), float(latitude))
        cantonId,cantonName = insidePolygon(point,r)

        #output: vehicleId, cantonId, cantonName
        # vehicleId will be 0 if not found
        # vehicleId will be < 0 in case of an exception
        if (cantonId == None):
            print "\t".join(["0", "", ""])
        else:
            print "\t".join([str(vehicleId), str(cantonId), str(cantonName)])

    except ValueError:
        print "\t".join(["-1", "", ""])
        sys.stderr.write(py_id + '\n')
        sys.stderr.write('ValueError in Python script\n')
        sys.stderr.write(line)
        sys.stderr.flush()
    except:
        sys.stderr.write(py_id + '\n')
        sys.stderr.write('Exception in Python script\n')
        sys.stderr.write(str(sys.exc_info()[0]) + '\n')
        sys.stderr.flush()
        print "\t".join(["-2", "", ""])
Was it helpful?

Solution

Use Rtree (examples) as R-tree index to: (1) index the bounds of the 36k polygons (do this just after reading jsonfile), then (2) very quickly find the intersecting bounding boxes of each polygon to your point of interest. Then, (3) for the intersecting bounding boxes from Rtree, use shapely to use, e.g. point.within(p) to do the actual point-in-polygon analysis. You should see a massive leap of performance with this technique.

OTHER TIPS

It's great,

Here is sample code :

polygons_sf = shapefile.Reader("<shapefile>")
polygon_shapes = polygons_sf.shapes()
polygon_points = [q.points for q in polygon_shapes ]
polygons = [Polygon(q) for q in polygon_points]
idx = index.Index()
count = -1
for q in polygon_shapes:
    count +=1
    idx.insert(count, q.bbox)
[...]
for j in idx.intersection([point.x, point.y]):
    if(point.within(polygons[j])):
        geo1, geo2 = polygons_sf.record(j)[0], polygons_sf.record(j)[13]
        break

Thanks

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top