Clustering Urban Areas¶

This notebook shows how tessellation can be used to generate clustering units in order to segment urban areas.

[1]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

plt.rcParams["figure.dpi"] = 100
plt.rcParams["figure.figsize"] = (8, 8)

import geopandas as gpd
from shapely.geometry import Point
import contextily as ctx

[2]:

from sklearn.cluster import KMeans
from sklearn.preprocessing import MinMaxScaler

[3]:

from tesspy import Tessellation

Area¶

We use Frankfurt am Main in Germany as a case study. First, we get the city boundary.

[4]:

ffm = Tessellation("Frankfurt am Main")
ffm_polygon = ffm.get_polygon()

[5]:

# visualization of area
ax = ffm_polygon.to_crs("EPSG:3857").plot(facecolor="none", edgecolor="tab:red", lw=1)
ctx.add_basemap(ax=ax, source=ctx.providers.CartoDB.Positron)
ax.set_axis_off()
ax.set_title("Frankfurt am Main", fontsize=10)
plt.show()

../_images/example_notebooks_06_Clustering_Urban_Areas_6_0.png

Tessellation¶

We tessellate the area using Voronoi Diagrams.

[6]:

poi_categories = ["shop", "building", "amenity", "office", "public_transport"]

# voronoi Diagrams
ffm_voronoi_kmeans = ffm.voronoi(poi_categories=poi_categories, n_polygons=1000)

2026-02-22 16:26:48 | INFO | tesspy.tessellation | event=voronoi.start cluster_algo=k-means poi_categories=5 n_polygons=1000 min_cluster_size=15 timeout_s=300
2026-02-22 16:26:48 | INFO | tesspy.data.poi | event=poi.fetch.start poi_categories=5 timeout_s=300
2026-02-22 16:44:53 | INFO | tesspy.data.poi | event=poi.fetch.done features=164331 duration_s=438.215
/Users/siavash.saki/Personal/tesspy/tesspy/data/poi.py:118: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.

  centroids = gdf.geometry.centroid
2026-02-22 16:44:54 | INFO | tesspy.data.poi | event=poi.parse.done poi_count=164331
2026-02-22 16:44:56 | INFO | tesspy.tessellation | event=voronoi.cluster.start algo=kmeans
2026-02-22 16:45:02 | INFO | tesspy.tessellation | event=voronoi.polygons.create generators=1000
2026-02-22 16:45:02 | INFO | tesspy._validators | event=geometry.multipolygon.explode
2026-02-22 16:45:02 | INFO | tesspy.tessellation | event=voronoi.done polygons=1002 duration_s=447.148

[7]:

# adding an ID to tiles
ffm_voronoi_kmeans.reset_index(inplace=True)
ffm_voronoi_kmeans.rename(columns={"index": "tile_id"}, inplace=True)

# check polygons
ffm_voronoi_kmeans.tail()

[7]:

	tile_id	voronoi_id	geometry
997	997	voronoiID997	POLYGON ((8.69789 50.09346, 8.6966 50.09023, 8...
998	998	voronoiID998	POLYGON ((8.72927 50.08946, 8.72217 50.08244, ...
999	999	voronoiID999	POLYGON ((8.62277 50.11096, 8.61704 50.11149, ...
1000	1000	voronoiID1000	POLYGON ((8.6668 50.19889, 8.66502 50.19997, 8...
1001	1001	voronoiID1001	POLYGON ((8.6919 50.13976, 8.6934 50.13511, 8....

[8]:

ax = ffm_voronoi_kmeans.to_crs("EPSG:3857").plot(
    facecolor="none", edgecolor="k", lw=0.5
)
ffm_polygon.to_crs("EPSG:3857").plot(ax=ax, facecolor="none", edgecolor="tab:red", lw=1)
ctx.add_basemap(ax=ax, source=ctx.providers.CartoDB.Positron)
ax.set_axis_off()
ax.set_title("Voronoi Tessellation", fontsize=15)
plt.show()

../_images/example_notebooks_06_Clustering_Urban_Areas_10_0.png

Data Processing¶

We create count variables. For each polygon (tile), the number of each POI category is calculated.

[9]:

# create a GeoDataFrame of POI data
poi_df = ffm.get_poi_data()
poi_geodata = gpd.GeoDataFrame(
    data=poi_df,
    geometry=poi_df[["center_longitude", "center_latitude"]]
    .apply(Point, axis=1)
    .values,
    crs="EPSG:4326",
)

[10]:

poi_geodata.head()

[10]:

	center_longitude	center_latitude	shop	building	amenity	office	public_transport	geometry
0	8.568829	50.122765	False	False	False	False	True	POINT (8.56883 50.12277)
1	8.633793	50.153205	False	False	False	False	True	POINT (8.63379 50.1532)
2	8.609222	50.106639	False	False	False	False	True	POINT (8.60922 50.10664)
3	8.620979	50.107860	False	False	False	False	True	POINT (8.62098 50.10786)
4	8.604924	50.108962	False	False	False	False	True	POINT (8.60492 50.10896)

[11]:

# Count POI
poi_counts = gpd.sjoin(
    ffm_voronoi_kmeans, poi_geodata, how="left", predicate="contains"
)
poi_counts[poi_categories] = poi_counts[poi_categories].map(
    lambda x: np.nan if not x else x
)
poi_counts = poi_counts.groupby(by="tile_id").count().reset_index()
poi_counts.head()

[11]:

	tile_id	voronoi_id	geometry	index_right	center_longitude	center_latitude	shop	building	amenity	office	public_transport
0	0	117	117	117	117	117	8	79	18	0	12
1	1	230	230	230	230	230	2	150	69	5	12
2	2	343	343	343	343	343	10	183	142	1	9
3	3	275	275	275	275	275	0	273	2	1	0
4	4	247	247	247	247	247	1	234	8	0	6

[12]:

fig, ax = plt.subplots(figsize=(5, 3))
poi_counts["index_right"].plot(
    kind="hist", bins=50, ax=ax, lw=0.1, edgecolor="white", zorder=3
)
ax.set_title("All POI count")
ax.set_xlabel("POI count per polygon")
ax.set_ylabel("Polygons count")
ax.grid(axis="y", lw=0.3, ls="--", zorder=0)

../_images/example_notebooks_06_Clustering_Urban_Areas_15_0.png

We normalize the data using MinMaxScaler.

[13]:

data = poi_counts[poi_categories].values
data_norm = MinMaxScaler().fit_transform(data)

Clustering¶

K-Means is used for clustering polygons.

[14]:

kmc = KMeans(n_clusters=4, random_state=0).fit(data_norm)

[15]:

clustering_results = gpd.GeoDataFrame(
    geometry=ffm_voronoi_kmeans["geometry"], data=poi_counts[poi_categories]
)
clustering_results["cluster"] = kmc.labels_

# pretiffy the results
dct_ = dict(
    zip(
        clustering_results.groupby("cluster")
        .count()
        .sort_values("geometry", ascending=False)
        .index,
        clustering_results.groupby("cluster").count().index,
    )
)


def sort_class_labels(i, dct):
    return dct[i]


clustering_results["cluster"] = clustering_results["cluster"].apply(
    lambda x: sort_class_labels(x, dct_)
)

Results¶

[16]:

fig, ax = plt.subplots()
ax.set_axis_off()
clustering_results.plot(
    column="cluster",
    categorical=True,
    cmap="OrRd",
    linewidth=0.1,
    edgecolor="k",
    legend=True,
    ax=ax,
)
ax.set_title(f"Frankfurt: Clustering Results")
plt.show()

../_images/example_notebooks_06_Clustering_Urban_Areas_23_0.png

[17]:

from matplotlib import cm

cs = cm.OrRd(np.linspace(0, 1, 4))

fig, ax = plt.subplots()
labels = clustering_results["cluster"].value_counts().index
sizes = clustering_results["cluster"].value_counts().values
ax.pie(sizes, labels=labels, startangle=90, colors=cs)
ax.set_title(f"Cluster sizes (Number of polygons in each cluster)")

plt.show()

../_images/example_notebooks_06_Clustering_Urban_Areas_24_0.png

[18]:

cluster_centroids = clustering_results.groupby("cluster").mean(numeric_only=True)
fig, ax = plt.subplots(figsize=(8, 4))
sns.heatmap(cluster_centroids, ax=ax, cmap="OrRd_r", vmin=0, vmax=1)
ax.set_title(f"Cluster Centroids")
plt.show()

../_images/example_notebooks_06_Clustering_Urban_Areas_25_0.png

	tile_id	voronoi_id	geometry	index_right	center_longitude	center_latitude	shop	building	amenity	office	public_transport
0	0	117	117	117	117	117	8	79	18	0	12
1	1	230	230	230	230	230	2	150	69	5	12
2	2	343	343	343	343	343	10	183	142	1	9
3	3	275	275	275	275	275	0	273	2	1	0
4	4	247	247	247	247	247	1	234	8	0	6

	tile_id	voronoi_id	geometry	index_right	center_longitude	center_latitude	shop	building	amenity	office	public_transport
0	0	117	117	117	117	117	8	79	18	0	12
1	1	230	230	230	230	230	2	150	69	5	12
2	2	343	343	343	343	343	10	183	142	1	9
3	3	275	275	275	275	275	0	273	2	1	0
4	4	247	247	247	247	247	1	234	8	0	6

	tile_id	voronoi_id	geometry	index_right	center_longitude	center_latitude	shop	building	amenity	office	public_transport
0	0	117	117	117	117	117	8	79	18	0	12
1	1	230	230	230	230	230	2	150	69	5	12
2	2	343	343	343	343	343	10	183	142	1	9
3	3	275	275	275	275	275	0	273	2	1	0
4	4	247	247	247	247	247	1	234	8	0	6