· 7 years ago · Jan 16, 2019, 07:00 AM
1CREATE TABLE pt
2(
3 stop_id character varying(32) NOT NULL,
4 geom geometry(Point)
5)
6
7CREATE TABLE clustered_pt
8(
9 stop_id character varying(32) NOT NULL,
10 geom geometry(Point),
11 cluster_id smallint
12)
13
14CREATE OR REPLACE FUNCTION bottomup_cluster_index(points pt[], radius integer)
15 RETURNS SETOF clustered_pt AS
16$BODY$
17
18DECLARE
19 srid int;
20 counter int:=1;
21
22BEGIN
23--Avoid the whole processing if there's only 1 point.
24IF array_length(points,1)<2 THEN
25 RETURN QUERY SELECT stop_id::varchar(32), geom::geometry(point), 1 FROM unnest(points);
26 RETURN;
27END IF;
28
29
30CREATE TEMPORARY TABLE IF NOT EXISTS stops (LIKE pt) ON COMMIT DROP;
31
32CREATE TEMPORARY SEQUENCE clusterids;
33
34CREATE TEMPORARY TABLE clusters(
35 stop_group geometry,
36 stop_ids varchar[],
37 cluster_id smallint DEFAULT nextval('clusterids')
38 ) ON COMMIT DROP;
39
40
41ALTER SEQUENCE clusterids OWNED BY clusters.cluster_id;
42
43
44
45TRUNCATE stops;
46 --inserting points in
47INSERT INTO stops(stop_id, geom)
48 (SELECT (unnest(points)).* );
49
50--Store the srid to reconvert points after, assumes all points have the same SRID
51srid := ST_SRID(geom) FROM stops LIMIT 1;
52
53--Transforming points to a UTM coordinate system so distances will be calculated in meters.
54UPDATE stops
55SET geom = ST_TRANSFORM(geom,26986);
56
57INSERT INTO clusters(stop_group, stop_ids)
58 (SELECT ST_COLLECT(geom), ARRAY_AGG(stop_id)
59 FROM stops GROUP BY geom --Groups together points which are at the same location
60 );
61
62CREATE INDEX geom_index
63ON clusters
64USING gist
65(stop_group);
66
67Analyze clusters;
68
69LOOP
70 --If the shortest maximum distance between two clusters is greater than 2x the specified radius, then end the clustering algorithm.
71 IF (SELECT ST_MaxDistance(a.stop_group,b.stop_group) FROM clusters a, clusters b
72 WHERE
73 ST_DFullyWithin(a.stop_group,b.stop_group, 2 * radius)
74 AND a.cluster_id < b.cluster_id AND a.cluster_id > 0 AND b.cluster_id > 0
75 ORDER BY ST_MaxDistance(a.stop_group,b.stop_group) LIMIT 1)
76 IS NULl
77 THEN
78 EXIT;
79 END IF;
80
81 --Periodically reindex the clusters table
82 ANALYZE clusters;
83
84 counter := counter +1;
85
86 WITH finding_nearest_clusters AS(
87 SELECT DISTINCT ON (a.cluster_id) a.cluster_id, ST_collect(a.stop_group,b.stop_group) AS stop_group, ARRAY[a.cluster_id,b.cluster_id] as joined_clusters, a.stop_ids||b.stop_ids AS stop_ids
88 FROM clusters a, clusters b
89 WHERE ST_DFullyWithin(a.stop_group,b.stop_group, 2 * radius)
90 AND a.cluster_id < b.cluster_id AND a.cluster_id > 0 AND b.cluster_id > 0
91 ORDER BY a.cluster_id, ST_MaxDistance(a.stop_group,b.stop_group)
92 )
93 --If a cluster is linked to multiple nearest clusters, select only the shortest distance pairing, and flag the others.
94 , unique_clusters AS(
95 SELECT a.*, CASE WHEN ST_AREA(ST_MinimumBoundingCircle(a.stop_group))>= ST_AREA(ST_MinimumBoundingCircle(b.stop_group)) THEN 1 ELSE 0 END as repeat_flag
96 FROM finding_nearest_clusters a
97 LEFT OUTER JOIN finding_nearest_clusters b ON a.cluster_id <> b.cluster_id AND a.joined_clusters && b.joined_clusters
98 )
99 --Update the set of clusters with the new clusters
100 UPDATE clusters o SET
101 --Set to 0 the cluster_id of the cluster which will contain 0 data.
102 cluster_id = CASE WHEN o.cluster_id = joined_clusters[2] THEN 0 ELSE joined_clusters[1] END
103 ,stop_group = CASE WHEN o.cluster_id = joined_clusters[2] THEN NULL ELSE f.stop_group END
104 ,stop_ids = CASE WHEN o.cluster_id = joined_clusters[2] THEN NULL ELSE f.stop_ids END
105 FROM (SELECT DISTINCT ON (cluster_id) cluster_id, stop_group, joined_clusters, stop_ids, repeat_flag
106 FROM unique_clusters
107 ORDER BY cluster_id, repeat_flag DESC
108 ) f
109 WHERE o.cluster_id = ANY (joined_clusters) AND repeat_flag =0;
110
111 IF (SELECT COUNT(DISTINCT cluster_id) FROM clusters) < 2 THEN
112 EXIT;
113 END IF;
114
115END LOOP;
116
117RAISE NOTICE USING MESSAGE = $$Number of passes $$||counter;
118
119RETURN QUERY
120 SELECT stop_id::varchar(32), ST_TRANSFORM(geom, srid)::geometry(point), cluster_id
121 FROM stops
122 inner join (select cluster_id, unnest(stop_ids) AS stop_id FROM clusters)c USING (stop_id);
123END;
124$BODY$
125 LANGUAGE plpgsql VOLATILE
126 ;
127
128SELECT (clusters).* FROM (
129
130 SELECT bottomup_cluster_index(array_agg((stop_id,geom)::pt), 250) as clusters
131 FROM points
132)a