· 7 years ago · Dec 09, 2018, 07:28 PM
1-- author: siyer@prosperworks.com
2-- incept: 2018-06-29
3
4-- Inspired in large part by jhw@prosperworks.com's index creation scripts and neil's column promotion effort.
5
6-- Promotes the company_user_id column of the gmail_msgid_mappings table to
7-- bigint. Locks that table for only O(1) with small K.
8
9-- Schema migration parameters:
10-- Table: gmail_msgid_mappings
11-- Column: company_user_id
12-- New column: tmp_promote_company_user_id
13-- Primary Key: false
14
15-- Direct backfill parameters:
16-- Batch size: 10000
17
18-- Locking parameters:
19-- Type: Advisory session lock.
20-- Key: 5701425142260866221
21
22-- WARNING: This script uses session variables. Therefore it is NOT compatible
23-- with pgbouncer in transaction or statement pooling modes. Use this only with
24-- pgbouncer in session pooling mode, or avoid pgbouncer in the connection path.
25-- This script is likely to hold a connection for hours when operating on large
26-- tables, so connection pooling will be of dubious value anyhow.
27
28-- Pattern for use with Heroku Postgres:
29--
30-- $ psql -d `heroku config:get --app ali-staging DATABASE_URL` -f promote_company_user_id_on_gmail_msgid_mappings_step_3.sql
31
32--
33-- Set up the connection.
34--
35
36\set ECHO none
37\pset pager off
38\timing on
39\set ON_ERROR_STOP on
40SET maintenance_work_mem = '8GB';
41SET citus.multi_shard_commit_protocol = '2pc';
42\set ECHO queries
43
44--
45-- Validate the parameters of the migration.
46
47DO $$
48
49DECLARE
50 tbl text := quote_ident('gmail_msgid_mappings');
51 target_col text := quote_ident('company_user_id');
52 new_col text := quote_ident('tmp_promote_company_user_id');
53
54BEGIN
55 -- If the target column does not exist, fail.
56
57 IF NOT EXISTS (
58 SELECT 1 FROM information_schema.columns
59 WHERE table_name = 'gmail_msgid_mappings'
60 AND column_name = 'company_user_id'
61 ) THEN
62 RAISE EXCEPTION 'target column % does not exist', target_col;
63 END IF;
64
65 -- If the new column does not exist, fail.
66
67 IF NOT EXISTS (
68 SELECT 1 FROM information_schema.columns
69 WHERE table_name = 'gmail_msgid_mappings'
70 AND column_name = 'tmp_promote_company_user_id'
71 ) THEN
72 RAISE EXCEPTION 'new column % does not exist', new_col;
73 END IF;
74
75-- Verify the backfill
76
77 IF EXISTS (
78 SELECT 1
79 FROM run_command_on_shards(
80 'gmail_msgid_mappings',
81 $cmd$
82 SELECT count(*)
83 FROM %s
84 WHERE company_user_id != tmp_promote_company_user_id
85 OR (company_user_id IS NULL AND tmp_promote_company_user_id IS NOT NULL)
86 OR (company_user_id IS NOT NULL AND tmp_promote_company_user_id IS NULL)
87 $cmd$
88 )
89 WHERE result::integer != 0
90 ) THEN
91 RAISE EXCEPTION 'backfill of % incomplete',target_col;
92 END IF;
93
94END
95$$;
96
97DO $$
98DECLARE
99 num_shards integer;
100 num_good_shards integer;
101
102BEGIN
103 num_shards := (
104 SELECT count(*) FROM pg_dist_shard WHERE logicalrelid::text = 'gmail_msgid_mappings'
105 );
106
107 num_good_shards := (
108 SELECT count(*)
109 FROM run_command_on_shards(
110 'gmail_msgid_mappings',
111 $cmd$
112 SELECT count(*) FROM information_schema.triggers
113 WHERE trigger_schema = 'public'
114 AND trigger_name = 'backfill_company_user_id_to_tmp_promote_company_user_id_on_gmai'
115 AND event_object_table::text = '%s'
116 $cmd$
117 )
118 WHERE result::integer = 2 -- One for INSERT, one for UPDATE.
119 );
120
121END
122$$;
123
124-- Create indexes if index(s) exists on the column
125--
126
127 CREATE INDEX CONCURRENTLY tmp_index_gmail_msgid_mappings_on_company_user_id ON gmail_msgid_mappings USING btree (tmp_promote_company_user_id);
128
129 CREATE UNIQUE INDEX CONCURRENTLY tmp_index_gmail_msgid_mappings_on_company_user_id_and_gmail_msg ON gmail_msgid_mappings USING btree (company_id,tmp_promote_company_user_id,gmail_msgid);
130
131 CREATE UNIQUE INDEX CONCURRENTLY tmp_unique_on_user_msgid ON gmail_msgid_mappings USING btree (company_id,tmp_promote_company_user_id,gmail_msgid);
132
133-- Cannot drop index and rename column on same transcation.
134
135 ALTER INDEX IF EXISTS old_index_gmail_msgid_mappings_on_company_user_ RENAME TO older_index_gmail_msgid_mappings_on_company_use;
136 ALTER INDEX index_gmail_msgid_mappings_on_company_user_id RENAME TO old_index_gmail_msgid_mappings_on_company_user_;
137 ALTER INDEX tmp_index_gmail_msgid_mappings_on_company_user_id RENAME TO index_gmail_msgid_mappings_on_company_user_id;
138
139 ALTER INDEX IF EXISTS old_index_gmail_msgid_mappings_on_company_user_ RENAME TO older_index_gmail_msgid_mappings_on_company_use;
140 ALTER INDEX index_gmail_msgid_mappings_on_company_user_id_and_gmail_msgid RENAME TO old_index_gmail_msgid_mappings_on_company_user_;
141 ALTER INDEX tmp_index_gmail_msgid_mappings_on_company_user_id_and_gmail_msg RENAME TO index_gmail_msgid_mappings_on_company_user_id_a;
142
143 ALTER INDEX IF EXISTS old_unique_on_user_msgid RENAME TO older_unique_on_user_msgid;
144 ALTER INDEX unique_on_user_msgid RENAME TO old_unique_on_user_msgid;
145 ALTER INDEX tmp_unique_on_user_msgid RENAME TO unique_on_user_msgid;
146
147-- This is the critical section: We obtain a full lock on the entire table, drop the old column, and rename the new column to have the old column's name.
148
149BEGIN;
150
151 -- If we can't get a lock in 10s, give up.
152
153 SET LOCAL lock_timeout = '10s';
154
155 LOCK TABLE gmail_msgid_mappings IN ACCESS EXCLUSIVE MODE;
156
157 ALTER TABLE gmail_msgid_mappings RENAME COLUMN company_user_id to old_company_user_id;
158
159 ALTER TABLE gmail_msgid_mappings RENAME COLUMN tmp_promote_company_user_id to company_user_id;
160
161END;
162
163-- Remove the passive backfill trigger from all shards.
164
165 SELECT run_command_on_shards(
166 'gmail_msgid_mappings',
167 $cmd$
168 DROP TRIGGER IF EXISTS backfill_company_user_id_to_tmp_promote_company_user_id_on_gmai ON %s
169 $cmd$
170 );
171
172-- Remove the backfill function from all workers.
173
174 SELECT run_command_on_workers(
175 $cmd$
176 DROP FUNCTION IF EXISTS copy_company_user_id_to_tmp_promote_company_user_id_on_gmail_ms CASCADE;
177 $cmd$
178 );
179
180--
181-- Create the passive backfill function.
182--
183
184 SELECT run_command_on_workers(
185 $cmd$
186 CREATE OR REPLACE FUNCTION copy_company_user_id_to_old_company_user_id_on_gmail_msgid_mapp()
187 RETURNS TRIGGER AS
188 $copy_company_user_id_to_old_company_user_id_on_gmail_msgid_mapp$
189 BEGIN
190 NEW.old_company_user_id := NEW.company_user_id;
191 RETURN NEW;
192 END;
193 $copy_company_user_id_to_old_company_user_id_on_gmail_msgid_mapp$ LANGUAGE plpgsql;
194 $cmd$
195 );
196
197--
198-- Create Trigger
199--
200
201 SELECT run_command_on_shards('gmail_msgid_mappings', $cmd$ CREATE TRIGGER fill_company_user_id_to_old_company_user_id_on_gmail_msgid_mapp
202 BEFORE INSERT OR UPDATE ON %s
203 FOR EACH ROW
204 EXECUTE PROCEDURE copy_company_user_id_to_old_company_user_id_on_gmail_msgid_mapp (company_user_id, old_company_user_id); $cmd$
205 );
206
207\echo Done!