Merge lp://staging/~stub/launchpad/replication into lp://staging/launchpad/db-devel

Proposed by Stuart Bishop
Status: Merged
Approved by: Aaron Bentley
Approved revision: not available
Merged at revision: not available
Proposed branch: lp://staging/~stub/launchpad/replication
Merge into: lp://staging/launchpad/db-devel
Diff against target: 196 lines (+106/-19)
7 files modified
daemons/cache-database-replication-lag.py (+53/-0)
database/replication/helpers.py (+1/-0)
database/schema/comments.sql (+4/-0)
database/schema/patch-2207-28-1.sql (+9/-0)
database/schema/security.cfg (+6/-0)
database/schema/trusted.sql (+22/-0)
lib/canonical/launchpad/webapp/dbpolicy.py (+11/-19)
To merge this branch: bzr merge lp://staging/~stub/launchpad/replication
Reviewer Review Type Date Requested Status
Aaron Bentley (community) Approve
Review via email: mp+18698@code.staging.launchpad.net

Commit message

Implement an alternative approach to stop the appservers from getting blocked when they check replication lag under high load.

To post a comment you must log in.
Revision history for this message
Stuart Bishop (stub) wrote :

The fix for Bug #504696 fails on production, despite passing tests locally and on staging, and I'm unable to reproduce the failure.

This branch implements an alternative approach to stopping the appservers from getting blocked when they check replication lag under high load.

The database patch, stored procedures and permissions have been applied to production in case we want to test this on edge while I'm unavailable.

Revision history for this message
Aaron Bentley (abentley) :
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== added symlink 'daemons/_pythonpath.py'
=== target is u'../_pythonpath.py'
=== added file 'daemons/cache-database-replication-lag.py'
--- daemons/cache-database-replication-lag.py 1970-01-01 00:00:00 +0000
+++ daemons/cache-database-replication-lag.py 2010-02-15 07:48:22 +0000
@@ -0,0 +1,53 @@
1#!/usr/bin/python2.5
2#
3# Copyright 2010 Canonical Ltd. This software is licensed under the
4# GNU Affero General Public License version 3 (see the file LICENSE).
5
6"""Calculate database replication lag and cache it."""
7
8__metaclass__ = type
9__all__ = []
10
11import _pythonpath
12
13import sys
14import time
15
16import psycopg2
17
18from canonical.database.sqlbase import connect, ISOLATION_LEVEL_AUTOCOMMIT
19from canonical.launchpad.scripts import db_options, logger
20from lp.scripts.helpers import LPOptionParser
21
22
23def main(args=None):
24 parser = LPOptionParser()
25 db_options(parser)
26 parser.add_option(
27 "-s", "--sleep", dest="sleep", type="int", default=5,
28 metavar="SECS", help="Wait SECS seconds between refreshes.")
29
30 (options, args) = parser.parse_args(args)
31 if len(args) != 0:
32 parser.error("Too many arguments.")
33
34 log = logger(options)
35
36 while True:
37 try:
38 con = connect(user="lagmon", isolation=ISOLATION_LEVEL_AUTOCOMMIT)
39 cur = con.cursor()
40 while True:
41 cur.execute("SELECT update_replication_lag_cache()")
42 if cur.fetchone()[0]:
43 log.info("Updated.")
44 else:
45 log.error("update_replication_lag_cache() failed.")
46 time.sleep(options.sleep)
47 except psycopg2.Error, x:
48 log.error("%s. Retrying.", str(x).strip())
49 time.sleep(options.sleep)
50
51
52if __name__ == '__main__':
53 sys.exit(main())
054
=== modified file 'database/replication/helpers.py'
--- database/replication/helpers.py 2010-01-22 06:25:48 +0000
+++ database/replication/helpers.py 2010-02-15 07:48:22 +0000
@@ -44,6 +44,7 @@
44LPMAIN_SEED = frozenset([44LPMAIN_SEED = frozenset([
45 ('public', 'person'),45 ('public', 'person'),
46 ('public', 'launchpaddatabaserevision'),46 ('public', 'launchpaddatabaserevision'),
47 ('public', 'databasereplicationlag'),
47 ('public', 'fticache'),48 ('public', 'fticache'),
48 ('public', 'nameblacklist'),49 ('public', 'nameblacklist'),
49 ('public', 'openidconsumerassociation'),50 ('public', 'openidconsumerassociation'),
5051
=== modified file 'database/schema/comments.sql'
--- database/schema/comments.sql 2010-02-03 15:55:01 +0000
+++ database/schema/comments.sql 2010-02-15 07:48:22 +0000
@@ -2352,3 +2352,7 @@
2352COMMENT ON COLUMN SourcePackageFormatSelection.distroseries IS 'Refers to the distroseries in question.';2352COMMENT ON COLUMN SourcePackageFormatSelection.distroseries IS 'Refers to the distroseries in question.';
2353COMMENT ON COLUMN SourcePackageFormatSelection.format IS 'The SourcePackageFormat to allow.';2353COMMENT ON COLUMN SourcePackageFormatSelection.format IS 'The SourcePackageFormat to allow.';
23542354
2355COMMENT ON TABLE DatabaseReplicationLag IS 'A cached snapshot of database replication lag between our master Slony node and its slaves.';
2356COMMENT ON COLUMN DatabaseReplicationLag.node IS 'The Slony node number identifying the slave database.';
2357COMMENT ON COLUMN DatabaseReplicationLag.lag IS 'lag time.';
2358COMMENT ON COLUMN DatabaseReplicationLag.updated IS 'When this value was updated.';
23552359
=== added file 'database/schema/patch-2207-28-1.sql'
--- database/schema/patch-2207-28-1.sql 1970-01-01 00:00:00 +0000
+++ database/schema/patch-2207-28-1.sql 2010-02-15 07:48:22 +0000
@@ -0,0 +1,9 @@
1SET client_min_messages = ERROR;
2
3CREATE TABLE DatabaseReplicationLag (
4 node integer PRIMARY KEY,
5 lag interval NOT NULL,
6 updated timestamp without time zone
7 DEFAULT (CURRENT_TIMESTAMP AT TIME ZONE 'UTC'));
8
9INSERT INTO LaunchpadDatabaseRevision VALUES (2207, 28, 1);
010
=== modified file 'database/schema/security.cfg'
--- database/schema/security.cfg 2010-02-12 15:44:24 +0000
+++ database/schema/security.cfg 2010-02-15 07:48:22 +0000
@@ -166,6 +166,7 @@
166public.cvereference = SELECT, INSERT166public.cvereference = SELECT, INSERT
167public.cve = SELECT, INSERT, UPDATE167public.cve = SELECT, INSERT, UPDATE
168public.customlanguagecode = SELECT, INSERT, UPDATE, DELETE168public.customlanguagecode = SELECT, INSERT, UPDATE, DELETE
169public.databasereplicationlag = SELECT
169public.diff = SELECT, INSERT, UPDATE170public.diff = SELECT, INSERT, UPDATE
170public.distributionbounty = SELECT, INSERT, UPDATE171public.distributionbounty = SELECT, INSERT, UPDATE
171public.distributionmirror = SELECT, INSERT, UPDATE, DELETE172public.distributionmirror = SELECT, INSERT, UPDATE, DELETE
@@ -1898,6 +1899,11 @@
1898public.job = SELECT, UPDATE, DELETE1899public.job = SELECT, UPDATE, DELETE
1899public.bugjob = SELECT, DELETE1900public.bugjob = SELECT, DELETE
19001901
1902[lagmon]
1903# cache-database-replication-lag.py
1904type=user
1905public.update_replication_lag_cache() = EXECUTE
1906
1901[process-apport-blobs]1907[process-apport-blobs]
1902type=user1908type=user
1903groups=script,read1909groups=script,read
19041910
=== modified file 'database/schema/trusted.sql'
--- database/schema/trusted.sql 2010-02-02 10:57:26 +0000
+++ database/schema/trusted.sql 2010-02-15 07:48:22 +0000
@@ -94,6 +94,28 @@
94'Returns the lag time of the lpmain replication set to the given node, or NULL if not a replicated installation. The node id parameter can be obtained by calling getlocalnodeid() on the relevant database. This function only returns meaningful results on the lpmain replication set master.';94'Returns the lag time of the lpmain replication set to the given node, or NULL if not a replicated installation. The node id parameter can be obtained by calling getlocalnodeid() on the relevant database. This function only returns meaningful results on the lpmain replication set master.';
9595
9696
97CREATE OR REPLACE FUNCTION update_replication_lag_cache() RETURNS boolean
98LANGUAGE plpgsql VOLATILE SECURITY DEFINER AS
99$$
100 BEGIN
101 DELETE FROM DatabaseReplicationLag;
102 INSERT INTO DatabaseReplicationLag (node, lag)
103 SELECT st_received, st_lag_time FROM _sl.sl_status
104 WHERE st_origin = _sl.getlocalnodeid('_sl');
105 RETURN TRUE;
106 -- Slony-I not installed here - non-replicated setup.
107 EXCEPTION
108 WHEN invalid_schema_name THEN
109 RETURN FALSE;
110 WHEN undefined_table THEN
111 RETURN FALSE;
112 END;
113$$;
114
115COMMENT ON FUNCTION update_replication_lag_cache() IS
116'Updates the DatabaseReplicationLag materialized view.';
117
118
97CREATE OR REPLACE FUNCTION getlocalnodeid() RETURNS integer119CREATE OR REPLACE FUNCTION getlocalnodeid() RETURNS integer
98LANGUAGE plpgsql STABLE SECURITY DEFINER AS120LANGUAGE plpgsql STABLE SECURITY DEFINER AS
99$$121$$
100122
=== modified file 'lib/canonical/launchpad/webapp/dbpolicy.py'
--- lib/canonical/launchpad/webapp/dbpolicy.py 2010-01-20 22:09:26 +0000
+++ lib/canonical/launchpad/webapp/dbpolicy.py 2010-02-15 07:48:22 +0000
@@ -293,26 +293,18 @@
293293
294 # sl_status gives meaningful results only on the origin node.294 # sl_status gives meaningful results only on the origin node.
295 master_store = self.getStore(MAIN_STORE, MASTER_FLAVOR)295 master_store = self.getStore(MAIN_STORE, MASTER_FLAVOR)
296 # If it takes more than (by default) 0.25 seconds to query the
297 # replication lag, assume we are lagged. Normally the query
298 # takes <20ms. This can happen during heavy updates, as the
299 # Slony-I tables can get slow with lots of events. We use a
300 # SAVEPOINT to conveniently reset the statement timeout.
301 master_store.execute("""
302 SAVEPOINT lag_check; SET LOCAL statement_timeout TO %d
303 """ % config.launchpad.lag_check_timeout)
304 try:
305 try:
306 return master_store.execute(
307 "SELECT replication_lag(%d)" % slave_node_id).get_one()[0]
308 except TimeoutError:
309 logging.warn(
310 'Gave up querying slave lag after %d ms',
311 (config.launchpad.lag_check_timeout))
312 return timedelta(days=999) # A long, long time.
313 finally:
314 master_store.execute("ROLLBACK TO lag_check")
315296
297 # Retrieve the cached lag.
298 lag = master_store.execute("""
299 SELECT lag + (CURRENT_TIMESTAMP AT TIME ZONE 'UTC' - updated)
300 FROM DatabaseReplicationLag WHERE node=%d
301 """ % slave_node_id).get_one()
302 if lag is None:
303 logging.error(
304 "No data in DatabaseReplicationLag for node %d"
305 % slave_node_id)
306 return timedelta(days=999) # A long, long time.
307 return lag[0]
316308
317309
318def WebServiceDatabasePolicyFactory(request):310def WebServiceDatabasePolicyFactory(request):

Subscribers

People subscribed via source and target branches

to status/vote changes: