Merge lp://staging/~stub/launchpad/opstats into lp://staging/launchpad

Proposed by Stuart Bishop
Status: Merged
Merged at revision: not available
Proposed branch: lp://staging/~stub/launchpad/opstats
Merge into: lp://staging/launchpad
Diff against target: None lines
To merge this branch: bzr merge lp://staging/~stub/launchpad/opstats
Reviewer Review Type Date Requested Status
Michael Hudson-Doyle Approve
Review via email: mp+11774@code.staging.launchpad.net
To post a comment you must log in.
Revision history for this message
Stuart Bishop (stub) wrote :

Add a new metrics to the Launchpad operational statistics, as requested by the Losas in Bug #426416.

The new metric counts the number of 5xx response codes sent to known web browsers. This will allow Losas to monitor user experience without the noise generated by robots.

Revision history for this message
Michael Hudson-Doyle (mwhudson) wrote :

The testing purist in me wants you to split test_is_browser into three test cases, but basically this looks fine.

review: Approve
Revision history for this message
Francis J. Lacoste (flacoste) wrote :

On September 15, 2009, Stuart Bishop wrote:
> +
> +
> +_browser_re = re.compile(r"""(?x)^(
> + Mozilla |
> + Opera |
> + Lynx |
> + Links |
> + w3m
> + )""")
>

Will this cover Webkit?

--
Francis J. Lacoste
<email address hidden>

Revision history for this message
Stuart Bishop (stub) wrote :

On Wed, Sep 16, 2009 at 8:54 PM, Francis J. Lacoste
<email address hidden> wrote:
> On September 15, 2009, Stuart Bishop wrote:
>> +
>> +
>> +_browser_re = re.compile(r"""(?x)^(
>> +    Mozilla |
>> +    Opera |
>> +    Lynx |
>> +    Links |
>> +    w3m
>> +    )""")
>>
>
> Will this cover Webkit?

I believe so. I think it is one of the vast majority that starts their
user-agent string with 'Mozilla'.

--
Stuart Bishop <email address hidden>
http://www.stuartbishop.net/

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/canonical/launchpad/pagetests/standalone/xx-opstats.txt'
2--- lib/canonical/launchpad/pagetests/standalone/xx-opstats.txt 2009-02-05 20:46:59 +0000
3+++ lib/canonical/launchpad/pagetests/standalone/xx-opstats.txt 2009-09-15 10:55:13 +0000
4@@ -46,6 +46,7 @@
5 500s: 0
6 503s: 0
7 5XXs: 0
8+ 5XXs_b: 0
9 6XXs: 0
10 http requests: 0
11 requests: 0
12@@ -108,6 +109,26 @@
13 http requests: 1
14 requests: 1
15
16+We also have a special metric counting server errors returned to known
17+web browsers (5XXs_b) - in the production environment we care more
18+about errors returned to people than robots crawling obscure parts of
19+the site.
20+
21+ >>> from textwrap import dedent
22+ >>> output = http(dedent("""\
23+ ... GET /error-test HTTP/1.1
24+ ... Host: launchpad.dev
25+ ... User-Agent: Mozilla/42.0
26+ ... """))
27+ >>> output.getStatus()
28+ 500
29+ >>> report()
30+ 500s: 1
31+ 5XXs: 1
32+ 5XXs_b: 1
33+ http requests: 1
34+ requests: 1
35+
36 == Number of XML-RPC Faults ==
37
38 >>> try:
39@@ -123,7 +144,6 @@
40
41 == Number of soft timeouts ==
42
43- >>> from textwrap import dedent
44 >>> from canonical.config import config
45 >>> test_data = dedent("""
46 ... [database]
47
48=== modified file 'lib/canonical/launchpad/webapp/opstats.py'
49--- lib/canonical/launchpad/webapp/opstats.py 2009-06-25 05:30:52 +0000
50+++ lib/canonical/launchpad/webapp/opstats.py 2009-09-15 09:56:29 +0000
51@@ -43,6 +43,7 @@
52 '4XXs': 0, # Client Errors
53 '5XXs': 0, # Server Errors
54 '6XXs': 0, # Internal Errors
55+ '5XXs_b': 0, # Server Errors returned to browsers (not robots).
56 })
57
58 def opstats(self):
59@@ -51,13 +52,14 @@
60 Keys currently are:
61 requests -- # requests served by this appserver.
62 xml-rpc requests -- # xml-rpc requests served.
63- 404s -- # 404 status responses served (Not Found)
64- 500s -- # 500 status responses served (Unhandled exceptions)
65- 503s -- # 503 status responses served (Timeouts)
66- 3XXs -- # 300-399 status responses served (Redirection)
67- 4XXs -- # 400-499 status responses served (Client Errors)
68- 5XXs -- # 500-599 status responses served (Server Errors)
69- 6XXs -- # 600-600 status responses served (Internal Errors)
70+ 404s -- 404 status responses served (Not Found)
71+ 500s -- 500 status responses served (Unhandled exceptions)
72+ 503s -- 503 status responses served (Timeouts)
73+ 3XXs -- 300-399 status responses served (Redirection)
74+ 4XXs -- 400-499 status responses served (Client Errors)
75+ 5XXs -- 500-599 status responses served (Server Errors)
76+ 6XXs -- 600-600 status responses served (Internal Errors)
77+ 5XXs_b -- As 5XXs, but returned to browsers (not robots).
78 """
79 return OpStats.stats
80
81
82=== modified file 'lib/canonical/launchpad/webapp/publication.py'
83--- lib/canonical/launchpad/webapp/publication.py 2009-08-06 12:23:39 +0000
84+++ lib/canonical/launchpad/webapp/publication.py 2009-09-15 10:28:23 +0000
85@@ -10,6 +10,7 @@
86
87 import gc
88 import os
89+import re
90 import thread
91 import threading
92 import traceback
93@@ -596,7 +597,12 @@
94 OpStats.stats['503s'] += 1
95
96 # Increment counters for status code groups.
97- OpStats.stats[str(status)[0] + 'XXs'] += 1
98+ status_group = str(status)[0] + 'XXs'
99+ OpStats.stats[status_group] += 1
100+
101+ # Increment counter for 5XXs_b.
102+ if is_browser(request) and status_group == '5XXs':
103+ OpStats.stats['5XXs_b'] += 1
104
105 # Reset all Storm stores when not running the test suite. We could
106 # reset them when running the test suite but that'd make writing tests
107@@ -765,3 +771,30 @@
108
109 def __init__(self, context):
110 self.context = context
111+
112+
113+_browser_re = re.compile(r"""(?x)^(
114+ Mozilla |
115+ Opera |
116+ Lynx |
117+ Links |
118+ w3m
119+ )""")
120+
121+def is_browser(request):
122+ """Return True if we believe the request was from a browser.
123+
124+ There will be false positives and false negatives, as we can
125+ only tell this from the User-Agent: header and this cannot be
126+ trusted.
127+
128+ Almost all web browsers provide a User-Agent: header starting
129+ with 'Mozilla'. This is good enough for our uses. We also
130+ add a few other common matches as well for good measure.
131+ We could massage one of the user-agent databases that are
132+ available into a usable, but we would gain little.
133+ """
134+ user_agent = request.getHeader('User-Agent')
135+ return (
136+ user_agent is not None
137+ and _browser_re.search(user_agent) is not None)
138
139=== modified file 'lib/canonical/launchpad/webapp/tests/test_publication.py'
140--- lib/canonical/launchpad/webapp/tests/test_publication.py 2009-08-18 08:32:15 +0000
141+++ lib/canonical/launchpad/webapp/tests/test_publication.py 2009-09-15 10:28:23 +0000
142@@ -23,6 +23,7 @@
143 from lp.testing import TestCase, TestCaseWithFactory
144 import canonical.launchpad.webapp.adapter as da
145 from canonical.launchpad.webapp.interfaces import OAuthPermission
146+from canonical.launchpad.webapp.publication import is_browser
147 from canonical.launchpad.webapp.servers import (
148 LaunchpadTestRequest, WebServicePublication)
149
150@@ -93,6 +94,20 @@
151 # Ensure that it is different to the last logged OOPS.
152 self.assertNotEqual(repr(last_oops), repr(next_oops))
153
154+ def test_is_browser(self):
155+ # No User-Agent: header.
156+ request = LaunchpadTestRequest()
157+ self.assertFalse(is_browser(request))
158+
159+ # Browser User-Agent: header.
160+ request = LaunchpadTestRequest(environ={
161+ 'USER_AGENT': 'Mozilla/42 Extreme Edition'})
162+ self.assertTrue(is_browser(request))
163+
164+ # Robot User-Agent: header.
165+ request = LaunchpadTestRequest(environ={'USER_AGENT': 'BottyBot'})
166+ self.assertFalse(is_browser(request))
167+
168
169 def test_suite():
170 suite = unittest.TestLoader().loadTestsFromName(__name__)