1
=== modified file 'src/lazr/restfulclient/_browser.py'
2
--- src/lazr/restfulclient/_browser.py	2009-10-22 16:25:34 +0000
3
+++ src/lazr/restfulclient/_browser.py	2010-02-09 20:07:14 +0000
4
@@ -35,7 +35,7 @@
5
35
import shutil
35
import shutil
6
36
import tempfile
36
import tempfile
7
37
from httplib2 import (
37
from httplib2 import (
9
38
    FailedToDecompressContent, FileCache, Http, safename, urlnorm)
38
    FailedToDecompressContent, FileCache, Http, urlnorm)
10
39
import simplejson
39
import simplejson
11
40
from cStringIO import StringIO
40
from cStringIO import StringIO
12
41
import zlib
41
import zlib
13
@@ -68,6 +68,45 @@
14
68
            response, content)
68
            response, content)
15
69
    return content
69
    return content
16
70
70
17
71
# A drop-in replacement for httplib2's safename.
18
72
from httplib2 import _md5, re_url_scheme, re_slash
19
73
def safename(filename):
    """Build a cache-safe file name from `filename` (normally a URL).

    The result has the form "<readable part>,<md5 hex digest>". The
    readable part is the name with the text matched by re_url_scheme
    removed and every run matched by re_slash replaced by a comma, cut
    to at most 117 characters. The digest is computed from the name
    before that stripping and cutting take place.
    """
    name = filename
    try:
        if re_url_scheme.match(name):
            # Byte strings are decoded first so that both kinds of
            # input go through the same IDNA encoding step.
            if isinstance(name, str):
                name = name.decode('utf-8')
            name = name.encode('idna')
    except UnicodeError:
        # Best effort only: carry on with the name in whatever form
        # it had reached when the conversion failed.
        pass
    if isinstance(name, unicode):
        name = name.encode('utf-8')

    # Hash first, so that the checksum reflects the complete name and
    # not the shortened, sanitized version built below.
    digest = _md5(name).hexdigest()
    stem = re_slash.sub(",", re_url_scheme.sub("", name))

    # This is the part that differs from stock httplib2, which trims
    # the name when it is longer than 200 characters and then appends
    # a comma and a 32-character md5 sum. That causes problems on
    # eCryptfs filesystems, where the maximum safe filename length is
    # closer to 150 characters. So the limit used here is 117
    # (150-32-1) instead of 200.
    #
    # See:
    #  http://code.google.com/p/httplib2/issues/detail?id=92
    #  https://bugs.launchpad.net/bugs/344878
    #  https://bugs.launchpad.net/bugs/512832
    return ",".join((stem[:117], digest))
55
109
56
71
110
57
72
class RestfulHttp(Http):
111
class RestfulHttp(Http):
58
73
    """An Http subclass with some custom behavior.
112
    """An Http subclass with some custom behavior.
59
74
113
60
=== modified file 'src/lazr/restfulclient/docs/caching.txt'
61
--- src/lazr/restfulclient/docs/caching.txt	2009-10-22 16:25:34 +0000
62
+++ src/lazr/restfulclient/docs/caching.txt	2010-02-09 20:07:14 +0000
63
@@ -112,3 +112,110 @@
64
112
    >>> httplib2.debuglevel = 0
112
    >>> httplib2.debuglevel = 0
65
113
    >>> import shutil
113
    >>> import shutil
66
114
    >>> shutil.rmtree(tempdir)
114
    >>> shutil.rmtree(tempdir)
67
115
68
116
Cache filenames
69
117
---------------
70
118
71
119
lazr.restfulclient caches HTTP responses in individual files named
72
120
after the URL accessed. This is behavior derived from httplib2, but
73
121
lazr.restfulclient does two things differently from httplib2.
74
122
75
123
To see these two things, let's set up a client that uses a temporary
76
124
directory as its cache. The directory starts out empty.
77
125
78
126
    >>> from os import listdir
79
127
    >>> tempdir = tempfile.mkdtemp()
80
128
    >>> len(listdir(tempdir))
81
129
    0
82
130
83
131
As soon as we create a client object, though, lazr.restfulclient
84
132
fetches a JSON and a WADL representation of the service root, and
85
133
caches them individually.
86
134
87
135
    >>> service = CookbookWebServiceClient(cache=tempdir)
88
136
    >>> cache_contents = listdir(tempdir)
89
137
    >>> for file in sorted(cache_contents):
90
138
    ...     print file
91
139
    cookbooks.dev...application,json...
92
140
    cookbooks.dev...vnd.sun.wadl+xml...
93
141
94
142
This is the first difference between lazr.restfulclient's caching and
95
143
httplib2's. httplib2 would store all requests for the service root in
96
144
a filename based solely on the URL. This effectively limits httplib2
97
145
to a single representation of a given resource: the WADL
98
146
representation would be overwritten with the JSON
99
147
representation. lazr.restfulclient incorporates the media type in the
100
148
cache filename, so that WADL and JSON representations are stored
101
149
separately.
102
150
103
151
The second difference has to do with filename length limits. httplib2
104
152
caps filenames at about 240 characters so that cache files can be
105
153
stored on filesystems with 255-character filename length limits. For
106
154
compatibility with eCryptfs filesystems, lazr.restfulclient goes
107
155
further, and caps filenames at 150 characters.
108
156
109
157
To test out the limit, let's create a cookbook with an incredibly
110
158
long name.
111
159
112
160
    >>> long_name = (
113
161
    ...     "This cookbook name is amazingly long; so long that it will "
114
162
    ...     "surely be truncated when it is incorporated into a file "
115
163
    ...     "name for the cache. The cache file will contain a cached "
116
164
    ...     "HTTP respone containing a JSON representation of of this "
117
165
    ...     "cookbook, whose name, I repeat, is very long indeed.")
118
166
    >>> len(long_name)
119
167
    281
120
168
121
169
    >>> import datetime
122
170
    >>> date = datetime.datetime(1994, 1, 1)
123
171
    >>> book = service.cookbooks.create(
124
172
    ...     name=long_name, cuisine="General", copyright_date=date,
125
173
    ...     price=10.22, last_printing=date)
126
174
127
175
lazr.restfulclient automatically fetched a JSON representation of the
128
176
new cookbook, so it's already present in the cache. Because a
129
177
cookbook's URL incorporates its name, and this cookbook's name is
130
178
incredibly long, it must have been truncated to fit on disk.
131
179
132
180
    >>> [cookbook_cache_filename] = [file for file in listdir(tempdir)
133
181
    ...                              if 'amazingly' in file]
134
182
135
183
Indeed, the filename has been truncated to fit in the rough
136
184
150-character safety limit for eCryptfs filesystems.
137
185
138
186
    >>> len(cookbook_cache_filename)
139
187
    150
140
188
141
189
Despite the truncation, some of the useful information from the
142
190
cookbook's name makes it into the filename, making it easy to find when
143
191
manually crawling through the cache directory.
144
192
145
193
    >>> print cookbook_cache_filename
146
194
    cookbooks.dev...This%20cookbook%20name%20is%20amazingly%20long...
147
195
148
196
To avoid conflicts caused by truncation, the filename always ends with
149
197
an MD5 sum derived from the untruncated URL. Let's create a second
150
198
cookbook whose name differs from the first cookbook only at the end.
151
199
152
200
    >>> longer_name = long_name + ": The Sequel"
153
201
    >>> book = service.cookbooks.create(
154
202
    ...     name=longer_name, cuisine="General", copyright_date=date,
155
203
    ...     price=10.22, last_printing=date)
156
204
157
205
This cookbook's URL is identical to the first cookbook's URL for far
158
206
longer than 150 characters. But since the truncated filename
159
207
incorporates an MD5 sum based on the full URL, the two cookbooks are
160
208
cached in separate files.
161
209
162
210
    >>> [file1, file2] = [file for file in listdir(tempdir)
163
211
    ...                   if 'amazingly' in file]
164
212
165
213
The filenames are identical up to the last 32 characters, which is
166
214
where the MD5 sum begins. But because the MD5 sums are different, they
167
215
are not completely identical.
168
216
169
217
    >>> file1[:-32] == file2[:-32]
170
218
    True
171
219
172
220
    >>> file1 == file2
173
221
    False
Reviewer	Review Type	Date Requested	Status
Brad Crittenden (community)	code	2010-02-09	Approve on 2010-02-09
Review via email: mp+18951@code.staging.launchpad.net