]> projects.mako.cc - wikipedia-api-cdsw/blob - mwclient/http.py
15335d73f072aed3ad1453aacf1bfbfd2e41df1d
[wikipedia-api-cdsw] / mwclient / http.py
1 import urllib2
2 import urlparse
3 import httplib
4 import socket
5 import time
6
7 import upload
8 import errors
9
10 from client import __ver__
11
12
class CookieJar(dict):
    """Mapping of cookie name to cookie value for a single host.

    Only the ``name=value`` pair of each ``Set-Cookie`` header is stored;
    attributes such as ``path`` or ``expires`` are discarded.
    """

    def __init__(self):
        dict.__init__(self, ())

    def extract_cookies(self, response):
        """Store every cookie announced by ``response``.

        ``response`` must provide ``msg.getallmatchingheaders()`` and
        ``getheader()``, which are the only members used here.

        Raises:
            RuntimeError: if the response carries a ``Set-Cookie2`` header,
                which this class does not handle.
        """
        for cookie in response.msg.getallmatchingheaders('Set-Cookie'):
            self.parse_cookie(cookie.strip())
        value = response.getheader('set-cookie2', None)
        if value:
            raise RuntimeError('Set-Cookie2', value)

    def parse_cookie(self, cookie):
        """Record the cookie carried by one raw ``Set-Cookie: ...`` line.

        A ``name=value`` pair is stored (everything after the first ``=``
        belongs to the value).  A bare ``name`` with no ``=`` removes that
        cookie if it is present.  Empty lines and lines without a colon
        are ignored.
        """
        if not cookie:
            return
        # Drop the header name; without a colon there is nothing to parse.
        _, colon, content = cookie.partition(':')
        if not colon:
            return
        # Cookie attributes (after the first ';') are optional and unused.
        pair = content.split(';', 1)[0].strip()
        name, equals, value = pair.partition('=')
        if equals:
            self[name] = value
        else:
            self.pop(name, None)

    def get_cookie_header(self):
        """Return the stored cookies formatted as a ``Cookie`` header value."""
        # items() is available on both Python 2 and Python 3 dicts.
        return '; '.join('%s=%s' % item for item in self.items())

    def __iter__(self):
        """Yield one ``Cookie`` object per stored name/value pair."""
        for name, value in self.items():
            yield Cookie(name, value)
41
42
class Cookie(object):
    """Simple holder exposing a cookie's ``name`` and ``value`` attributes."""

    def __init__(self, name, value):
        self.name, self.value = name, value
48
49
class HTTPPersistentConnection(object):
    """Keep-alive connection to one host with cookie and redirect handling.

    Class attributes:
        http_class: connection class instantiated for the transport.
        scheme_name: URL scheme a redirect target must use.
        useragent: User-Agent string; set per instance in ``__init__``.
    """
    http_class = httplib.HTTPConnection
    scheme_name = 'http'
    useragent = None

    def __init__(self, host, pool=None, clients_useragent=None):
        """Connect to ``host``.

        Args:
            host: passed to ``http_class`` to create the connection.
            pool: optional owning pool.  When given, its ``cookies``
                mapping is shared with this connection and redirects are
                dispatched through ``pool.request``.
            clients_useragent: optional text placed in front of the
                library's own User-Agent string.
        """
        self._conn = self.http_class(host)
        self._conn.connect()
        self.last_request = time.time()
        self.cookies = {}

        self.pool = pool
        # Compare against None: a pool that is still empty is falsy (it is
        # a list), yet its cookie store must be shared all the same.
        if pool is not None:
            self.cookies = pool.cookies

        clients_useragent = clients_useragent or ""
        if clients_useragent != "":
            clients_useragent += " "
        self.useragent = (clients_useragent + 'MwClient/' + __ver__ +
                          ' (https://github.com/mwclient/mwclient)')

    def request(self, method, host, path, headers, data,
                raise_on_not_ok=True, auto_redirect=True):
        """Send one request and return the response object.

        Args:
            method: HTTP method name.
            host: host name, or a ``(scheme, host)`` tuple of which only
                the host part is used here.
            path: request path, including any query string.
            headers: optional dict of extra headers; entries override the
                generated ones.  The dict itself is not modified.
            data: request body.  Falsy for none, a string, or an
                ``upload.Upload`` instance, which is sent chunk by chunk.
            raise_on_not_ok: raise ``errors.HTTPStatusError`` when the
                final status is not 200.
            auto_redirect: follow 3xx responses that carry a ``Location``
                header.  3xx responses without one (such as 304) are not
                followed and fall through to the status check.

        Returns:
            The object returned by the connection's ``getresponse()``.

        Raises:
            errors.HTTPError: on a socket error.
            errors.HTTPRedirectError: when a redirect points to another
                scheme, or to another host while no pool is attached.
            errors.HTTPStatusError: see ``raise_on_not_ok``.
        """
        # Strip scheme
        if isinstance(host, tuple):
            host = host[1]

        # Dirty hack: reconnect after more than a minute without requests.
        if (time.time() - self.last_request) > 60:
            self._conn.close()
            self._conn.connect()

        _headers = headers
        headers = {}
        is_upload = isinstance(data, upload.Upload)

        headers['Connection'] = 'Keep-Alive'
        headers['User-Agent'] = self.useragent
        headers['Host'] = host
        if host in self.cookies:
            headers['Cookie'] = self.cookies[host].get_cookie_header()
        if is_upload:
            headers['Content-Type'] = data.content_type
            headers['Content-Length'] = str(data.length)
        elif data:
            headers['Content-Length'] = str(len(data))

        if _headers:
            headers.update(_headers)

        try:
            self._conn.request(method, path, headers=headers)
            if is_upload:
                for s in data:
                    self._conn.send(s)
            elif data:
                self._conn.send(data)

            self.last_request = time.time()
            try:
                res = self._conn.getresponse()
            except httplib.BadStatusLine:
                # Retry once on a freshly opened connection.
                self._conn.close()
                self._conn.connect()
                self._conn.request(method, path, data, headers)
                res = self._conn.getresponse()
        except socket.error as e:
            self._conn.close()
            raise errors.HTTPError(e)

        if host not in self.cookies:
            self.cookies[host] = CookieJar()
        self.cookies[host].extract_cookies(res)

        redirect_to = res.getheader('Location')
        if 300 <= res.status <= 399 and auto_redirect and redirect_to:
            res.read()

            location = urlparse.urlparse(redirect_to)

            # Forward only the caller-supplied headers.  Generated ones
            # (Host, Cookie, Content-Length, ...) are rebuilt by the
            # follow-up request, so it gets the target's Host and any
            # cookie set by this very response.
            redirect_headers = dict(_headers) if _headers else {}
            if res.status in (302, 303):
                redirect_headers.pop('Content-Type', None)
                redirect_headers.pop('Content-Length', None)
                method = 'GET'
                data = ''
            old_path = path
            path = location[2]
            if location[4]:
                path = path + '?' + location[4]

            if location[0].lower() != self.scheme_name:
                raise errors.HTTPRedirectError(
                    'Only HTTP connections are supported', redirect_to)

            if self.pool is None:
                if location[1] != host:
                    raise errors.HTTPRedirectError(
                        'Redirecting to different hosts not supported',
                        redirect_to)

                return self.request(method, host, path, redirect_headers,
                                    data, raise_on_not_ok, auto_redirect)
            else:
                if host == location[1] and path == old_path:
                    # Redirect to the very same URL: add a new connection.
                    # NOTE(review): it is registered under a bare host
                    # string and without the client user agent -- confirm
                    # whether the pool lookup is meant to find it.
                    conn = self.__class__(location[1], self.pool)
                    self.pool.append(([location[1]], conn))
                # Pass the scheme along so the pool looks up a connection
                # of the same kind instead of defaulting to 'http'.
                return self.pool.request(method,
                                         (self.scheme_name, location[1]),
                                         path, redirect_headers, data,
                                         raise_on_not_ok, auto_redirect)

        if res.status != 200 and raise_on_not_ok:
            try:
                raise errors.HTTPStatusError(res.status, res)
            finally:
                res.close()

        return res

    def get(self, host, path, headers=None):
        """Issue a GET request; see ``request`` for the arguments."""
        return self.request('GET', host, path, headers, None)

    def post(self, host, path, headers=None, data=None):
        """Issue a POST request with body ``data``; see ``request``."""
        return self.request('POST', host, path, headers, data)

    def head(self, host, path, headers=None, auto_redirect=False):
        """Issue a HEAD request and return ``(status, header list)``.

        No exception is raised for a non-200 status.
        """
        res = self.request('HEAD', host, path, headers,
                           data=None, raise_on_not_ok=False,
                           auto_redirect=auto_redirect)
        res.read()
        return res.status, res.getheaders()

    def close(self):
        """Close the underlying connection."""
        self._conn.close()

    def fileno(self):
        """Return the file descriptor of the underlying socket."""
        return self._conn.sock.fileno()
184
185
class HTTPConnection(HTTPPersistentConnection):
    """Connection that sends ``Connection: Close`` with every request."""

    def request(self, method, host, path, headers, data,
                raise_on_not_ok=True, auto_redirect=True):
        """Same as ``HTTPPersistentConnection.request`` plus ``Connection: Close``.

        The header is added to a copy, so the caller's ``headers`` dict is
        left unmodified.
        """
        headers = dict(headers) if headers else {}
        headers['Connection'] = 'Close'
        return HTTPPersistentConnection.request(self, method, host, path,
                                                headers, data,
                                                raise_on_not_ok,
                                                auto_redirect)
196
197
class HTTPSPersistentConnection(HTTPPersistentConnection):
    """``HTTPPersistentConnection`` configured for the ``https`` scheme."""

    # Scheme that redirect targets are compared against.
    scheme_name = 'https'
    # Connection class used to open the transport.
    http_class = httplib.HTTPSConnection
201
202
class HTTPPool(list):
    """List of ``(hosts, connection)`` pairs sharing one cookie store.

    ``hosts`` is a list of ``(scheme, host)`` tuples that are served by
    ``connection``.
    """

    def __init__(self, clients_useragent=None):
        """Create an empty pool.

        Args:
            clients_useragent: handed to every connection the pool creates.
        """
        list.__init__(self)
        self.cookies = {}
        self.clients_useragent = clients_useragent

    def find_connection(self, host, scheme='http'):
        """Return a connection for ``host``, creating one if necessary.

        Args:
            host: host name, or a ``(scheme, host)`` tuple that overrides
                ``scheme``.
            scheme: ``'http'`` or ``'https'``.

        Raises:
            RuntimeError: for any other scheme when a new connection would
                have to be created.
        """
        if isinstance(host, tuple):
            scheme, host = host

        for hosts, conn in self:
            if (scheme, host) in hosts:
                return conn

        # Probe each pooled connection with ``HEAD /`` for the new host.
        # A 200, or a redirect whose target is exactly (scheme, host),
        # registers the host on that connection.
        for hosts, conn in self:
            status, headers = conn.head(host, '/')
            if status == 200:
                hosts.append((scheme, host))
                return conn
            if status >= 300 and status <= 399:
                # BROKEN!
                headers = dict(headers)
                location = urlparse.urlparse(headers.get('location', ''))
                if (location[0], location[1]) == (scheme, host):
                    hosts.append((scheme, host))
                    return conn

        if scheme == 'http':
            cls = HTTPPersistentConnection
        elif scheme == 'https':
            cls = HTTPSPersistentConnection
        else:
            raise RuntimeError('Unsupported scheme', scheme)
        conn = cls(host, self, self.clients_useragent)
        self.append(([(scheme, host)], conn))
        return conn

    def get(self, host, path, headers=None):
        """Issue a GET request through the connection found for ``host``."""
        return self.find_connection(host).get(host,
                                              path, headers)

    def post(self, host, path, headers=None, data=None):
        """Issue a POST request through the connection found for ``host``."""
        return self.find_connection(host).post(host,
                                               path, headers, data)

    def head(self, host, path, headers=None, auto_redirect=False):
        """Issue a HEAD request through the connection found for ``host``."""
        return self.find_connection(host).head(host,
                                               path, headers, auto_redirect)

    def request(self, method, host, path, headers, data,
                raise_on_not_ok=True, auto_redirect=True):
        """Forward a request to the connection found for ``host``.

        The two flags default to ``True``, matching the ``request``
        signature of the connection classes.
        """
        return self.find_connection(host).request(method, host, path,
                                                  headers, data,
                                                  raise_on_not_ok,
                                                  auto_redirect)

    def close(self):
        """Close every pooled connection."""
        for hosts, conn in self:
            conn.close()

Benjamin Mako Hill || Want to submit a patch?