from __future__ import absolute_import
from contextlib import contextmanager
import zlib
import io
import logging
from socket import timeout as SocketTimeout
from socket import error as SocketError

from ._collections import HTTPHeaderDict
from .exceptions import (
    BodyNotHttplibCompatible, ProtocolError, DecodeError, ReadTimeoutError,
    ResponseNotChunked, IncompleteRead, InvalidHeader
)
from .packages.six import string_types as basestring, binary_type, PY3
from .packages.six.moves import http_client as httplib
from .connection import HTTPException, BaseSSLError
from .util.response import is_fp_closed, is_response_to_head

log = logging.getLogger(__name__)


class DeflateDecoder(object):

    def __init__(self):
        self._first_try = True
        self._data = binary_type()
        self._obj = zlib.decompressobj()

    def __getattr__(self, name):
        return getattr(self._obj, name)

    def decompress(self, data):
        if not data:
            return data

        if not self._first_try:
            return self._obj.decompress(data)

        # Some servers send raw DEFLATE data without the zlib wrapper. On the
        # first chunk, try the zlib-wrapped format and fall back to a raw
        # (negative window bits) decompressor if that fails.
        self._data += data
        try:
            return self._obj.decompress(data)
        except zlib.error:
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            try:
                return self.decompress(self._data)
            finally:
                self._data = None


class GzipDecoder(object):

    def __init__(self):
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)

    def __getattr__(self, name):
        return getattr(self._obj, name)

    def decompress(self, data):
        if not data:
            return data
        return self._obj.decompress(data)


def _get_decoder(mode):
    if mode == 'gzip':
        return GzipDecoder()

    return DeflateDecoder()

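# Illustrative sketch (not executed): how a decoder object is driven. The
# decoder returned by _get_decoder() is fed compressed chunks and flushed at
# the end; HTTPResponse._decode() and _flush_decoder() do this internally.
#
#     decoder = _get_decoder('gzip')
#     plain = decoder.decompress(compressed_chunk)
#     plain += decoder.decompress(b'') + decoder.flush()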

class HTTPResponse(io.IOBase):
    """
    HTTP Response container.

    Backwards-compatible with httplib's HTTPResponse but the response ``body``
    is loaded and decoded on-demand when the ``data`` property is accessed.
    This class is also compatible with the Python standard library's :mod:`io`
    module, and can hence be treated as a readable object in the context of
    that framework.

    Extra parameters for behaviour not present in httplib.HTTPResponse:

    :param preload_content:
        If True, the response's body will be preloaded during construction.

    :param decode_content:
        If True, will attempt to decode the body based on the
        'content-encoding' header (e.g. 'gzip' and 'deflate'). If False, the
        raw body is returned without decoding.

    :param original_response:
        When this HTTPResponse wrapper is generated from an httplib.HTTPResponse
        object, it's convenient to include the original for debug purposes. It's
        otherwise unused.

    :param retries:
        Contains the last :class:`~urllib3.util.retry.Retry` that was used
        during the request.

    :param enforce_content_length:
        Enforce content-length checking. The body returned by the server must
        match the value of the Content-Length header, if present; otherwise an
        error is raised.
    """
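    # A minimal usage sketch (illustrative only, assuming a urllib3.PoolManager
    # is available at the call site):
    #
    #     http = urllib3.PoolManager()
    #     r = http.request('GET', 'http://example.com/', preload_content=False)
    #     for chunk in r.stream(1024):
    #         handle(chunk)        # `handle` is a hypothetical callback
    #     r.release_conn()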

    CONTENT_DECODERS = ['gzip', 'deflate']
    REDIRECT_STATUSES = [301, 302, 303, 307, 308]

    def __init__(self, body='', headers=None, status=0, version=0, reason=None,
                 strict=0, preload_content=True, decode_content=True,
                 original_response=None, pool=None, connection=None,
                 retries=None, enforce_content_length=False, request_method=None):

        if isinstance(headers, HTTPHeaderDict):
            self.headers = headers
        else:
            self.headers = HTTPHeaderDict(headers)
        self.status = status
        self.version = version
        self.reason = reason
        self.strict = strict
        self.decode_content = decode_content
        self.retries = retries
        self.enforce_content_length = enforce_content_length

        self._decoder = None
        self._body = None
        self._fp = None
        self._original_response = original_response
        self._fp_bytes_read = 0

        if body and isinstance(body, (basestring, binary_type)):
            self._body = body

        self._pool = pool
        self._connection = connection

        if hasattr(body, 'read'):
            self._fp = body

        # Are we using the chunked-style of transfer encoding?
        self.chunked = False
        self.chunk_left = None
        tr_enc = self.headers.get('transfer-encoding', '').lower()
        # Don't incur the penalty of creating a list and then discarding it
        encodings = (enc.strip() for enc in tr_enc.split(","))
        if "chunked" in encodings:
            self.chunked = True

        # Determine length of response
        self.length_remaining = self._init_length(request_method)

        # If requested, preload the body.
        if preload_content and not self._body:
            self._body = self.read(decode_content=decode_content)

    def get_redirect_location(self):
        """
        Should we redirect and where to?

        :returns: Truthy redirect location string if we got a redirect status
            code and a valid location. ``None`` if a redirect status code but
            no location. ``False`` if not a redirect status code.
        """
        if self.status in self.REDIRECT_STATUSES:
            return self.headers.get('location')

        return False

    def release_conn(self):
        if not self._pool or not self._connection:
            return

        self._pool._put_conn(self._connection)
        self._connection = None

    @property
    def data(self):
        # For backwards-compat with urllib3 0.4 and earlier.
        if self._body:
            return self._body

        if self._fp:
            return self.read(cache_content=True)

    @property
    def connection(self):
        return self._connection

    def tell(self):
        """
        Obtain the number of bytes pulled over the wire so far. May differ from
        the amount of content returned by :meth:`HTTPResponse.read` if bytes
        are encoded on the wire (e.g. compressed).
        """
        return self._fp_bytes_read

    def _init_length(self, request_method):
        """
        Set initial length value for Response content if available.
        """
        length = self.headers.get('content-length')

        if length is not None and self.chunked:
            # This Response will fail with an IncompleteRead if it can't be
            # received as chunked. This method falls back to attempt reading
            # the response before raising an exception.
            log.warning("Received response with both Content-Length and "
                        "Transfer-Encoding set. This is expressly forbidden "
                        "by RFC 7230 sec 3.3.2. Ignoring Content-Length and "
                        "attempting to process response as Transfer-Encoding: "
                        "chunked.")
            return None

        elif length is not None:
            try:
                # RFC 7230 section 3.3.2 specifies multiple content lengths can
                # be sent in a single Content-Length header
                # (e.g. Content-Length: 42, 42). This line ensures the values
                # are all valid ints and that as long as the `set` length is 1,
                # all values are the same. Otherwise, the header is invalid.
                lengths = set([int(val) for val in length.split(',')])
                if len(lengths) > 1:
                    raise InvalidHeader("Content-Length contained multiple "
                                        "non-matching values (%s)" % length)
                length = lengths.pop()
            except ValueError:
                length = None
            else:
                if length < 0:
                    length = None

        # Convert status to int for comparison
        # In some cases, httplib returns a status of "_UNKNOWN"
        try:
            status = int(self.status)
        except ValueError:
            status = 0

        # Check for responses that shouldn't include a body
        if status in (204, 304) or 100 <= status < 200 or request_method == 'HEAD':
            length = 0

        return length

    def _init_decoder(self):
        """
        Set up the _decoder attribute if necessary.
        """
        # Note: content-encoding value should be case-insensitive, per RFC 7230
        # Section 3.2
        content_encoding = self.headers.get('content-encoding', '').lower()
        if self._decoder is None and content_encoding in self.CONTENT_DECODERS:
            self._decoder = _get_decoder(content_encoding)

    def _decode(self, data, decode_content, flush_decoder):
        """
        Decode the data passed in and potentially flush the decoder.
        """
        try:
            if decode_content and self._decoder:
                data = self._decoder.decompress(data)
        except (IOError, zlib.error) as e:
            content_encoding = self.headers.get('content-encoding', '').lower()
            raise DecodeError(
                "Received response with content-encoding: %s, but "
                "failed to decode it." % content_encoding, e)

        if flush_decoder and decode_content:
            data += self._flush_decoder()

        return data

    def _flush_decoder(self):
        """
        Flushes the decoder. Should only be called if the decoder is actually
        being used.
        """
        if self._decoder:
            buf = self._decoder.decompress(b'')
            return buf + self._decoder.flush()

        return b''

    @contextmanager
    def _error_catcher(self):
        """
        Catch low-level Python exceptions, instead re-raising urllib3
        variants, so that low-level exceptions are not leaked in the
        high-level API.

        On exit, release the connection back to the pool.
        """
        clean_exit = False

        try:
            try:
                yield

            except SocketTimeout:
                # FIXME: Ideally we'd like to include the URL in the ReadTimeoutError but
                # there is currently no clean way to get at it from this context.
                raise ReadTimeoutError(self._pool, None, 'Read timed out.')

            except BaseSSLError as e:
                # FIXME: Is there a better way to differentiate between SSLErrors?
                if 'read operation timed out' not in str(e):  # Defensive:
                    # This shouldn't happen but just in case we're missing an edge
                    # case, let's avoid swallowing SSL errors.
                    raise

                raise ReadTimeoutError(self._pool, None, 'Read timed out.')

            except (HTTPException, SocketError) as e:
                # This includes IncompleteRead.
                raise ProtocolError('Connection broken: %r' % e, e)

            # If no exception is thrown, we should avoid cleaning up
            # unnecessarily.
            clean_exit = True
        finally:
            # If we didn't terminate cleanly, we need to throw away our
            # connection.
            if not clean_exit:
                # The response may not be closed but we're not going to use it
                # anymore so close it now to ensure that the connection is
                # released back to the pool.
                if self._original_response:
                    self._original_response.close()

                # Closing the response may not actually be sufficient to close
                # everything, so if we have a hold of the connection close that
                # too.
                if self._connection:
                    self._connection.close()

            # If we hold the original response but it's closed now, we should
            # return the connection back to the pool.
            if self._original_response and self._original_response.isclosed():
                self.release_conn()

    def read(self, amt=None, decode_content=None, cache_content=False):
        """
        Similar to :meth:`httplib.HTTPResponse.read`, but with two additional
        parameters: ``decode_content`` and ``cache_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :param cache_content:
            If True, will save the returned data such that the same result is
            returned regardless of the state of the underlying file object.
            This is useful if you want the ``.data`` property to continue
            working after having ``.read()`` the file object. (Overridden if
            ``amt`` is set.)
        """
        self._init_decoder()
        if decode_content is None:
            decode_content = self.decode_content

        if self._fp is None:
            return

        flush_decoder = False
        data = None

        with self._error_catcher():
            if amt is None:
                # cStringIO doesn't like amt=None
                data = self._fp.read()
                flush_decoder = True
            else:
                cache_content = False
                data = self._fp.read(amt)
                if amt != 0 and not data:  # Platform-specific: Buggy versions of Python.
                    # Close the connection when no data is returned
                    #
                    # This is redundant to what httplib/http.client _should_
                    # already do.  However, versions of Python released before
                    # December 15, 2012 (http://bugs.python.org/issue16298) do
                    # not properly close the connection in all cases. There is
                    # no harm in redundantly calling close.
                    self._fp.close()
                    flush_decoder = True
                    if self.enforce_content_length and self.length_remaining not in (0, None):
                        # This is an edge case that httplib failed to cover due
                        # to concerns of backward compatibility. We're
                        # addressing it here to make sure IncompleteRead is
                        # raised during streaming, so all calls with incorrect
                        # Content-Length are caught.
                        raise IncompleteRead(self._fp_bytes_read, self.length_remaining)

        if data:
            self._fp_bytes_read += len(data)
            if self.length_remaining is not None:
                self.length_remaining -= len(data)

            data = self._decode(data, decode_content, flush_decoder)

            if cache_content:
                self._body = data

        return data
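    # Illustrative sketch (not executed): reading a response incrementally and
    # tracking wire progress with tell(). `resp` is a hypothetical HTTPResponse
    # created with preload_content=False.
    #
    #     chunk = resp.read(4096, decode_content=True)
    #     while chunk:
    #         buffered.append(chunk)          # `buffered` is hypothetical
    #         chunk = resp.read(4096, decode_content=True)
    #     raw_bytes_seen = resp.tell()        # may differ from decoded length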

    def stream(self, amt=2**16, decode_content=None):
        """
        A generator wrapper for the read() method. A call will block until
        ``amt`` bytes have been read from the connection or until the
        connection is closed.

        :param amt:
            How much of the content to read. The generator will return up to
            this much data per iteration, but may return less. This is
            particularly likely when using compressed data. However, the empty
            string will never be returned.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        if self.chunked and self.supports_chunked_reads():
            for line in self.read_chunked(amt, decode_content=decode_content):
                yield line
        else:
            while not is_fp_closed(self._fp):
                data = self.read(amt=amt, decode_content=decode_content)

                if data:
                    yield data

    @classmethod
    def from_httplib(ResponseCls, r, **response_kw):
        """
        Given an :class:`httplib.HTTPResponse` instance ``r``, return a
        corresponding :class:`urllib3.response.HTTPResponse` object.

        Remaining parameters are passed to the HTTPResponse constructor, along
        with ``original_response=r``.
        """
        headers = r.msg

        if not isinstance(headers, HTTPHeaderDict):
            if PY3:  # Python 3
                headers = HTTPHeaderDict(headers.items())
            else:  # Python 2
                headers = HTTPHeaderDict.from_httplib(headers)

        # HTTPResponse objects in Python 3 don't have a .strict attribute
        strict = getattr(r, 'strict', 0)
        resp = ResponseCls(body=r,
                           headers=headers,
                           status=r.status,
                           version=r.version,
                           reason=r.reason,
                           strict=strict,
                           original_response=r,
                           **response_kw)
        return resp
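    # Illustrative sketch (not executed): wrapping a plain httplib response.
    # In practice the connection pool does this for you; `conn` here is a
    # hypothetical httplib.HTTPConnection that has already sent a request.
    #
    #     httplib_response = conn.getresponse()
    #     resp = HTTPResponse.from_httplib(httplib_response,
    #                                      preload_content=False,
    #                                      decode_content=True)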

    # Backwards-compatibility methods for httplib.HTTPResponse
    def getheaders(self):
        return self.headers

    def getheader(self, name, default=None):
        return self.headers.get(name, default)

    # Overrides from io.IOBase
    def close(self):
        if not self.closed:
            self._fp.close()

        if self._connection:
            self._connection.close()

    @property
    def closed(self):
        if self._fp is None:
            return True
        elif hasattr(self._fp, 'isclosed'):
            return self._fp.isclosed()
        elif hasattr(self._fp, 'closed'):
            return self._fp.closed
        else:
            return True

    def fileno(self):
        if self._fp is None:
            raise IOError("HTTPResponse has no file to get a fileno from")
        elif hasattr(self._fp, "fileno"):
            return self._fp.fileno()
        else:
            raise IOError("The file-like object this HTTPResponse is wrapped "
                          "around has no file descriptor")

    def flush(self):
        if self._fp is not None and hasattr(self._fp, 'flush'):
            return self._fp.flush()

    def readable(self):
        # This method is required for `io` module compatibility.
        return True

    def readinto(self, b):
        # This method is required for `io` module compatibility.
        temp = self.read(len(b))
        if len(temp) == 0:
            return 0
        else:
            b[:len(temp)] = temp
            return len(temp)
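    # Illustrative sketch (not executed): because readable() and readinto()
    # are implemented, an HTTPResponse can be treated as a standard readable
    # stream. `resp` is a hypothetical response with preload_content=False.
    #
    #     import shutil
    #     with open('download.bin', 'wb') as fh:
    #         shutil.copyfileobj(resp, fh)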

    def supports_chunked_reads(self):
        """
        Checks if the underlying file-like object looks like an
        httplib.HTTPResponse object. We do this by testing for the fp
        attribute. If it is present we assume it returns raw chunks as
        processed by read_chunked().
        """
        return hasattr(self._fp, 'fp')

    def _update_chunk_length(self):
        # First, we'll figure out the length of a chunk and then
        # we'll try to read it from the socket.
        if self.chunk_left is not None:
            return
        line = self._fp.fp.readline()
        line = line.split(b';', 1)[0]
        try:
            self.chunk_left = int(line, 16)
        except ValueError:
            # Invalid chunked protocol response, abort.
            self.close()
            raise httplib.IncompleteRead(line)

    def _handle_chunk(self, amt):
        returned_chunk = None
        if amt is None:
            chunk = self._fp._safe_read(self.chunk_left)
            returned_chunk = chunk
            self._fp._safe_read(2)  # Toss the CRLF at the end of the chunk.
            self.chunk_left = None
        elif amt < self.chunk_left:
            value = self._fp._safe_read(amt)
            self.chunk_left = self.chunk_left - amt
            returned_chunk = value
        elif amt == self.chunk_left:
            value = self._fp._safe_read(amt)
            self._fp._safe_read(2)  # Toss the CRLF at the end of the chunk.
            self.chunk_left = None
            returned_chunk = value
        else:  # amt > self.chunk_left
            returned_chunk = self._fp._safe_read(self.chunk_left)
            self._fp._safe_read(2)  # Toss the CRLF at the end of the chunk.
            self.chunk_left = None
        return returned_chunk
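    # For reference, the chunked framing that _update_chunk_length() and
    # _handle_chunk() parse looks like this on the wire (RFC 7230 sec 4.1):
    #
    #     b"5\r\nhello\r\n"      # chunk-size in hex, CRLF, data, CRLF
    #     b"0\r\n\r\n"           # a zero-size chunk terminates the body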

    def read_chunked(self, amt=None, decode_content=None):
        """
        Similar to :meth:`HTTPResponse.read`, but with an additional
        parameter: ``decode_content``.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        self._init_decoder()
        # FIXME: Rewrite this method and make it a class with better structured logic.
        if not self.chunked:
            raise ResponseNotChunked(
                "Response is not chunked. "
                "Header 'transfer-encoding: chunked' is missing.")
        if not self.supports_chunked_reads():
            raise BodyNotHttplibCompatible(
                "Body should be httplib.HTTPResponse like. "
                "It should have an fp attribute which returns raw chunks.")

        # Don't bother reading the body of a HEAD request.
        if self._original_response and is_response_to_head(self._original_response):
            self._original_response.close()
            return

        with self._error_catcher():
            while True:
                self._update_chunk_length()
                if self.chunk_left == 0:
                    break
                chunk = self._handle_chunk(amt)
                decoded = self._decode(chunk, decode_content=decode_content,
                                       flush_decoder=False)
                if decoded:
                    yield decoded

            if decode_content:
                # On CPython and PyPy, we should never need to flush the
                # decoder. However, on Jython we *might* need to, so
                # let's defensively do it anyway.
                decoded = self._flush_decoder()
                if decoded:  # Platform-specific: Jython.
                    yield decoded

            # Chunk content ends with \r\n: discard it.
            while True:
                line = self._fp.fp.readline()
                if not line:
                    # Some sites may not end with '\r\n'.
                    break
                if line == b'\r\n':
                    break

            # We read everything; close the "file".
            if self._original_response:
                self._original_response.close()