retry.py revision 420216e5
1from __future__ import absolute_import
2import time
3import logging
4from collections import namedtuple
5from itertools import takewhile
6import email
7import re
8
9from ..exceptions import (
10    ConnectTimeoutError,
11    MaxRetryError,
12    ProtocolError,
13    ReadTimeoutError,
14    ResponseError,
15    InvalidHeader,
16)
17from ..packages import six
18
19
log = logging.getLogger(__name__)

# Immutable record describing a single request attempt that led to a retry:
# the HTTP method and URL used, the error raised (if any), the response status
# (if any) and the redirect target (if the attempt was a redirect).
RequestHistory = namedtuple(
    'RequestHistory',
    ('method', 'url', 'error', 'status', 'redirect_location'),
)
25
26
class Retry(object):
    """ Retry configuration.

    Each retry attempt will create a new Retry object with updated values, so
    they can be safely reused.

    Retries can be defined as a default for a pool::

        retries = Retry(connect=5, read=2, redirect=5)
        http = PoolManager(retries=retries)
        response = http.request('GET', 'http://example.com/')

    Or per-request (which overrides the default for the pool)::

        response = http.request('GET', 'http://example.com/', retries=Retry(10))

    Retries can be disabled by passing ``False``::

        response = http.request('GET', 'http://example.com/', retries=False)

    Errors will be wrapped in :class:`~urllib3.exceptions.MaxRetryError` unless
    retries are disabled, in which case the causing exception will be raised.

    :param int total:
        Total number of retries to allow. Takes precedence over other counts.

        Set to ``None`` to remove this constraint and fall back on other
        counts. It's a good idea to set this to some sensibly-high value to
        account for unexpected edge cases and avoid infinite retry loops.

        Set to ``0`` to fail on the first retry.

        Set to ``False`` to disable and imply ``raise_on_redirect=False``.

    :param int connect:
        How many connection-related errors to retry on.

        These are errors raised before the request is sent to the remote server,
        which we assume has not triggered the server to process the request.

        Set to ``0`` to fail on the first retry of this type.

    :param int read:
        How many times to retry on read errors.

        These errors are raised after the request was sent to the server, so the
        request may have side-effects.

        Set to ``0`` to fail on the first retry of this type.

    :param int redirect:
        How many redirects to perform. Limit this to avoid infinite redirect
        loops.

        A redirect is an HTTP response with a status code 301, 302, 303, 307 or
        308.

        Set to ``0`` to fail on the first retry of this type.

        Set to ``False`` to disable and imply ``raise_on_redirect=False``.

    :param iterable method_whitelist:
        Set of uppercased HTTP method verbs that we should retry on.

        By default, we only retry on methods which are considered to be
        idempotent (multiple requests with the same parameters end with the
        same state). See :attr:`Retry.DEFAULT_METHOD_WHITELIST`.

        Set to a ``False`` value to retry on any verb.

    :param iterable status_forcelist:
        A set of integer HTTP status codes that we should force a retry on.
        A retry is initiated if the request method is in ``method_whitelist``
        and the response status code is in ``status_forcelist``.

        By default, this is disabled with ``None``.

    :param float backoff_factor:
        A backoff factor to apply between attempts after the second try
        (most errors are resolved immediately by a second try without a
        delay). urllib3 will sleep for::

            {backoff factor} * (2 ^ ({number of total retries} - 1))

        seconds. If the backoff_factor is 0.1, then :func:`.sleep` will sleep
        for [0.0s, 0.2s, 0.4s, ...] between retries. It will never be longer
        than :attr:`Retry.BACKOFF_MAX`.

        By default, backoff is disabled (set to 0).

    :param bool raise_on_redirect: Whether, if the number of redirects is
        exhausted, to raise a MaxRetryError, or to return a response with a
        response code in the 3xx range.

    :param bool raise_on_status: Similar meaning to ``raise_on_redirect``:
        whether we should raise an exception, or return a response,
        if status falls in ``status_forcelist`` range and retries have
        been exhausted.

    :param tuple history: The history of the request encountered during
        each call to :meth:`~Retry.increment`. The list is in the order
        the requests occurred. Each list item is of class :class:`RequestHistory`.

    :param bool respect_retry_after_header:
        Whether to respect Retry-After header on status codes defined as
        :attr:`Retry.RETRY_AFTER_STATUS_CODES` or not.

    """

    DEFAULT_METHOD_WHITELIST = frozenset([
        'HEAD', 'GET', 'PUT', 'DELETE', 'OPTIONS', 'TRACE'])

    RETRY_AFTER_STATUS_CODES = frozenset([413, 429, 503])

    #: Maximum backoff time.
    BACKOFF_MAX = 120

    def __init__(self, total=10, connect=None, read=None, redirect=None,
                 method_whitelist=DEFAULT_METHOD_WHITELIST, status_forcelist=None,
                 backoff_factor=0, raise_on_redirect=True, raise_on_status=True,
                 history=None, respect_retry_after_header=True):

        self.total = total
        self.connect = connect
        self.read = read

        # Disabling redirects (or retries altogether) means a 3xx response is
        # handed back to the caller instead of raising.
        if redirect is False or total is False:
            redirect = 0
            raise_on_redirect = False

        self.redirect = redirect
        self.status_forcelist = status_forcelist or set()
        self.method_whitelist = method_whitelist
        self.backoff_factor = backoff_factor
        self.raise_on_redirect = raise_on_redirect
        self.raise_on_status = raise_on_status
        self.history = history or tuple()
        self.respect_retry_after_header = respect_retry_after_header

    def new(self, **kw):
        """ Return a copy of this object with the given settings overridden.

        :param kw: Constructor keyword arguments that replace the values
            copied from this instance.
        :return: A new instance of the same class as ``self``.
        """
        params = dict(
            total=self.total,
            connect=self.connect, read=self.read, redirect=self.redirect,
            method_whitelist=self.method_whitelist,
            status_forcelist=self.status_forcelist,
            backoff_factor=self.backoff_factor,
            raise_on_redirect=self.raise_on_redirect,
            raise_on_status=self.raise_on_status,
            history=self.history,
            # Must be forwarded, otherwise the copy silently falls back to the
            # constructor default after the first increment().
            respect_retry_after_header=self.respect_retry_after_header,
        )
        params.update(kw)
        return type(self)(**params)

    @classmethod
    def from_int(cls, retries, redirect=True, default=None):
        """ Backwards-compatibility for the old retries format.

        :param retries: A ``Retry`` instance (returned unchanged), a value
            accepted as ``total`` by the constructor, or ``None`` to use
            ``default`` (falling back to ``cls.DEFAULT``).
        :param redirect: If falsy, redirects are disabled on the new object.
        :param default: Value substituted when ``retries`` is ``None``.
        :return: A ``Retry`` object.
        """
        if retries is None:
            retries = default if default is not None else cls.DEFAULT

        if isinstance(retries, Retry):
            return retries

        # Truthy -> None (no separate redirect limit); falsy -> False (disabled).
        redirect = bool(redirect) and None
        new_retries = cls(retries, redirect=redirect)
        log.debug("Converted retries value: %r -> %r", retries, new_retries)
        return new_retries

    def get_backoff_time(self):
        """ Formula for computing the current backoff

        :rtype: float
        """
        # We want to consider only the last consecutive errors sequence (Ignore redirects).
        consecutive_errors_len = len(list(takewhile(lambda x: x.redirect_location is None,
                                                    reversed(self.history))))
        if consecutive_errors_len <= 1:
            return 0

        backoff_value = self.backoff_factor * (2 ** (consecutive_errors_len - 1))
        return min(self.BACKOFF_MAX, backoff_value)

    def parse_retry_after(self, retry_after):
        """ Convert a ``Retry-After`` header value into a delay in seconds.

        :param str retry_after: Either a non-negative integer number of
            seconds or an HTTP-date.
        :return: Number of seconds to wait, never negative.
        :raises InvalidHeader: If the value is neither an integer nor a
            parseable date.
        """
        # Imported explicitly: a bare ``import email`` does not guarantee that
        # the ``email.utils`` submodule has been loaded.
        from email.utils import mktime_tz, parsedate_tz

        # Whitespace: https://tools.ietf.org/html/rfc7230#section-3.2.4
        if re.match(r"^\s*[0-9]+\s*$", retry_after):
            seconds = int(retry_after)
        else:
            retry_date_tuple = parsedate_tz(retry_after)
            if retry_date_tuple is None:
                raise InvalidHeader("Invalid Retry-After header: %s" % retry_after)
            if retry_date_tuple[9] is None:
                # No zone in the header: HTTP-dates are expressed in GMT, so
                # do not let the value be interpreted as local time.
                retry_date_tuple = retry_date_tuple[:9] + (0,) + retry_date_tuple[10:]
            retry_date = mktime_tz(retry_date_tuple)
            seconds = retry_date - time.time()

        if seconds < 0:
            seconds = 0

        return seconds

    def get_retry_after(self, response):
        """ Get the value of Retry-After in seconds.

        :return: The parsed delay, or ``None`` if the header is absent.
        """

        retry_after = response.getheader("Retry-After")

        if retry_after is None:
            return None

        return self.parse_retry_after(retry_after)

    def sleep_for_retry(self, response=None):
        """ Sleep for the duration requested by the ``Retry-After`` header.

        :return: ``True`` if a sleep was performed, ``False`` if there was no
            response, no header, or the requested delay was zero.
        """
        if response is None:
            return False

        retry_after = self.get_retry_after(response)
        if retry_after:
            time.sleep(retry_after)
            return True

        return False

    def _sleep_backoff(self):
        """ Sleep for the current backoff time, if it is positive. """
        backoff = self.get_backoff_time()
        if backoff <= 0:
            return
        time.sleep(backoff)

    def sleep(self, response=None):
        """ Sleep between retry attempts.

        This method will respect a server's ``Retry-After`` response header
        and sleep the duration of the time requested, unless
        ``respect_retry_after_header`` is disabled. If the header is not
        present, it will use an exponential backoff. By default, the backoff
        factor is 0 and this method will return immediately.
        """

        if self.respect_retry_after_header and response:
            slept = self.sleep_for_retry(response)
            if slept:
                return

        self._sleep_backoff()

    def _is_connection_error(self, err):
        """ Errors when we're fairly sure that the server did not receive the
        request, so it should be safe to retry.
        """
        return isinstance(err, ConnectTimeoutError)

    def _is_read_error(self, err):
        """ Errors that occur after the request has been started, so we should
        assume that the server began processing it.
        """
        return isinstance(err, (ReadTimeoutError, ProtocolError))

    def _is_method_retryable(self, method):
        """ Checks if a given HTTP method should be retried upon, depending if
        it is included on the method whitelist.
        """
        if self.method_whitelist and method.upper() not in self.method_whitelist:
            return False

        return True

    def is_retry(self, method, status_code, has_retry_after=False):
        """ Is this method/status code retryable? (Based on whitelists and control
        variables such as the number of total retries to allow, whether to
        respect the Retry-After header, whether this header is present, and
        whether the returned status code is on the list of status codes to
        be retried upon on the presence of the aforementioned header)

        :rtype: bool
        """
        if not self._is_method_retryable(method):
            return False

        if self.status_forcelist and status_code in self.status_forcelist:
            return True

        # Normalised to a real bool so callers never see ``None``/``0``/an int
        # leaking out of the ``and`` chain.
        return bool(self.total and self.respect_retry_after_header and
                    has_retry_after and (status_code in self.RETRY_AFTER_STATUS_CODES))

    def is_exhausted(self):
        """ Are we out of retries? """
        retry_counts = (self.total, self.connect, self.read, self.redirect)
        # Drops None/False (unlimited or disabled) and also 0, which is not
        # below zero and therefore cannot make the result True.
        retry_counts = list(filter(None, retry_counts))
        if not retry_counts:
            return False

        return min(retry_counts) < 0

    def increment(self, method=None, url=None, response=None, error=None,
                  _pool=None, _stacktrace=None):
        """ Return a new Retry object with incremented retry counters.

        :param response: A response object, or None, if the server did not
            return a response.
        :type response: :class:`~urllib3.response.HTTPResponse`
        :param Exception error: An error encountered during the request, or
            None if the response was received successfully.

        :return: A new ``Retry`` object.
        :raises MaxRetryError: If the updated counters are exhausted.
        """
        if self.total is False and error:
            # Disabled, indicate to re-raise the error.
            raise six.reraise(type(error), error, _stacktrace)

        total = self.total
        if total is not None:
            total -= 1

        connect = self.connect
        read = self.read
        redirect = self.redirect
        cause = 'unknown'
        status = None
        redirect_location = None

        if error and self._is_connection_error(error):
            # Connect retry?
            if connect is False:
                raise six.reraise(type(error), error, _stacktrace)
            elif connect is not None:
                connect -= 1

        elif error and self._is_read_error(error):
            # Read retry?
            if read is False or not self._is_method_retryable(method):
                raise six.reraise(type(error), error, _stacktrace)
            elif read is not None:
                read -= 1

        elif response and response.get_redirect_location():
            # Redirect retry?
            if redirect is not None:
                redirect -= 1
            cause = 'too many redirects'
            redirect_location = response.get_redirect_location()
            status = response.status

        else:
            # Incrementing because of a server error like a 500 in
            # status_forcelist and the given method is in the whitelist
            cause = ResponseError.GENERIC_ERROR
            if response and response.status:
                cause = ResponseError.SPECIFIC_ERROR.format(
                    status_code=response.status)
                status = response.status

        history = self.history + (RequestHistory(method, url, error, status, redirect_location),)

        new_retry = self.new(
            total=total,
            connect=connect, read=read, redirect=redirect,
            history=history)

        if new_retry.is_exhausted():
            raise MaxRetryError(_pool, url, error or ResponseError(cause))

        log.debug("Incremented Retry for (url='%s'): %r", url, new_retry)

        return new_retry

    def __repr__(self):
        return ('{cls.__name__}(total={self.total}, connect={self.connect}, '
                'read={self.read}, redirect={self.redirect})').format(
                    cls=type(self), self=self)
386
387
# Shared default policy, kept for backwards compatibility (equivalent to
# pre-v1.9): a total budget of three retries.
Retry.DEFAULT = Retry(total=3)
390