1from __future__ import absolute_import
2from collections import namedtuple
3
4from ..exceptions import LocationParseError
5
6
url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment']


class Url(namedtuple('Url', url_attrs)):
    """
    Immutable record holding the components of an HTTP URL; this is the
    type handed back by :func:`parse_url`.

    On construction the scheme and host are lower-cased (RFC 3986 treats
    both as case-insensitive) and a non-empty path that lacks a leading
    ``/`` has one prepended. Every field defaults to ``None``.
    """
    __slots__ = ()

    def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None,
                query=None, fragment=None):
        # Falsy values (None or '') are passed through untouched.
        needs_slash = bool(path) and not path.startswith('/')
        clean_path = '/' + path if needs_slash else path
        clean_scheme = scheme.lower() if scheme else scheme
        clean_host = host.lower() if host else host
        return super(Url, cls).__new__(cls, clean_scheme, auth, clean_host,
                                       port, clean_path, query, fragment)

    @property
    def hostname(self):
        """Alias of ``host``, kept for compatibility with urlparse."""
        return self.host

    @property
    def request_uri(self):
        """Path (``/`` when absent) followed by ``?query`` if a query is set."""
        target = self.path or '/'
        if self.query is None:
            return target
        return target + '?' + self.query

    @property
    def netloc(self):
        """Host alone, or ``host:port`` when a truthy port is present."""
        if not self.port:
            return self.host
        return '%s:%d' % (self.host, self.port)

    @property
    def url(self):
        """
        Serialize the components back into a single url string.

        The result is intended to more or less round-trip with
        :func:`.parse_url`: it may differ textually from the string that was
        parsed, but should be equivalent by the RFC (e.g., urls with a blank
        port will have : removed).

        Example: ::

            >>> U = parse_url('http://google.com/mail/')
            >>> U.url
            'http://google.com/mail/'
            >>> Url('http', 'username:password', 'host.com', 80,
            ... '/path', 'query', 'fragment').url
            'http://username:password@host.com:80/path?query#fragment'
        """
        scheme, auth, host, port, path, query, fragment = self
        port_text = None if port is None else str(port)

        # (leading marker, component, trailing marker) in output order.
        # Components are tested against None rather than for truthiness so
        # that empty strings and a port of 0 are still emitted.
        layout = (
            ('', scheme, '://'),
            ('', auth, '@'),
            ('', host, ''),
            (':', port_text, ''),
            ('', path, ''),
            ('?', query, ''),
            ('#', fragment, ''),
        )
        return ''.join(
            lead + part + trail
            for lead, part, trail in layout
            if part is not None
        )

    def __str__(self):
        return self.url
93
94
def split_first(s, delims):
    """
    Given a string and an iterable of delimiters, split on the delimiter
    that occurs earliest in the string. Return the two split parts and the
    matched delimiter.

    If none of the delimiters is found, the first part is the full input
    string, the second part is empty and the delimiter is None.

    Delimiters may be longer than one character; the whole matched delimiter
    is dropped from the result. When two delimiters match at the same
    position, the one listed first in ``delims`` wins.

    Example::

        >>> split_first('foo/bar?baz', '?/=')
        ('foo', 'bar?baz', '/')
        >>> split_first('foo/bar?baz', '123')
        ('foo/bar?baz', '', None)
        >>> split_first('a://b/c', ['://', '/'])
        ('a', 'b/c', '://')

    Scales linearly with number of delims. Not ideal for large number of delims.
    """
    min_idx = None
    min_delim = None
    for d in delims:
        idx = s.find(d)
        if idx < 0:
            # This delimiter does not occur in the string at all.
            continue

        if min_idx is None or idx < min_idx:
            min_idx = idx
            min_delim = d

    if min_idx is None:
        return s, '', None

    # Skip the full delimiter, not just one character, so multi-character
    # delimiters do not leak into the second part.
    return s[:min_idx], s[min_idx + len(min_delim):], min_delim
126
127
def parse_url(url):
    """
    Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is
    performed to parse incomplete urls. Fields not provided will be None.

    Partly backwards-compatible with :mod:`urlparse`.

    :raises LocationParseError: if a port is given but is not an integer
        made up solely of digits, or if a bracketed (IPv6) host is missing
        its closing ``]``.

    Example::

        >>> parse_url('http://google.com/mail/')
        Url(scheme='http', host='google.com', port=None, path='/mail/', ...)
        >>> parse_url('google.com:80')
        Url(scheme=None, host='google.com', port=80, path=None, ...)
        >>> parse_url('/foo?bar')
        Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...)
    """

    # While this code has overlap with stdlib's urlparse, it is much
    # simplified for our needs and less annoying.
    # Additionally, this implementation does silly things to be optimal
    # on CPython.

    if not url:
        # Empty
        return Url()

    scheme = None
    auth = None
    host = None
    port = None
    path = None
    fragment = None
    query = None

    # Scheme
    if '://' in url:
        scheme, url = url.split('://', 1)

    # Find the earliest Authority Terminator
    # (http://tools.ietf.org/html/rfc3986#section-3.2)
    url, path_, delim = split_first(url, ['/', '?', '#'])

    if delim:
        # Reassemble the path
        path = delim + path_

    # Auth
    if '@' in url:
        # Last '@' denotes end of auth part
        auth, url = url.rsplit('@', 1)

    # IPv6
    if url and url[0] == '[':
        if ']' not in url:
            # Unterminated bracketed host. Without this check the tuple
            # unpacking below would escape as a bare ValueError instead of
            # the parse error callers expect.
            raise LocationParseError(url)
        host, url = url.split(']', 1)
        host += ']'

    # Port
    if ':' in url:
        _host, port = url.split(':', 1)

        if not host:
            host = _host

        if port:
            # If given, ports must be integers. No whitespace, no plus or
            # minus prefixes, no non-integer digits such as ^2 (superscript).
            if not port.isdigit():
                raise LocationParseError(url)
            try:
                # isdigit() accepts some characters int() rejects, hence
                # the second line of defence.
                port = int(port)
            except ValueError:
                raise LocationParseError(url)
        else:
            # Blank ports are cool, too. (rfc3986#section-3.2.3)
            port = None

    elif not host and url:
        host = url

    if not path:
        return Url(scheme, auth, host, port, path, query, fragment)

    # Fragment (split off first so a '?' inside the fragment is not
    # mistaken for the start of the query)
    if '#' in path:
        path, fragment = path.split('#', 1)

    # Query
    if '?' in path:
        path, query = path.split('?', 1)

    return Url(scheme, auth, host, port, path, query, fragment)
219
220
def get_host(url):
    """
    Deprecated. Use :func:`parse_url` instead.

    Parses ``url`` and returns a ``(scheme, hostname, port)`` tuple, with
    the scheme falling back to ``'http'`` when the url does not carry one.
    """
    parsed = parse_url(url)
    scheme = parsed.scheme or 'http'
    return scheme, parsed.hostname, parsed.port
227