Staging
v0.5.1
v0.5.1
https://github.com/python/cpython
Revision 34bfde919e104e34eaa4135867aa2947510623b7 authored by Kurt B. Kaiser on 24 November 2003, 02:34:01 UTC, committed by Kurt B. Kaiser on 24 November 2003, 02:34:01 UTC
- After an exception, run.py was not setting the exception vector. Noam Raphael suggested correcting this so pdb's postmortem pm() would work. IDLEfork Patch 844675 Update NEWS and include some items missed in IDLE1.0b2. Bump the version. Modified Files: Tag: release23-maint NEWS.txt idlever.py run.py
1 parent c9de825
Tip revision: 34bfde919e104e34eaa4135867aa2947510623b7 authored by Kurt B. Kaiser on 24 November 2003, 02:34:01 UTC
Backport:
Backport:
Tip revision: 34bfde9
urlparse.py
"""Parse (absolute and relative) URLs.
See RFC 1808: "Relative Uniform Resource Locators", by R. Fielding,
UC Irvine, June 1995.
"""
__all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag",
"urlsplit", "urlunsplit"]
# A classification of schemes ('' means apply by default)
uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'imap',
'wais', 'file', 'https', 'shttp', 'mms',
'prospero', 'rtsp', 'rtspu', '']
uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet',
'imap', 'wais', 'file', 'mms', 'https', 'shttp',
'snews', 'prospero', 'rtsp', 'rtspu', '']
non_hierarchical = ['gopher', 'hdl', 'mailto', 'news',
'telnet', 'wais', 'imap', 'snews', 'sip']
uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap',
'https', 'shttp', 'rtsp', 'rtspu', 'sip',
'mms', '']
uses_query = ['http', 'wais', 'imap', 'https', 'shttp', 'mms',
'gopher', 'rtsp', 'rtspu', 'sip', '']
uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news',
'nntp', 'wais', 'https', 'shttp', 'snews',
'file', 'prospero', '']
# Characters valid in scheme names
scheme_chars = ('abcdefghijklmnopqrstuvwxyz'
'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
'0123456789'
'+-.')
MAX_CACHE_SIZE = 20
_parse_cache = {}
def clear_cache():
"""Clear the parse cache."""
global _parse_cache
_parse_cache = {}
def urlparse(url, scheme='', allow_fragments=1):
"""Parse a URL into 6 components:
<scheme>://<netloc>/<path>;<params>?<query>#<fragment>
Return a 6-tuple: (scheme, netloc, path, params, query, fragment).
Note that we don't break the components up in smaller bits
(e.g. netloc is a single string) and we don't expand % escapes."""
tuple = urlsplit(url, scheme, allow_fragments)
scheme, netloc, url, query, fragment = tuple
if scheme in uses_params and ';' in url:
url, params = _splitparams(url)
else:
params = ''
return scheme, netloc, url, params, query, fragment
def _splitparams(url):
if '/' in url:
i = url.find(';', url.rfind('/'))
if i < 0:
return url, ''
else:
i = url.find(';')
return url[:i], url[i+1:]
def urlsplit(url, scheme='', allow_fragments=1):
"""Parse a URL into 5 components:
<scheme>://<netloc>/<path>?<query>#<fragment>
Return a 5-tuple: (scheme, netloc, path, query, fragment).
Note that we don't break the components up in smaller bits
(e.g. netloc is a single string) and we don't expand % escapes."""
key = url, scheme, allow_fragments
cached = _parse_cache.get(key, None)
if cached:
return cached
if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth
clear_cache()
netloc = query = fragment = ''
i = url.find(':')
if i > 0:
if url[:i] == 'http': # optimize the common case
scheme = url[:i].lower()
url = url[i+1:]
if url[:2] == '//':
i = url.find('/', 2)
if i < 0:
i = url.find('#')
if i < 0:
i = len(url)
netloc = url[2:i]
url = url[i:]
if allow_fragments and '#' in url:
url, fragment = url.split('#', 1)
if '?' in url:
url, query = url.split('?', 1)
tuple = scheme, netloc, url, query, fragment
_parse_cache[key] = tuple
return tuple
for c in url[:i]:
if c not in scheme_chars:
break
else:
scheme, url = url[:i].lower(), url[i+1:]
if scheme in uses_netloc:
if url[:2] == '//':
i = url.find('/', 2)
if i < 0:
i = len(url)
netloc, url = url[2:i], url[i:]
if allow_fragments and scheme in uses_fragment and '#' in url:
url, fragment = url.split('#', 1)
if scheme in uses_query and '?' in url:
url, query = url.split('?', 1)
tuple = scheme, netloc, url, query, fragment
_parse_cache[key] = tuple
return tuple
def urlunparse((scheme, netloc, url, params, query, fragment)):
"""Put a parsed URL back together again. This may result in a
slightly different, but equivalent URL, if the URL that was parsed
originally had redundant delimiters, e.g. a ? with an empty query
(the draft states that these are equivalent)."""
if params:
url = "%s;%s" % (url, params)
return urlunsplit((scheme, netloc, url, query, fragment))
def urlunsplit((scheme, netloc, url, query, fragment)):
if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'):
if url and url[:1] != '/': url = '/' + url
url = '//' + (netloc or '') + url
if scheme:
url = scheme + ':' + url
if query:
url = url + '?' + query
if fragment:
url = url + '#' + fragment
return url
def urljoin(base, url, allow_fragments = 1):
"""Join a base URL and a possibly relative URL to form an absolute
interpretation of the latter."""
if not base:
return url
if not url:
return base
bscheme, bnetloc, bpath, bparams, bquery, bfragment = \
urlparse(base, '', allow_fragments)
scheme, netloc, path, params, query, fragment = \
urlparse(url, bscheme, allow_fragments)
if scheme != bscheme or scheme not in uses_relative:
return url
if scheme in uses_netloc:
if netloc:
return urlunparse((scheme, netloc, path,
params, query, fragment))
netloc = bnetloc
if path[:1] == '/':
return urlunparse((scheme, netloc, path,
params, query, fragment))
if not path:
if not params:
params = bparams
if not query:
query = bquery
return urlunparse((scheme, netloc, bpath,
params, query, fragment))
segments = bpath.split('/')[:-1] + path.split('/')
# XXX The stuff below is bogus in various ways...
if segments[-1] == '.':
segments[-1] = ''
while '.' in segments:
segments.remove('.')
while 1:
i = 1
n = len(segments) - 1
while i < n:
if (segments[i] == '..'
and segments[i-1] not in ('', '..')):
del segments[i-1:i+1]
break
i = i+1
else:
break
if segments == ['', '..']:
segments[-1] = ''
elif len(segments) >= 2 and segments[-1] == '..':
segments[-2:] = ['']
return urlunparse((scheme, netloc, '/'.join(segments),
params, query, fragment))
def urldefrag(url):
"""Removes any existing fragment from URL.
Returns a tuple of the defragmented URL and the fragment. If
the URL contained no fragments, the second element is the
empty string.
"""
if '#' in url:
s, n, p, a, q, frag = urlparse(url)
defrag = urlunparse((s, n, p, a, q, ''))
return defrag, frag
else:
return url, ''
test_input = """
http://a/b/c/d
g:h = <URL:g:h>
http:g = <URL:http://a/b/c/g>
http: = <URL:http://a/b/c/d>
g = <URL:http://a/b/c/g>
./g = <URL:http://a/b/c/g>
g/ = <URL:http://a/b/c/g/>
/g = <URL:http://a/g>
//g = <URL:http://g>
?y = <URL:http://a/b/c/d?y>
g?y = <URL:http://a/b/c/g?y>
g?y/./x = <URL:http://a/b/c/g?y/./x>
. = <URL:http://a/b/c/>
./ = <URL:http://a/b/c/>
.. = <URL:http://a/b/>
../ = <URL:http://a/b/>
../g = <URL:http://a/b/g>
../.. = <URL:http://a/>
../../g = <URL:http://a/g>
../../../g = <URL:http://a/../g>
./../g = <URL:http://a/b/g>
./g/. = <URL:http://a/b/c/g/>
/./g = <URL:http://a/./g>
g/./h = <URL:http://a/b/c/g/h>
g/../h = <URL:http://a/b/c/h>
http:g = <URL:http://a/b/c/g>
http: = <URL:http://a/b/c/d>
http:?y = <URL:http://a/b/c/d?y>
http:g?y = <URL:http://a/b/c/g?y>
http:g?y/./x = <URL:http://a/b/c/g?y/./x>
"""
def test():
import sys
base = ''
if sys.argv[1:]:
fn = sys.argv[1]
if fn == '-':
fp = sys.stdin
else:
fp = open(fn)
else:
import StringIO
fp = StringIO.StringIO(test_input)
while 1:
line = fp.readline()
if not line: break
words = line.split()
if not words:
continue
url = words[0]
parts = urlparse(url)
print '%-10s : %s' % (url, parts)
abs = urljoin(base, url)
if not base:
base = abs
wrapped = '<URL:%s>' % abs
print '%-10s = %s' % (url, wrapped)
if len(words) == 3 and words[1] == '=':
if wrapped != words[2]:
print 'EXPECTED', words[2], '!!!!!!!!!!'
if __name__ == '__main__':
test()
![swh spinner](/static/img/swh-spinner.gif)
Computing file changes ...