Staging
v0.5.1
https://github.com/python/cpython
Raw File
Tip revision: 84332f91ee463d2e097342964ec304a7c902c1e6 authored by Anthony Baxter on 10 October 2006, 17:28:33 UTC
what month is it again? I get confused...
Tip revision: 84332f9
fetch_data_files.py
"""A helper to download input files needed by assorted encoding tests.

fetch_data_files.py [directory]

Files are downloaded to directory `directory`.  If a directory isn't given,
it defaults to the current directory (.).
"""

# Whitespace-separated list of the URLs to download.  main() breaks this
# string apart with str.split(), so the indentation and the blank lines
# between groups exist purely for readability.  Each file is saved under
# the last path component of its URL (e.g. "CP932.TXT").
DATA_URLS = """
    http://people.freebsd.org/~perky/i18n/BIG5HKSCS.TXT
    http://people.freebsd.org/~perky/i18n/EUC-CN.TXT
    http://people.freebsd.org/~perky/i18n/EUC-JISX0213.TXT
    http://people.freebsd.org/~perky/i18n/EUC-JP.TXT
    http://people.freebsd.org/~perky/i18n/EUC-KR.TXT
    http://people.freebsd.org/~perky/i18n/SHIFT_JISX0213.TXT

    http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP932.TXT
    http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP936.TXT
    http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP949.TXT
    http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP950.TXT

    http://www.unicode.org/Public/3.2-Update/NormalizationTest-3.2.0.txt

    http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/SHIFTJIS.TXT
    http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/KSC/JOHAB.TXT
    http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/OTHER/BIG5.TXT
"""

# Adapted from test_support.open_urlresource() in Python 2.5.
# Fetch the file given by `url` off the web, and store it in directory
# `directory`.  The file name is extracted from the last URL component.
# If the file already exists, it's not fetched again.
def fetch_file_from_url(url, directory):
    """Download `url` into `directory` unless the file is already there.

    The local file name is the last '/'-separated component of the URL's
    path.  If a file of that name already exists in `directory`, nothing
    is downloaded.  Progress is reported on standard output.

    If the download fails or is interrupted, any partially written file
    is removed before the exception propagates.  Otherwise a truncated
    file would be mistaken for a finished download, and silently skipped,
    on every later run.

    Any exception raised by urlretrieve() is passed through unchanged.
    """
    import os

    # The URL helpers live in different modules on Python 2 and Python 3.
    try:
        from urllib.parse import urlparse
        from urllib.request import urlretrieve
    except ImportError:
        from urlparse import urlparse
        from urllib import urlretrieve

    # Element 2 of the parse result is the path.  Split on '/' rather
    # than os.sep: it's a URL, not a local file name.
    filename = urlparse(url)[2].split('/')[-1]
    target = os.path.join(directory, filename)

    if os.path.exists(target):
        print("\tskipping %r -- already exists" % target)
        return

    print("\tfetching %s ..." % url)
    # A flag plus try/finally (rather than a bare except) also covers
    # KeyboardInterrupt and works on old Python 2 releases.
    finished = False
    try:
        urlretrieve(url, target)
        finished = True
    finally:
        # `target` did not exist before this call, so whatever is there
        # after a failure is our own incomplete download.
        if not finished and os.path.exists(target):
            try:
                os.remove(target)
            except OSError:
                # Best effort only; don't mask the original error.
                pass

def main(urls, directory):
    """Download every URL listed in `urls` into `directory`.

    `urls` is a single string of whitespace-separated URLs (spaces,
    newlines and blank lines are all just separators).  Each URL is
    handed to fetch_file_from_url() in the order listed.
    """
    # Single-argument print(...) emits the same text on Python 2 and
    # also parses on Python 3, unlike the print statement.
    print("Downloading data files to %r" % directory)
    for url in urls.split():
        fetch_file_from_url(url, directory)

if __name__ == "__main__":
    import sys

    # sys.argv[0] is the script itself, so a count of 1 means "no
    # directory given" and a count of 2 means "directory given".
    # Anything else is rejected up front.
    argc = len(sys.argv)
    if argc not in (1, 2):
        raise ValueError("no more than one argument allowed")

    if argc == 2:
        directory = sys.argv[1]
    else:
        # Default to the current directory.
        directory = "."

    main(DATA_URLS, directory)
back to top