Staging
v0.5.1
https://github.com/python/cpython
Revision a4dd011259fa6f3079bd0efd95b3a136c0e3c190 authored by Guido van Rossum on 15 April 2001, 22:16:26 UTC, committed by Guido van Rossum on 15 April 2001, 22:16:26 UTC
and reported to python-dev: because we were calling dict_resize() in
PyDict_Next(), and because GC's dict_traverse() uses PyDict_Next(),
and because PyTuple_New() can cause GC, and because dict_items() calls
PyTuple_New(), it was possible for dict_items() to have the dict
resized right under its nose.

The solution is convoluted, and touches several places: keys(),
values(), items(), popitem(), PyDict_Next(), and PyDict_SetItem().

There are two parts to it. First, we no longer call dict_resize() in
PyDict_Next(), which seems to solve the immediate problem.  But then
PyDict_SetItem() must have a different policy about when *it* calls
dict_resize(), because we want to guarantee (e.g. for an algorithm
that Jeremy uses in the compiler) that you can loop over a dict using
PyDict_Next() and make changes to the dict as long as those changes
are only value replacements for existing keys using PyDict_SetItem().
This is done by resizing *after* the insertion instead of before, and
by remembering the size before we insert the item, and if the size is
still the same, we don't bother to even check if we might need to
resize.  An additional detail is that if the dict starts out empty, we
must still resize it before the insertion.

That was the first part. :-)

The second part is to make keys(), values(), items(), and popitem()
safe against side effects on the dict caused by allocations, under the
assumption that if the GC can cause arbitrary Python code to run, it
can cause other threads to run, and it's not inconceivable that our
dict could be resized -- it would be insane to write code that relies
on this, but not all code is sane.

Now, I have this nagging feeling that the loops in lookdict probably
are blissfully assuming that doing a simple key comparison does not
change the dict's size.  This is not necessarily true (the keys could
be class instances after all).  But that's a battle for another day.
1 parent 0aa30b0
Raw File
Tip revision: a4dd011259fa6f3079bd0efd95b3a136c0e3c190 authored by Guido van Rossum on 15 April 2001, 22:16:26 UTC
Tentative fix for a problem that Tim discovered at the last moment,
Tip revision: a4dd011
regsub.py
"""Regexp-based split and replace using the obsolete regex module.

This module is only for backward compatibility.  These operations
are now provided by the new regular expression module, "re".

sub(pat, repl, str):        replace first occurrence of pattern in string
gsub(pat, repl, str):       replace all occurrences of pattern in string
split(str, pat, maxsplit):  split string using pattern as delimiter
splitx(str, pat, maxsplit): split string using pattern as delimiter plus
                            return delimiters
capwords(str, pat):         capitalize words separated by pattern
"""

import warnings
warnings.warn("the regsub module is deprecated; please use re.sub()",
              DeprecationWarning)

# Ignore further deprecation warnings about this module
warnings.filterwarnings("ignore", "", DeprecationWarning, __name__)

import regex

__all__ = ["sub","gsub","split","splitx","capwords"]

# Replace first occurrence of pattern pat in string str by replacement
# repl.  If the pattern isn't found, the string is returned unchanged.
# The replacement may contain references \digit to subpatterns and
# escaped backslashes.  The pattern may be a string or an already
# compiled pattern.

def sub(pat, repl, str):
    """Return str with the first match of pat replaced by repl.

    pat is either a pattern string or an already compiled pattern.  repl
    is expanded with expand(), so it may refer to matched groups.  When
    pat does not match anywhere, str is returned unchanged.
    """
    prog = compile(pat)
    if prog.search(str) < 0:
        # No match: nothing to replace.
        return str
    regs = prog.regs
    match_start, match_end = regs[0]
    replacement = expand(repl, regs, str)
    return str[:match_start] + replacement + str[match_end:]


# Replace all (non-overlapping) occurrences of pattern pat in string
# str by replacement repl.  The same rules as for sub() apply.
# Empty matches for the pattern are replaced only when not adjacent to
# a previous match, so e.g. gsub('', '-', 'abc') returns '-a-b-c-'.

def gsub(pat, repl, str):
    """Return str with every non-overlapping match of pat replaced by repl.

    pat is either a pattern string or an already compiled pattern, and
    repl is expanded with expand() for each match.  An empty match that
    sits directly after the previous match is skipped; the search is
    retried one character further instead.
    """
    prog = compile(pat)
    pieces = []         # output fragments, joined once at the end
    pos = 0             # index in str up to which output was produced
    matched_before = 0  # becomes true after the first replacement
    while prog.search(str, pos) >= 0:
        regs = prog.regs
        lo, hi = regs[0]
        if matched_before and lo == hi == pos:
            # Empty match glued to the previous one: look for the next
            # match starting one character later, or stop if there is
            # no room or no further match.
            if pos >= len(str) or prog.search(str, pos + 1) < 0:
                break
            regs = prog.regs
            lo, hi = regs[0]
        pieces.append(str[pos:lo])
        pieces.append(expand(repl, regs, str))
        pos = hi
        matched_before = 1
    # Copy whatever follows the last replacement.
    pieces.append(str[pos:])
    return ''.join(pieces)


# Split string str in fields separated by delimiters matching pattern
# pat.  Only non-empty matches for the pattern are considered, so e.g.
# split('abc', '') returns ['abc'].
# The optional 3rd argument limits the number of splits that are
# performed; 0 (the default) means no limit.

def split(str, pat, maxsplit = 0):
    """Split str at non-empty matches of pat and return the list of fields.

    The delimiters themselves are dropped.  A non-zero maxsplit stops the
    splitting after that many splits; 0 means no limit.
    """
    return intsplit(str, pat, maxsplit, retain=0)

# Split string str in fields separated by delimiters matching pattern
# pat.  Only non-empty matches for the pattern are considered, so e.g.
# splitx('abc', '') returns ['abc'].  The delimiters are also included
# in the list, each one following the field it terminates.
# The optional 3rd argument limits the number of splits that are
# performed; 0 (the default) means no limit.


def splitx(str, pat, maxsplit = 0):
    """Split str at non-empty matches of pat, keeping the delimiters.

    The result alternates fields and the delimiter text that followed
    them.  A non-zero maxsplit stops the splitting after that many
    splits; 0 means no limit.
    """
    return intsplit(str, pat, maxsplit, retain=1)

# Internal function used to implement split() and splitx().

def intsplit(str, pat, maxsplit, retain):
    """Shared implementation of split() and splitx().

    Splits str at each non-empty match of pat (a pattern string or a
    compiled pattern) and returns the list of fields.  When retain is
    true, the matched delimiter text is appended after each field.  A
    non-zero maxsplit ends the splitting once that many splits were made.
    """
    prog = compile(pat)
    fields = []
    field_start = scan = 0  # start of the pending field / next search offset
    nsplits = 0
    while prog.search(str, scan) >= 0:
        lo, hi = prog.regs[0]
        if lo == hi:
            # Empty matches never split: move the search on by one
            # character, giving up once the end of str is reached.
            scan = scan + 1
            if scan >= len(str):
                break
            continue
        fields.append(str[field_start:lo])
        if retain:
            fields.append(str[lo:hi])
        field_start = scan = hi
        nsplits = nsplits + 1
        if maxsplit and nsplits >= maxsplit:
            break
    # The remainder after the last delimiter is always a field.
    fields.append(str[field_start:])
    return fields


# Capitalize words split using a pattern

def capwords(str, pat='[^a-zA-Z0-9_]+'):
    """Capitalize each word of str, where pat matches the word separators.

    The string is split with splitx(), so separators are preserved
    verbatim in the result.
    """
    pieces = splitx(str, pat)
    # splitx() alternates word, separator, word, ...: words sit at the
    # even indices, so only those are capitalized.
    index = 0
    while index < len(pieces):
        pieces[index] = pieces[index].capitalize()
        index = index + 2
    return "".join(pieces)


# Internal subroutines:
# compile(pat): compile a pattern, caching already compiled patterns
# expand(repl, regs, str): expand \digit escapes in replacement string


# Manage a cache of compiled regular expressions.
#
# If the pattern is a string a compiled version of it is returned.  If
# the pattern has been used before we return an already compiled
# version from the cache; otherwise we compile it now and save the
# compiled version in the cache, along with the syntax it was compiled
# with.  Instead of a string, a compiled regular expression can also
# be passed.

# Maps (pattern string, regex syntax) to the compiled pattern object.
cache = {}

def compile(pat):
    """Return a compiled pattern for pat, consulting the module cache.

    Anything that is not a plain string is assumed to be a compiled
    pattern already and is handed back untouched.  Strings are looked up
    under the key (pat, current regex syntax); on a miss the pattern is
    compiled and stored under that key.
    """
    if type(pat) == type(''):
        key = (pat, regex.get_syntax())
        try:
            return cache[key]       # Already compiled with this syntax
        except KeyError:
            prog = regex.compile(pat)
            cache[key] = prog
            return prog
    return pat                      # Assume it is a compiled regex


def clear_cache():
    """Forget all cached compiled patterns.

    The module-level cache name is rebound to a fresh empty dictionary;
    the previous dictionary object itself is not modified.
    """
    global cache
    cache = {}


# Expand \digit in the replacement.
# Each occurrence of \digit is replaced by the substring of str
# indicated by regs[digit].  To include a literal \ in the
# replacement, double it; other \ escapes are left unchanged (i.e.
# the \ and the following character are both copied).

def expand(repl, regs, str):
    """Return repl with its backslash escapes expanded.

    A backslash followed by a digit is replaced by the slice of str
    delimited by regs[digit], a (start, end) pair.  A doubled backslash
    yields a single backslash.  Any other backslash pair, and a lone
    trailing backslash, are copied through unchanged.
    """
    if '\\' not in repl:
        # Fast path: nothing to expand.
        return repl
    pieces = []
    pos = 0
    end = len(repl)
    while pos < end:
        ch = repl[pos]
        pos = pos + 1
        if ch == '\\' and pos < end:
            # An escape: consume the character after the backslash too.
            esc = repl[pos]
            pos = pos + 1
            if esc == '\\':
                pieces.append(esc)
            elif '0' <= esc <= '9':
                lo, hi = regs[int(esc)]
                pieces.append(str[lo:hi])
            else:
                pieces.append('\\' + esc)
        else:
            # Ordinary character, or a backslash ending the string.
            pieces.append(ch)
    return ''.join(pieces)


# Test program, reads sequences "pat repl str" from stdin.
# Optional argument specifies pattern used to split lines.

def test():
    import sys
    if sys.argv[1:]:
        delpat = sys.argv[1]
    else:
        delpat = '[ \t\n]+'
    while 1:
        if sys.stdin.isatty(): sys.stderr.write('--> ')
        line = sys.stdin.readline()
        if not line: break
        if line[-1] == '\n': line = line[:-1]
        fields = split(line, delpat)
        if len(fields) != 3:
            print 'Sorry, not three fields'
            print 'split:', `fields`
            continue
        [pat, repl, str] = split(line, delpat)
        print 'sub :', `sub(pat, repl, str)`
        print 'gsub:', `gsub(pat, repl, str)`
back to top