Staging
v0.5.1
https://github.com/python/cpython
Revision 392520bd78cd18639a27e5d2803c2e1c2bd593a8 authored by Miss Islington (bot) on 20 April 2018, 17:06:21 UTC, committed by Gregory P. Smith on 20 April 2018, 17:06:21 UTC
Fix clang ubsan (undefined behavior sanitizer) warnings in dictobject.c by
adjusting how the internal struct _dictkeysobject shared keys structure is
declared.

This remains ABI compatible.  We get rid of the union at the end of the
struct being used for conveinence to avoid typecasting in favor of char[]
variable length array at the end of a struct. This is known to clang to be
used for variable sized objects and will not cause an undefined behavior
problem.  Similarly, char arrays do not have strict aliasing undefined
behavior when cast.

PEP-007 does not currently list variable length arrays (VLAs) as allowed
in our subset of C99.  If this turns out to be a problem, the fix to this is
to change the char `dk_indices[]` into `dk_indices[1]` and restore the
three size computation subtractions this change removes:
  `- Py_MEMBER_SIZE(PyDictKeysObject, dk_indices)`

If this works as is I'll make a separate PR to update PEP-007.
(cherry picked from commit 397f1b28c4a12e3b3ed59a89599eabc457412649)
1 parent 18cd828
Raw File
Tip revision: 392520bd78cd18639a27e5d2803c2e1c2bd593a8 authored by Miss Islington (bot) on 20 April 2018, 17:06:21 UTC
bpo-33312: Fix clang ubsan out of bounds warnings in dict. (GH-6537) (GH-6543)
Tip revision: 392520b
sre.h
/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
 *
 * See the _sre.c file for information on usage and redistribution.
 */

#ifndef SRE_INCLUDED
#define SRE_INCLUDED

#include "sre_constants.h"

/* size of a code word (must be unsigned short or larger, and
   large enough to hold a UCS4 character) */
#define SRE_CODE Py_UCS4
#if SIZEOF_SIZE_T > 4
# define SRE_MAXREPEAT (~(SRE_CODE)0)
# define SRE_MAXGROUPS ((~(SRE_CODE)0) / 2)
#else
# define SRE_MAXREPEAT ((SRE_CODE)PY_SSIZE_T_MAX)
# define SRE_MAXGROUPS ((SRE_CODE)PY_SSIZE_T_MAX / SIZEOF_SIZE_T / 2)
#endif

typedef struct {
    PyObject_VAR_HEAD
    Py_ssize_t groups; /* must be first! */
    PyObject* groupindex; /* dict */
    PyObject* indexgroup; /* tuple */
    /* compatibility */
    PyObject* pattern; /* pattern source (or None) */
    int flags; /* flags used when compiling pattern source */
    PyObject *weakreflist; /* List of weak references */
    int isbytes; /* pattern type (1 - bytes, 0 - string, -1 - None) */
    /* pattern code */
    Py_ssize_t codesize;
    SRE_CODE code[1];
} PatternObject;

#define PatternObject_GetCode(o) (((PatternObject*)(o))->code)

typedef struct {
    PyObject_VAR_HEAD
    PyObject* string; /* link to the target string (must be first) */
    PyObject* regs; /* cached list of matching spans */
    PatternObject* pattern; /* link to the regex (pattern) object */
    Py_ssize_t pos, endpos; /* current target slice */
    Py_ssize_t lastindex; /* last index marker seen by the engine (-1 if none) */
    Py_ssize_t groups; /* number of groups (start/end marks) */
    Py_ssize_t mark[1];
} MatchObject;

typedef struct SRE_REPEAT_T {
    Py_ssize_t count;
    SRE_CODE* pattern; /* points to REPEAT operator arguments */
    void* last_ptr; /* helper to check for infinite loops */
    struct SRE_REPEAT_T *prev; /* points to previous repeat context */
} SRE_REPEAT;

typedef struct {
    /* string pointers */
    void* ptr; /* current position (also end of current slice) */
    void* beginning; /* start of original string */
    void* start; /* start of current slice */
    void* end; /* end of original string */
    /* attributes for the match object */
    PyObject* string;
    Py_buffer buffer;
    Py_ssize_t pos, endpos;
    int isbytes;
    int charsize; /* character size */
    /* registers */
    Py_ssize_t lastindex;
    Py_ssize_t lastmark;
    void** mark;
    int match_all;
    int must_advance;
    /* dynamically allocated stuff */
    char* data_stack;
    size_t data_stack_size;
    size_t data_stack_base;
    /* current repeat context */
    SRE_REPEAT *repeat;
} SRE_STATE;

typedef struct {
    PyObject_HEAD
    PyObject* pattern;
    SRE_STATE state;
} ScannerObject;

#endif
back to top