Staging
v0.5.1
v0.5.1
https://github.com/python/cpython
Revision 8497f0a85d97e1d9660bbb450ba9f2dc8b03f87d authored by Giampaolo Rodolà on 07 December 2010, 18:54:43 UTC, committed by Giampaolo Rodolà on 07 December 2010, 18:54:43 UTC
1 parent 1740947
Tip revision: 8497f0a85d97e1d9660bbb450ba9f2dc8b03f87d authored by Giampaolo Rodolà on 07 December 2010, 18:54:43 UTC
backporting security fix of issue 9129 (smtpd module vulnerable to DoS attacks in case of connection bashing)
backporting security fix of issue 9129 (smtpd module vulnerable to DoS attacks in case of connection bashing)
Tip revision: 8497f0a
_json.c
#include "Python.h"
#define DEFAULT_ENCODING "utf-8"
#define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '"')
#define MIN_EXPANSION 6
#ifdef Py_UNICODE_WIDE
#define MAX_EXPANSION (2 * MIN_EXPANSION)
#else
#define MAX_EXPANSION MIN_EXPANSION
#endif
static Py_ssize_t
ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars)
{
Py_UNICODE x;
output[chars++] = '\\';
switch (c) {
case '\\': output[chars++] = (char)c; break;
case '"': output[chars++] = (char)c; break;
case '\b': output[chars++] = 'b'; break;
case '\f': output[chars++] = 'f'; break;
case '\n': output[chars++] = 'n'; break;
case '\r': output[chars++] = 'r'; break;
case '\t': output[chars++] = 't'; break;
default:
#ifdef Py_UNICODE_WIDE
if (c >= 0x10000) {
/* UTF-16 surrogate pair */
Py_UNICODE v = c - 0x10000;
c = 0xd800 | ((v >> 10) & 0x3ff);
output[chars++] = 'u';
x = (c & 0xf000) >> 12;
output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
x = (c & 0x0f00) >> 8;
output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
x = (c & 0x00f0) >> 4;
output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
x = (c & 0x000f);
output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
c = 0xdc00 | (v & 0x3ff);
output[chars++] = '\\';
}
#endif
output[chars++] = 'u';
x = (c & 0xf000) >> 12;
output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
x = (c & 0x0f00) >> 8;
output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
x = (c & 0x00f0) >> 4;
output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
x = (c & 0x000f);
output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
}
return chars;
}
static PyObject *
ascii_escape_unicode(PyObject *pystr)
{
Py_ssize_t i;
Py_ssize_t input_chars;
Py_ssize_t output_size;
Py_ssize_t chars;
PyObject *rval;
char *output;
Py_UNICODE *input_unicode;
input_chars = PyUnicode_GET_SIZE(pystr);
input_unicode = PyUnicode_AS_UNICODE(pystr);
/* One char input can be up to 6 chars output, estimate 4 of these */
output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
rval = PyString_FromStringAndSize(NULL, output_size);
if (rval == NULL) {
return NULL;
}
output = PyString_AS_STRING(rval);
chars = 0;
output[chars++] = '"';
for (i = 0; i < input_chars; i++) {
Py_UNICODE c = input_unicode[i];
if (S_CHAR(c)) {
output[chars++] = (char)c;
}
else {
chars = ascii_escape_char(c, output, chars);
}
if (output_size - chars < (1 + MAX_EXPANSION)) {
/* There's more than four, so let's resize by a lot */
output_size *= 2;
/* This is an upper bound */
if (output_size > 2 + (input_chars * MAX_EXPANSION)) {
output_size = 2 + (input_chars * MAX_EXPANSION);
}
if (_PyString_Resize(&rval, output_size) == -1) {
return NULL;
}
output = PyString_AS_STRING(rval);
}
}
output[chars++] = '"';
if (_PyString_Resize(&rval, chars) == -1) {
return NULL;
}
return rval;
}
static PyObject *
ascii_escape_str(PyObject *pystr)
{
Py_ssize_t i;
Py_ssize_t input_chars;
Py_ssize_t output_size;
Py_ssize_t chars;
PyObject *rval;
char *output;
char *input_str;
input_chars = PyString_GET_SIZE(pystr);
input_str = PyString_AS_STRING(pystr);
/* One char input can be up to 6 chars output, estimate 4 of these */
output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
rval = PyString_FromStringAndSize(NULL, output_size);
if (rval == NULL) {
return NULL;
}
output = PyString_AS_STRING(rval);
chars = 0;
output[chars++] = '"';
for (i = 0; i < input_chars; i++) {
Py_UNICODE c = (Py_UNICODE)input_str[i];
if (S_CHAR(c)) {
output[chars++] = (char)c;
}
else if (c > 0x7F) {
/* We hit a non-ASCII character, bail to unicode mode */
PyObject *uni;
Py_DECREF(rval);
uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
if (uni == NULL) {
return NULL;
}
rval = ascii_escape_unicode(uni);
Py_DECREF(uni);
return rval;
}
else {
chars = ascii_escape_char(c, output, chars);
}
/* An ASCII char can't possibly expand to a surrogate! */
if (output_size - chars < (1 + MIN_EXPANSION)) {
/* There's more than four, so let's resize by a lot */
output_size *= 2;
if (output_size > 2 + (input_chars * MIN_EXPANSION)) {
output_size = 2 + (input_chars * MIN_EXPANSION);
}
if (_PyString_Resize(&rval, output_size) == -1) {
return NULL;
}
output = PyString_AS_STRING(rval);
}
}
output[chars++] = '"';
if (_PyString_Resize(&rval, chars) == -1) {
return NULL;
}
return rval;
}
void
raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
{
static PyObject *errmsg_fn = NULL;
PyObject *pymsg;
if (errmsg_fn == NULL) {
PyObject *decoder = PyImport_ImportModule("json.decoder");
if (decoder == NULL)
return;
errmsg_fn = PyObject_GetAttrString(decoder, "errmsg");
if (errmsg_fn == NULL)
return;
Py_DECREF(decoder);
}
pymsg = PyObject_CallFunction(errmsg_fn, "(zOn)", msg, s, end);
if (pymsg) {
PyErr_SetObject(PyExc_ValueError, pymsg);
Py_DECREF(pymsg);
}
/*
def linecol(doc, pos):
lineno = doc.count('\n', 0, pos) + 1
if lineno == 1:
colno = pos
else:
colno = pos - doc.rindex('\n', 0, pos)
return lineno, colno
def errmsg(msg, doc, pos, end=None):
lineno, colno = linecol(doc, pos)
if end is None:
return '%s: line %d column %d (char %d)' % (msg, lineno, colno, pos)
endlineno, endcolno = linecol(doc, end)
return '%s: line %d column %d - line %d column %d (char %d - %d)' % (
msg, lineno, colno, endlineno, endcolno, pos, end)
*/
}
static PyObject *
join_list_unicode(PyObject *lst)
{
static PyObject *ustr = NULL;
static PyObject *joinstr = NULL;
if (ustr == NULL) {
Py_UNICODE c = 0;
ustr = PyUnicode_FromUnicode(&c, 0);
}
if (joinstr == NULL) {
joinstr = PyString_InternFromString("join");
}
if (joinstr == NULL || ustr == NULL) {
return NULL;
}
return PyObject_CallMethodObjArgs(ustr, joinstr, lst, NULL);
}
static PyObject *
scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict)
{
PyObject *rval;
Py_ssize_t len = PyString_GET_SIZE(pystr);
Py_ssize_t begin = end - 1;
Py_ssize_t next = begin;
char *buf = PyString_AS_STRING(pystr);
PyObject *chunks = PyList_New(0);
if (chunks == NULL) {
goto bail;
}
if (end < 0 || len <= end) {
PyErr_SetString(PyExc_ValueError, "end is out of bounds");
goto bail;
}
while (1) {
/* Find the end of the string or the next escape */
Py_UNICODE c = 0;
PyObject *chunk = NULL;
for (next = end; next < len; next++) {
c = buf[next];
if (c == '"' || c == '\\') {
break;
}
else if (strict && c <= 0x1f) {
raise_errmsg("Invalid control character at", pystr, next);
goto bail;
}
}
if (!(c == '"' || c == '\\')) {
raise_errmsg("Unterminated string starting at", pystr, begin);
goto bail;
}
/* Pick up this chunk if it's not zero length */
if (next != end) {
PyObject *strchunk = PyBuffer_FromMemory(&buf[end], next - end);
if (strchunk == NULL) {
goto bail;
}
chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL);
Py_DECREF(strchunk);
if (chunk == NULL) {
goto bail;
}
if (PyList_Append(chunks, chunk)) {
Py_DECREF(chunk);
goto bail;
}
Py_DECREF(chunk);
}
next++;
if (c == '"') {
end = next;
break;
}
if (next == len) {
raise_errmsg("Unterminated string starting at", pystr, begin);
goto bail;
}
c = buf[next];
if (c != 'u') {
/* Non-unicode backslash escapes */
end = next + 1;
switch (c) {
case '"': break;
case '\\': break;
case '/': break;
case 'b': c = '\b'; break;
case 'f': c = '\f'; break;
case 'n': c = '\n'; break;
case 'r': c = '\r'; break;
case 't': c = '\t'; break;
default: c = 0;
}
if (c == 0) {
raise_errmsg("Invalid \\escape", pystr, end - 2);
goto bail;
}
}
else {
c = 0;
next++;
end = next + 4;
if (end >= len) {
raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
goto bail;
}
/* Decode 4 hex digits */
for (; next < end; next++) {
Py_ssize_t shl = (end - next - 1) << 2;
Py_UNICODE digit = buf[next];
switch (digit) {
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
c |= (digit - '0') << shl; break;
case 'a': case 'b': case 'c': case 'd': case 'e':
case 'f':
c |= (digit - 'a' + 10) << shl; break;
case 'A': case 'B': case 'C': case 'D': case 'E':
case 'F':
c |= (digit - 'A' + 10) << shl; break;
default:
raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
goto bail;
}
}
#ifdef Py_UNICODE_WIDE
/* Surrogate pair */
if (c >= 0xd800 && c <= 0xdbff) {
Py_UNICODE c2 = 0;
if (end + 6 >= len) {
raise_errmsg("Invalid \\uXXXX\\uXXXX surrogate pair", pystr,
end - 5);
}
if (buf[next++] != '\\' || buf[next++] != 'u') {
raise_errmsg("Invalid \\uXXXX\\uXXXX surrogate pair", pystr,
end - 5);
}
end += 6;
/* Decode 4 hex digits */
for (; next < end; next++) {
Py_ssize_t shl = (end - next - 1) << 2;
Py_UNICODE digit = buf[next];
switch (digit) {
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
c2 |= (digit - '0') << shl; break;
case 'a': case 'b': case 'c': case 'd': case 'e':
case 'f':
c2 |= (digit - 'a' + 10) << shl; break;
case 'A': case 'B': case 'C': case 'D': case 'E':
case 'F':
c2 |= (digit - 'A' + 10) << shl; break;
default:
raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
goto bail;
}
}
c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
}
#endif
}
chunk = PyUnicode_FromUnicode(&c, 1);
if (chunk == NULL) {
goto bail;
}
if (PyList_Append(chunks, chunk)) {
Py_DECREF(chunk);
goto bail;
}
Py_DECREF(chunk);
}
rval = join_list_unicode(chunks);
if (rval == NULL) {
goto bail;
}
Py_CLEAR(chunks);
return Py_BuildValue("(Nn)", rval, end);
bail:
Py_XDECREF(chunks);
return NULL;
}
static PyObject *
scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict)
{
PyObject *rval;
Py_ssize_t len = PyUnicode_GET_SIZE(pystr);
Py_ssize_t begin = end - 1;
Py_ssize_t next = begin;
const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);
PyObject *chunks = PyList_New(0);
if (chunks == NULL) {
goto bail;
}
if (end < 0 || len <= end) {
PyErr_SetString(PyExc_ValueError, "end is out of bounds");
goto bail;
}
while (1) {
/* Find the end of the string or the next escape */
Py_UNICODE c = 0;
PyObject *chunk = NULL;
for (next = end; next < len; next++) {
c = buf[next];
if (c == '"' || c == '\\') {
break;
}
else if (strict && c <= 0x1f) {
raise_errmsg("Invalid control character at", pystr, next);
goto bail;
}
}
if (!(c == '"' || c == '\\')) {
raise_errmsg("Unterminated string starting at", pystr, begin);
goto bail;
}
/* Pick up this chunk if it's not zero length */
if (next != end) {
chunk = PyUnicode_FromUnicode(&buf[end], next - end);
if (chunk == NULL) {
goto bail;
}
if (PyList_Append(chunks, chunk)) {
Py_DECREF(chunk);
goto bail;
}
Py_DECREF(chunk);
}
next++;
if (c == '"') {
end = next;
break;
}
if (next == len) {
raise_errmsg("Unterminated string starting at", pystr, begin);
goto bail;
}
c = buf[next];
if (c != 'u') {
/* Non-unicode backslash escapes */
end = next + 1;
switch (c) {
case '"': break;
case '\\': break;
case '/': break;
case 'b': c = '\b'; break;
case 'f': c = '\f'; break;
case 'n': c = '\n'; break;
case 'r': c = '\r'; break;
case 't': c = '\t'; break;
default: c = 0;
}
if (c == 0) {
raise_errmsg("Invalid \\escape", pystr, end - 2);
goto bail;
}
}
else {
c = 0;
next++;
end = next + 4;
if (end >= len) {
raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
goto bail;
}
/* Decode 4 hex digits */
for (; next < end; next++) {
Py_ssize_t shl = (end - next - 1) << 2;
Py_UNICODE digit = buf[next];
switch (digit) {
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
c |= (digit - '0') << shl; break;
case 'a': case 'b': case 'c': case 'd': case 'e':
case 'f':
c |= (digit - 'a' + 10) << shl; break;
case 'A': case 'B': case 'C': case 'D': case 'E':
case 'F':
c |= (digit - 'A' + 10) << shl; break;
default:
raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
goto bail;
}
}
#ifdef Py_UNICODE_WIDE
/* Surrogate pair */
if (c >= 0xd800 && c <= 0xdbff) {
Py_UNICODE c2 = 0;
if (end + 6 >= len) {
raise_errmsg("Invalid \\uXXXX\\uXXXX surrogate pair", pystr,
end - 5);
}
if (buf[next++] != '\\' || buf[next++] != 'u') {
raise_errmsg("Invalid \\uXXXX\\uXXXX surrogate pair", pystr,
end - 5);
}
end += 6;
/* Decode 4 hex digits */
for (; next < end; next++) {
Py_ssize_t shl = (end - next - 1) << 2;
Py_UNICODE digit = buf[next];
switch (digit) {
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
c2 |= (digit - '0') << shl; break;
case 'a': case 'b': case 'c': case 'd': case 'e':
case 'f':
c2 |= (digit - 'a' + 10) << shl; break;
case 'A': case 'B': case 'C': case 'D': case 'E':
case 'F':
c2 |= (digit - 'A' + 10) << shl; break;
default:
raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
goto bail;
}
}
c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
}
#endif
}
chunk = PyUnicode_FromUnicode(&c, 1);
if (chunk == NULL) {
goto bail;
}
if (PyList_Append(chunks, chunk)) {
Py_DECREF(chunk);
goto bail;
}
Py_DECREF(chunk);
}
rval = join_list_unicode(chunks);
if (rval == NULL) {
goto bail;
}
Py_CLEAR(chunks);
return Py_BuildValue("(Nn)", rval, end);
bail:
Py_XDECREF(chunks);
return NULL;
}
PyDoc_STRVAR(pydoc_scanstring,
"scanstring(basestring, end, encoding) -> (str, end)\n");
static PyObject *
py_scanstring(PyObject* self, PyObject *args)
{
PyObject *pystr;
Py_ssize_t end;
char *encoding = NULL;
int strict = 0;
if (!PyArg_ParseTuple(args, "On|zi:scanstring", &pystr, &end, &encoding, &strict)) {
return NULL;
}
if (encoding == NULL) {
encoding = DEFAULT_ENCODING;
}
if (PyString_Check(pystr)) {
return scanstring_str(pystr, end, encoding, strict);
}
else if (PyUnicode_Check(pystr)) {
return scanstring_unicode(pystr, end, strict);
}
else {
PyErr_Format(PyExc_TypeError,
"first argument must be a string or unicode, not %.80s",
Py_TYPE(pystr)->tp_name);
return NULL;
}
}
PyDoc_STRVAR(pydoc_encode_basestring_ascii,
"encode_basestring_ascii(basestring) -> str\n");
static PyObject *
py_encode_basestring_ascii(PyObject* self, PyObject *pystr)
{
/* METH_O */
if (PyString_Check(pystr)) {
return ascii_escape_str(pystr);
}
else if (PyUnicode_Check(pystr)) {
return ascii_escape_unicode(pystr);
}
else {
PyErr_Format(PyExc_TypeError,
"first argument must be a string or unicode, not %.80s",
Py_TYPE(pystr)->tp_name);
return NULL;
}
}
static PyMethodDef json_methods[] = {
{"encode_basestring_ascii", (PyCFunction)py_encode_basestring_ascii,
METH_O, pydoc_encode_basestring_ascii},
{"scanstring", (PyCFunction)py_scanstring, METH_VARARGS,
pydoc_scanstring},
{NULL, NULL, 0, NULL}
};
PyDoc_STRVAR(module_doc,
"json speedups\n");
void
init_json(void)
{
PyObject *m;
m = Py_InitModule3("_json", json_methods, module_doc);
}
Computing file changes ...