Staging
v0.8.1
v0.8.1
swh:1:snp:635f4099902912592851108bcac178ff574f7c5f
Tip revision: 74f4bd53e03ded8408bcc2de67cf0f5a4ac5b1a1 authored by Barry Warsaw on 23 February 2012, 15:59:38 UTC
Bump some more copyright years (as per PEP 101), since this is the first
Bump some more copyright years (as per PEP 101), since this is the first
Tip revision: 74f4bd5
_json.c
#include "Python.h"
#define DEFAULT_ENCODING "utf-8"
#define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '"')
#define MIN_EXPANSION 6
#ifdef Py_UNICODE_WIDE
#define MAX_EXPANSION (2 * MIN_EXPANSION)
#else
#define MAX_EXPANSION MIN_EXPANSION
#endif
static Py_ssize_t
ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars)
{
Py_UNICODE x;
output[chars++] = '\\';
switch (c) {
case '\\': output[chars++] = (char)c; break;
case '"': output[chars++] = (char)c; break;
case '\b': output[chars++] = 'b'; break;
case '\f': output[chars++] = 'f'; break;
case '\n': output[chars++] = 'n'; break;
case '\r': output[chars++] = 'r'; break;
case '\t': output[chars++] = 't'; break;
default:
#ifdef Py_UNICODE_WIDE
if (c >= 0x10000) {
/* UTF-16 surrogate pair */
Py_UNICODE v = c - 0x10000;
c = 0xd800 | ((v >> 10) & 0x3ff);
output[chars++] = 'u';
x = (c & 0xf000) >> 12;
output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
x = (c & 0x0f00) >> 8;
output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
x = (c & 0x00f0) >> 4;
output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
x = (c & 0x000f);
output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
c = 0xdc00 | (v & 0x3ff);
output[chars++] = '\\';
}
#endif
output[chars++] = 'u';
x = (c & 0xf000) >> 12;
output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
x = (c & 0x0f00) >> 8;
output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
x = (c & 0x00f0) >> 4;
output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
x = (c & 0x000f);
output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
}
return chars;
}
static PyObject *
ascii_escape_unicode(PyObject *pystr)
{
Py_ssize_t i;
Py_ssize_t input_chars;
Py_ssize_t output_size;
Py_ssize_t chars;
PyObject *rval;
char *output;
Py_UNICODE *input_unicode;
input_chars = PyUnicode_GET_SIZE(pystr);
input_unicode = PyUnicode_AS_UNICODE(pystr);
/* One char input can be up to 6 chars output, estimate 4 of these */
output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
rval = PyString_FromStringAndSize(NULL, output_size);
if (rval == NULL) {
return NULL;
}
output = PyString_AS_STRING(rval);
chars = 0;
output[chars++] = '"';
for (i = 0; i < input_chars; i++) {
Py_UNICODE c = input_unicode[i];
if (S_CHAR(c)) {
output[chars++] = (char)c;
}
else {
chars = ascii_escape_char(c, output, chars);
}
if (output_size - chars < (1 + MAX_EXPANSION)) {
/* There's more than four, so let's resize by a lot */
output_size *= 2;
/* This is an upper bound */
if (output_size > 2 + (input_chars * MAX_EXPANSION)) {
output_size = 2 + (input_chars * MAX_EXPANSION);
}
if (_PyString_Resize(&rval, output_size) == -1) {
return NULL;
}
output = PyString_AS_STRING(rval);
}
}
output[chars++] = '"';
if (_PyString_Resize(&rval, chars) == -1) {
return NULL;
}
return rval;
}
static PyObject *
ascii_escape_str(PyObject *pystr)
{
Py_ssize_t i;
Py_ssize_t input_chars;
Py_ssize_t output_size;
Py_ssize_t chars;
PyObject *rval;
char *output;
char *input_str;
input_chars = PyString_GET_SIZE(pystr);
input_str = PyString_AS_STRING(pystr);
/* One char input can be up to 6 chars output, estimate 4 of these */
output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
rval = PyString_FromStringAndSize(NULL, output_size);
if (rval == NULL) {
return NULL;
}
output = PyString_AS_STRING(rval);
chars = 0;
output[chars++] = '"';
for (i = 0; i < input_chars; i++) {
Py_UNICODE c = (Py_UNICODE)input_str[i];
if (S_CHAR(c)) {
output[chars++] = (char)c;
}
else if (c > 0x7F) {
/* We hit a non-ASCII character, bail to unicode mode */
PyObject *uni;
Py_DECREF(rval);
uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
if (uni == NULL) {
return NULL;
}
rval = ascii_escape_unicode(uni);
Py_DECREF(uni);
return rval;
}
else {
chars = ascii_escape_char(c, output, chars);
}
/* An ASCII char can't possibly expand to a surrogate! */
if (output_size - chars < (1 + MIN_EXPANSION)) {
/* There's more than four, so let's resize by a lot */
output_size *= 2;
if (output_size > 2 + (input_chars * MIN_EXPANSION)) {
output_size = 2 + (input_chars * MIN_EXPANSION);
}
if (_PyString_Resize(&rval, output_size) == -1) {
return NULL;
}
output = PyString_AS_STRING(rval);
}
}
output[chars++] = '"';
if (_PyString_Resize(&rval, chars) == -1) {
return NULL;
}
return rval;
}
void
raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
{
static PyObject *errmsg_fn = NULL;
PyObject *pymsg;
if (errmsg_fn == NULL) {
PyObject *decoder = PyImport_ImportModule("json.decoder");
if (decoder == NULL)
return;
errmsg_fn = PyObject_GetAttrString(decoder, "errmsg");
if (errmsg_fn == NULL)
return;
Py_DECREF(decoder);
}
pymsg = PyObject_CallFunction(errmsg_fn, "(zOn)", msg, s, end);
if (pymsg) {
PyErr_SetObject(PyExc_ValueError, pymsg);
Py_DECREF(pymsg);
}
/*
def linecol(doc, pos):
lineno = doc.count('\n', 0, pos) + 1
if lineno == 1:
colno = pos
else:
colno = pos - doc.rindex('\n', 0, pos)
return lineno, colno
def errmsg(msg, doc, pos, end=None):
lineno, colno = linecol(doc, pos)
if end is None:
return '%s: line %d column %d (char %d)' % (msg, lineno, colno, pos)
endlineno, endcolno = linecol(doc, end)
return '%s: line %d column %d - line %d column %d (char %d - %d)' % (
msg, lineno, colno, endlineno, endcolno, pos, end)
*/
}
static PyObject *
join_list_unicode(PyObject *lst)
{
static PyObject *ustr = NULL;
static PyObject *joinstr = NULL;
if (ustr == NULL) {
Py_UNICODE c = 0;
ustr = PyUnicode_FromUnicode(&c, 0);
}
if (joinstr == NULL) {
joinstr = PyString_InternFromString("join");
}
if (joinstr == NULL || ustr == NULL) {
return NULL;
}
return PyObject_CallMethodObjArgs(ustr, joinstr, lst, NULL);
}
static PyObject *
scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict)
{
PyObject *rval;
Py_ssize_t len = PyString_GET_SIZE(pystr);
Py_ssize_t begin = end - 1;
Py_ssize_t next = begin;
char *buf = PyString_AS_STRING(pystr);
PyObject *chunks = PyList_New(0);
if (chunks == NULL) {
goto bail;
}
if (end < 0 || len <= end) {
PyErr_SetString(PyExc_ValueError, "end is out of bounds");
goto bail;
}
while (1) {
/* Find the end of the string or the next escape */
Py_UNICODE c = 0;
PyObject *chunk = NULL;
for (next = end; next < len; next++) {
c = buf[next];
if (c == '"' || c == '\\') {
break;
}
else if (strict && c <= 0x1f) {
raise_errmsg("Invalid control character at", pystr, next);
goto bail;
}
}
if (!(c == '"' || c == '\\')) {
raise_errmsg("Unterminated string starting at", pystr, begin);
goto bail;
}
/* Pick up this chunk if it's not zero length */
if (next != end) {
PyObject *strchunk = PyBuffer_FromMemory(&buf[end], next - end);
if (strchunk == NULL) {
goto bail;
}
chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL);
Py_DECREF(strchunk);
if (chunk == NULL) {
goto bail;
}
if (PyList_Append(chunks, chunk)) {
Py_DECREF(chunk);
goto bail;
}
Py_DECREF(chunk);
}
next++;
if (c == '"') {
end = next;
break;
}
if (next == len) {
raise_errmsg("Unterminated string starting at", pystr, begin);
goto bail;
}
c = buf[next];
if (c != 'u') {
/* Non-unicode backslash escapes */
end = next + 1;
switch (c) {
case '"': break;
case '\\': break;
case '/': break;
case 'b': c = '\b'; break;
case 'f': c = '\f'; break;
case 'n': c = '\n'; break;
case 'r': c = '\r'; break;
case 't': c = '\t'; break;
default: c = 0;
}
if (c == 0) {
raise_errmsg("Invalid \\escape", pystr, end - 2);
goto bail;
}
}
else {
c = 0;
next++;
end = next + 4;
if (end >= len) {
raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
goto bail;
}
/* Decode 4 hex digits */
for (; next < end; next++) {
Py_ssize_t shl = (end - next - 1) << 2;
Py_UNICODE digit = buf[next];
switch (digit) {
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
c |= (digit - '0') << shl; break;
case 'a': case 'b': case 'c': case 'd': case 'e':
case 'f':
c |= (digit - 'a' + 10) << shl; break;
case 'A': case 'B': case 'C': case 'D': case 'E':
case 'F':
c |= (digit - 'A' + 10) << shl; break;
default:
raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
goto bail;
}
}
#ifdef Py_UNICODE_WIDE
/* Surrogate pair */
if (c >= 0xd800 && c <= 0xdbff) {
Py_UNICODE c2 = 0;
if (end + 6 >= len) {
raise_errmsg("Invalid \\uXXXX\\uXXXX surrogate pair", pystr,
end - 5);
}
if (buf[next++] != '\\' || buf[next++] != 'u') {
raise_errmsg("Invalid \\uXXXX\\uXXXX surrogate pair", pystr,
end - 5);
}
end += 6;
/* Decode 4 hex digits */
for (; next < end; next++) {
Py_ssize_t shl = (end - next - 1) << 2;
Py_UNICODE digit = buf[next];
switch (digit) {
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
c2 |= (digit - '0') << shl; break;
case 'a': case 'b': case 'c': case 'd': case 'e':
case 'f':
c2 |= (digit - 'a' + 10) << shl; break;
case 'A': case 'B': case 'C': case 'D': case 'E':
case 'F':
c2 |= (digit - 'A' + 10) << shl; break;
default:
raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
goto bail;
}
}
c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
}
#endif
}
chunk = PyUnicode_FromUnicode(&c, 1);
if (chunk == NULL) {
goto bail;
}
if (PyList_Append(chunks, chunk)) {
Py_DECREF(chunk);
goto bail;
}
Py_DECREF(chunk);
}
rval = join_list_unicode(chunks);
if (rval == NULL) {
goto bail;
}
Py_CLEAR(chunks);
return Py_BuildValue("(Nn)", rval, end);
bail:
Py_XDECREF(chunks);
return NULL;
}
static PyObject *
scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict)
{
PyObject *rval;
Py_ssize_t len = PyUnicode_GET_SIZE(pystr);
Py_ssize_t begin = end - 1;
Py_ssize_t next = begin;
const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);
PyObject *chunks = PyList_New(0);
if (chunks == NULL) {
goto bail;
}
if (end < 0 || len <= end) {
PyErr_SetString(PyExc_ValueError, "end is out of bounds");
goto bail;
}
while (1) {
/* Find the end of the string or the next escape */
Py_UNICODE c = 0;
PyObject *chunk = NULL;
for (next = end; next < len; next++) {
c = buf[next];
if (c == '"' || c == '\\') {
break;
}
else if (strict && c <= 0x1f) {
raise_errmsg("Invalid control character at", pystr, next);
goto bail;
}
}
if (!(c == '"' || c == '\\')) {
raise_errmsg("Unterminated string starting at", pystr, begin);
goto bail;
}
/* Pick up this chunk if it's not zero length */
if (next != end) {
chunk = PyUnicode_FromUnicode(&buf[end], next - end);
if (chunk == NULL) {
goto bail;
}
if (PyList_Append(chunks, chunk)) {
Py_DECREF(chunk);
goto bail;
}
Py_DECREF(chunk);
}
next++;
if (c == '"') {
end = next;
break;
}
if (next == len) {
raise_errmsg("Unterminated string starting at", pystr, begin);
goto bail;
}
c = buf[next];
if (c != 'u') {
/* Non-unicode backslash escapes */
end = next + 1;
switch (c) {
case '"': break;
case '\\': break;
case '/': break;
case 'b': c = '\b'; break;
case 'f': c = '\f'; break;
case 'n': c = '\n'; break;
case 'r': c = '\r'; break;
case 't': c = '\t'; break;
default: c = 0;
}
if (c == 0) {
raise_errmsg("Invalid \\escape", pystr, end - 2);
goto bail;
}
}
else {
c = 0;
next++;
end = next + 4;
if (end >= len) {
raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
goto bail;
}
/* Decode 4 hex digits */
for (; next < end; next++) {
Py_ssize_t shl = (end - next - 1) << 2;
Py_UNICODE digit = buf[next];
switch (digit) {
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
c |= (digit - '0') << shl; break;
case 'a': case 'b': case 'c': case 'd': case 'e':
case 'f':
c |= (digit - 'a' + 10) << shl; break;
case 'A': case 'B': case 'C': case 'D': case 'E':
case 'F':
c |= (digit - 'A' + 10) << shl; break;
default:
raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
goto bail;
}
}
#ifdef Py_UNICODE_WIDE
/* Surrogate pair */
if (c >= 0xd800 && c <= 0xdbff) {
Py_UNICODE c2 = 0;
if (end + 6 >= len) {
raise_errmsg("Invalid \\uXXXX\\uXXXX surrogate pair", pystr,
end - 5);
}
if (buf[next++] != '\\' || buf[next++] != 'u') {
raise_errmsg("Invalid \\uXXXX\\uXXXX surrogate pair", pystr,
end - 5);
}
end += 6;
/* Decode 4 hex digits */
for (; next < end; next++) {
Py_ssize_t shl = (end - next - 1) << 2;
Py_UNICODE digit = buf[next];
switch (digit) {
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
c2 |= (digit - '0') << shl; break;
case 'a': case 'b': case 'c': case 'd': case 'e':
case 'f':
c2 |= (digit - 'a' + 10) << shl; break;
case 'A': case 'B': case 'C': case 'D': case 'E':
case 'F':
c2 |= (digit - 'A' + 10) << shl; break;
default:
raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
goto bail;
}
}
c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
}
#endif
}
chunk = PyUnicode_FromUnicode(&c, 1);
if (chunk == NULL) {
goto bail;
}
if (PyList_Append(chunks, chunk)) {
Py_DECREF(chunk);
goto bail;
}
Py_DECREF(chunk);
}
rval = join_list_unicode(chunks);
if (rval == NULL) {
goto bail;
}
Py_CLEAR(chunks);
return Py_BuildValue("(Nn)", rval, end);
bail:
Py_XDECREF(chunks);
return NULL;
}
PyDoc_STRVAR(pydoc_scanstring,
"scanstring(basestring, end, encoding) -> (str, end)\n");
static PyObject *
py_scanstring(PyObject* self, PyObject *args)
{
PyObject *pystr;
Py_ssize_t end;
char *encoding = NULL;
int strict = 0;
if (!PyArg_ParseTuple(args, "On|zi:scanstring", &pystr, &end, &encoding, &strict)) {
return NULL;
}
if (encoding == NULL) {
encoding = DEFAULT_ENCODING;
}
if (PyString_Check(pystr)) {
return scanstring_str(pystr, end, encoding, strict);
}
else if (PyUnicode_Check(pystr)) {
return scanstring_unicode(pystr, end, strict);
}
else {
PyErr_Format(PyExc_TypeError,
"first argument must be a string or unicode, not %.80s",
Py_TYPE(pystr)->tp_name);
return NULL;
}
}
PyDoc_STRVAR(pydoc_encode_basestring_ascii,
"encode_basestring_ascii(basestring) -> str\n");
static PyObject *
py_encode_basestring_ascii(PyObject* self, PyObject *pystr)
{
/* METH_O */
if (PyString_Check(pystr)) {
return ascii_escape_str(pystr);
}
else if (PyUnicode_Check(pystr)) {
return ascii_escape_unicode(pystr);
}
else {
PyErr_Format(PyExc_TypeError,
"first argument must be a string or unicode, not %.80s",
Py_TYPE(pystr)->tp_name);
return NULL;
}
}
static PyMethodDef json_methods[] = {
{"encode_basestring_ascii", (PyCFunction)py_encode_basestring_ascii,
METH_O, pydoc_encode_basestring_ascii},
{"scanstring", (PyCFunction)py_scanstring, METH_VARARGS,
pydoc_scanstring},
{NULL, NULL, 0, NULL}
};
PyDoc_STRVAR(module_doc,
"json speedups\n");
void
init_json(void)
{
PyObject *m;
m = Py_InitModule3("_json", json_methods, module_doc);
}