Staging
v0.5.0
https://github.com/python/cpython
Raw File
Tip revision: 4ab4ec258c01aa28d4b2019f2d5d2489e3a1daf1 authored by Georg Brandl on 25 August 2012, 10:16:37 UTC
Bump to 3.3.0rc1.
Tip revision: 4ab4ec2
_lzmamodule.c
/* _lzma - Low-level Python interface to liblzma.

   Initial implementation by Per Øyvind Karlsen.
   Rewritten by Nadeem Vawda.

*/

#define PY_SSIZE_T_CLEAN

#include "Python.h"
#include "structmember.h"
#ifdef WITH_THREAD
#include "pythread.h"
#endif

#include <stdarg.h>
#include <string.h>

#include <lzma.h>


#ifndef PY_LONG_LONG
#error "This module requires PY_LONG_LONG to be defined"
#endif


#ifdef WITH_THREAD
#define ACQUIRE_LOCK(obj) do { \
    if (!PyThread_acquire_lock((obj)->lock, 0)) { \
        Py_BEGIN_ALLOW_THREADS \
        PyThread_acquire_lock((obj)->lock, 1); \
        Py_END_ALLOW_THREADS \
    } } while (0)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif


/* Container formats: */
enum {
    FORMAT_AUTO,
    FORMAT_XZ,
    FORMAT_ALONE,
    FORMAT_RAW,
};

#define LZMA_CHECK_UNKNOWN (LZMA_CHECK_ID_MAX + 1)


typedef struct {
    PyObject_HEAD
    lzma_stream lzs;
    int flushed;
#ifdef WITH_THREAD
    PyThread_type_lock lock;
#endif
} Compressor;

typedef struct {
    PyObject_HEAD
    lzma_stream lzs;
    int check;
    char eof;
    PyObject *unused_data;
#ifdef WITH_THREAD
    PyThread_type_lock lock;
#endif
} Decompressor;

/* LZMAError class object. */
static PyObject *Error;

/* An empty tuple, used by the filter specifier parsing code. */
static PyObject *empty_tuple;


/* Helper functions. */

static int
catch_lzma_error(lzma_ret lzret)
{
    switch (lzret) {
        case LZMA_OK:
        case LZMA_GET_CHECK:
        case LZMA_NO_CHECK:
        case LZMA_STREAM_END:
            return 0;
        case LZMA_UNSUPPORTED_CHECK:
            PyErr_SetString(Error, "Unsupported integrity check");
            return 1;
        case LZMA_MEM_ERROR:
            PyErr_NoMemory();
            return 1;
        case LZMA_MEMLIMIT_ERROR:
            PyErr_SetString(Error, "Memory usage limit exceeded");
            return 1;
        case LZMA_FORMAT_ERROR:
            PyErr_SetString(Error, "Input format not supported by decoder");
            return 1;
        case LZMA_OPTIONS_ERROR:
            PyErr_SetString(Error, "Invalid or unsupported options");
            return 1;
        case LZMA_DATA_ERROR:
            PyErr_SetString(Error, "Corrupt input data");
            return 1;
        case LZMA_BUF_ERROR:
            PyErr_SetString(Error, "Insufficient buffer space");
            return 1;
        case LZMA_PROG_ERROR:
            PyErr_SetString(Error, "Internal error");
            return 1;
        default:
            PyErr_Format(Error, "Unrecognized error from liblzma: %d", lzret);
            return 1;
    }
}

#if BUFSIZ < 8192
#define INITIAL_BUFFER_SIZE 8192
#else
#define INITIAL_BUFFER_SIZE BUFSIZ
#endif

static int
grow_buffer(PyObject **buf)
{
    size_t size = PyBytes_GET_SIZE(*buf);
    return _PyBytes_Resize(buf, size + (size >> 3) + 6);
}


/* Some custom type conversions for PyArg_ParseTupleAndKeywords(),
   since the predefined conversion specifiers do not suit our needs:

      uint32_t - the "I" (unsigned int) specifier is the right size, but
      silently ignores overflows on conversion.

      lzma_vli - the "K" (unsigned PY_LONG_LONG) specifier is the right
      size, but like "I" it silently ignores overflows on conversion.

      lzma_mode and lzma_match_finder - these are enumeration types, and
      so the size of each is implementation-defined. Worse, different
      enum types can be of different sizes within the same program, so
      to be strictly correct, we need to define two separate converters.
 */

#define INT_TYPE_CONVERTER_FUNC(TYPE, FUNCNAME) \
    static int \
    FUNCNAME(PyObject *obj, void *ptr) \
    { \
        unsigned PY_LONG_LONG val; \
        \
        val = PyLong_AsUnsignedLongLong(obj); \
        if (PyErr_Occurred()) \
            return 0; \
        if ((unsigned PY_LONG_LONG)(TYPE)val != val) { \
            PyErr_SetString(PyExc_OverflowError, \
                            "Value too large for " #TYPE " type"); \
            return 0; \
        } \
        *(TYPE *)ptr = (TYPE)val; \
        return 1; \
    }

INT_TYPE_CONVERTER_FUNC(uint32_t, uint32_converter)
INT_TYPE_CONVERTER_FUNC(lzma_vli, lzma_vli_converter)
INT_TYPE_CONVERTER_FUNC(lzma_mode, lzma_mode_converter)
INT_TYPE_CONVERTER_FUNC(lzma_match_finder, lzma_mf_converter)

#undef INT_TYPE_CONVERTER_FUNC


/* Filter specifier parsing.

   This code handles converting filter specifiers (Python dicts) into
   the C lzma_filter structs expected by liblzma. */

static void *
parse_filter_spec_lzma(PyObject *spec)
{
    static char *optnames[] = {"id", "preset", "dict_size", "lc", "lp",
                               "pb", "mode", "nice_len", "mf", "depth", NULL};
    PyObject *id;
    PyObject *preset_obj;
    uint32_t preset = LZMA_PRESET_DEFAULT;
    lzma_options_lzma *options;

    /* First, fill in default values for all the options using a preset.
       Then, override the defaults with any values given by the caller. */

    preset_obj = PyMapping_GetItemString(spec, "preset");
    if (preset_obj == NULL) {
        if (PyErr_ExceptionMatches(PyExc_KeyError))
            PyErr_Clear();
        else
            return NULL;
    } else {
        int ok = uint32_converter(preset_obj, &preset);
        Py_DECREF(preset_obj);
        if (!ok)
            return NULL;
    }

    options = (lzma_options_lzma *)PyMem_Malloc(sizeof *options);
    if (options == NULL)
        return PyErr_NoMemory();
    memset(options, 0, sizeof *options);

    if (lzma_lzma_preset(options, preset)) {
        PyMem_Free(options);
        PyErr_Format(Error, "Invalid compression preset: %d", preset);
        return NULL;
    }

    if (!PyArg_ParseTupleAndKeywords(empty_tuple, spec,
                                     "|OOO&O&O&O&O&O&O&O&", optnames,
                                     &id, &preset_obj,
                                     uint32_converter, &options->dict_size,
                                     uint32_converter, &options->lc,
                                     uint32_converter, &options->lp,
                                     uint32_converter, &options->pb,
                                     lzma_mode_converter, &options->mode,
                                     uint32_converter, &options->nice_len,
                                     lzma_mf_converter, &options->mf,
                                     uint32_converter, &options->depth)) {
        PyErr_SetString(PyExc_ValueError,
                        "Invalid filter specifier for LZMA filter");
        PyMem_Free(options);
        options = NULL;
    }
    return options;
}

static void *
parse_filter_spec_delta(PyObject *spec)
{
    static char *optnames[] = {"id", "dist", NULL};
    PyObject *id;
    uint32_t dist = 1;
    lzma_options_delta *options;

    if (!PyArg_ParseTupleAndKeywords(empty_tuple, spec, "|OO&", optnames,
                                     &id, uint32_converter, &dist)) {
        PyErr_SetString(PyExc_ValueError,
                        "Invalid filter specifier for delta filter");
        return NULL;
    }

    options = (lzma_options_delta *)PyMem_Malloc(sizeof *options);
    if (options == NULL)
        return PyErr_NoMemory();
    memset(options, 0, sizeof *options);
    options->type = LZMA_DELTA_TYPE_BYTE;
    options->dist = dist;
    return options;
}

static void *
parse_filter_spec_bcj(PyObject *spec)
{
    static char *optnames[] = {"id", "start_offset", NULL};
    PyObject *id;
    uint32_t start_offset = 0;
    lzma_options_bcj *options;

    if (!PyArg_ParseTupleAndKeywords(empty_tuple, spec, "|OO&", optnames,
                                     &id, uint32_converter, &start_offset)) {
        PyErr_SetString(PyExc_ValueError,
                        "Invalid filter specifier for BCJ filter");
        return NULL;
    }

    options = (lzma_options_bcj *)PyMem_Malloc(sizeof *options);
    if (options == NULL)
        return PyErr_NoMemory();
    memset(options, 0, sizeof *options);
    options->start_offset = start_offset;
    return options;
}

static void *
parse_filter_spec(lzma_filter *f, PyObject *spec)
{
    PyObject *id_obj;

    if (!PyMapping_Check(spec)) {
        PyErr_SetString(PyExc_TypeError,
                        "Filter specifier must be a dict or dict-like object");
        return NULL;
    }
    id_obj = PyMapping_GetItemString(spec, "id");
    if (id_obj == NULL) {
        if (PyErr_ExceptionMatches(PyExc_KeyError))
            PyErr_SetString(PyExc_ValueError,
                            "Filter specifier must have an \"id\" entry");
        return NULL;
    }
    f->id = PyLong_AsUnsignedLongLong(id_obj);
    Py_DECREF(id_obj);
    if (PyErr_Occurred())
        return NULL;

    switch (f->id) {
        case LZMA_FILTER_LZMA1:
        case LZMA_FILTER_LZMA2:
            f->options = parse_filter_spec_lzma(spec);
            return f->options;
        case LZMA_FILTER_DELTA:
            f->options = parse_filter_spec_delta(spec);
            return f->options;
        case LZMA_FILTER_X86:
        case LZMA_FILTER_POWERPC:
        case LZMA_FILTER_IA64:
        case LZMA_FILTER_ARM:
        case LZMA_FILTER_ARMTHUMB:
        case LZMA_FILTER_SPARC:
            f->options = parse_filter_spec_bcj(spec);
            return f->options;
        default:
            PyErr_Format(PyExc_ValueError, "Invalid filter ID: %llu", f->id);
            return NULL;
    }
}

static void
free_filter_chain(lzma_filter filters[])
{
    int i;

    for (i = 0; filters[i].id != LZMA_VLI_UNKNOWN; i++)
        PyMem_Free(filters[i].options);
}

static int
parse_filter_chain_spec(lzma_filter filters[], PyObject *filterspecs)
{
    Py_ssize_t i, num_filters;

    num_filters = PySequence_Length(filterspecs);
    if (num_filters == -1)
        return -1;
    if (num_filters > LZMA_FILTERS_MAX) {
        PyErr_Format(PyExc_ValueError,
                     "Too many filters - liblzma supports a maximum of %d",
                     LZMA_FILTERS_MAX);
        return -1;
    }

    for (i = 0; i < num_filters; i++) {
        int ok = 1;
        PyObject *spec = PySequence_GetItem(filterspecs, i);
        if (spec == NULL || parse_filter_spec(&filters[i], spec) == NULL)
            ok = 0;
        Py_XDECREF(spec);
        if (!ok) {
            filters[i].id = LZMA_VLI_UNKNOWN;
            free_filter_chain(filters);
            return -1;
        }
    }
    filters[num_filters].id = LZMA_VLI_UNKNOWN;
    return 0;
}


/* Filter specifier construction.

   This code handles converting C lzma_filter structs into
   Python-level filter specifiers (represented as dicts). */

static int
spec_add_field(PyObject *spec, _Py_Identifier *key, unsigned PY_LONG_LONG value)
{
    int status;
    PyObject *value_object;

    value_object = PyLong_FromUnsignedLongLong(value);
    if (value_object == NULL)
        return -1;

    status = _PyDict_SetItemId(spec, key, value_object);
    Py_DECREF(value_object);
    return status;
}

static PyObject *
build_filter_spec(const lzma_filter *f)
{
    PyObject *spec;

    spec = PyDict_New();
    if (spec == NULL)
        return NULL;

#define ADD_FIELD(SOURCE, FIELD) \
    do { \
        _Py_IDENTIFIER(FIELD); \
        if (spec_add_field(spec, &PyId_##FIELD, SOURCE->FIELD) == -1) \
            goto error;\
    } while (0)

    ADD_FIELD(f, id);

    switch (f->id) {
        /* For LZMA1 filters, lzma_properties_{encode,decode}() only look at the
           lc, lp, pb, and dict_size fields. For LZMA2 filters, only the
           dict_size field is used. */
        case LZMA_FILTER_LZMA1: {
            lzma_options_lzma *options = f->options;
            ADD_FIELD(options, lc);
            ADD_FIELD(options, lp);
            ADD_FIELD(options, pb);
            ADD_FIELD(options, dict_size);
            break;
        }
        case LZMA_FILTER_LZMA2: {
            lzma_options_lzma *options = f->options;
            ADD_FIELD(options, dict_size);
            break;
        }
        case LZMA_FILTER_DELTA: {
            lzma_options_delta *options = f->options;
            ADD_FIELD(options, dist);
            break;
        }
        case LZMA_FILTER_X86:
        case LZMA_FILTER_POWERPC:
        case LZMA_FILTER_IA64:
        case LZMA_FILTER_ARM:
        case LZMA_FILTER_ARMTHUMB:
        case LZMA_FILTER_SPARC: {
            lzma_options_bcj *options = f->options;
            ADD_FIELD(options, start_offset);
            break;
        }
        default:
            PyErr_Format(PyExc_ValueError, "Invalid filter ID: %llu", f->id);
            goto error;
    }

#undef ADD_FIELD

    return spec;

error:
    Py_DECREF(spec);
    return NULL;
}


/* LZMACompressor class. */

static PyObject *
compress(Compressor *c, uint8_t *data, size_t len, lzma_action action)
{
    size_t data_size = 0;
    PyObject *result;

    result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
    if (result == NULL)
        return NULL;
    c->lzs.next_in = data;
    c->lzs.avail_in = len;
    c->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result);
    c->lzs.avail_out = PyBytes_GET_SIZE(result);
    for (;;) {
        lzma_ret lzret;

        Py_BEGIN_ALLOW_THREADS
        lzret = lzma_code(&c->lzs, action);
        data_size = (char *)c->lzs.next_out - PyBytes_AS_STRING(result);
        Py_END_ALLOW_THREADS
        if (catch_lzma_error(lzret))
            goto error;
        if ((action == LZMA_RUN && c->lzs.avail_in == 0) ||
            (action == LZMA_FINISH && lzret == LZMA_STREAM_END)) {
            break;
        } else if (c->lzs.avail_out == 0) {
            if (grow_buffer(&result) == -1)
                goto error;
            c->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result) + data_size;
            c->lzs.avail_out = PyBytes_GET_SIZE(result) - data_size;
        }
    }
    if (data_size != PyBytes_GET_SIZE(result))
        if (_PyBytes_Resize(&result, data_size) == -1)
            goto error;
    return result;

error:
    Py_XDECREF(result);
    return NULL;
}

PyDoc_STRVAR(Compressor_compress_doc,
"compress(data) -> bytes\n"
"\n"
"Provide data to the compressor object. Returns a chunk of\n"
"compressed data if possible, or b\"\" otherwise.\n"
"\n"
"When you have finished providing data to the compressor, call the\n"
"flush() method to finish the conversion process.\n");

static PyObject *
Compressor_compress(Compressor *self, PyObject *args)
{
    Py_buffer buffer;
    PyObject *result = NULL;

    if (!PyArg_ParseTuple(args, "y*:compress", &buffer))
        return NULL;

    ACQUIRE_LOCK(self);
    if (self->flushed)
        PyErr_SetString(PyExc_ValueError, "Compressor has been flushed");
    else
        result = compress(self, buffer.buf, buffer.len, LZMA_RUN);
    RELEASE_LOCK(self);
    PyBuffer_Release(&buffer);
    return result;
}

PyDoc_STRVAR(Compressor_flush_doc,
"flush() -> bytes\n"
"\n"
"Finish the compression process. Returns the compressed data left\n"
"in internal buffers.\n"
"\n"
"The compressor object cannot be used after this method is called.\n");

static PyObject *
Compressor_flush(Compressor *self, PyObject *noargs)
{
    PyObject *result = NULL;

    ACQUIRE_LOCK(self);
    if (self->flushed) {
        PyErr_SetString(PyExc_ValueError, "Repeated call to flush()");
    } else {
        self->flushed = 1;
        result = compress(self, NULL, 0, LZMA_FINISH);
    }
    RELEASE_LOCK(self);
    return result;
}

static int
Compressor_init_xz(lzma_stream *lzs, int check, uint32_t preset,
                   PyObject *filterspecs)
{
    lzma_ret lzret;

    if (filterspecs == Py_None) {
        lzret = lzma_easy_encoder(lzs, preset, check);
    } else {
        lzma_filter filters[LZMA_FILTERS_MAX + 1];

        if (parse_filter_chain_spec(filters, filterspecs) == -1)
            return -1;
        lzret = lzma_stream_encoder(lzs, filters, check);
        free_filter_chain(filters);
    }
    if (catch_lzma_error(lzret))
        return -1;
    else
        return 0;
}

static int
Compressor_init_alone(lzma_stream *lzs, uint32_t preset, PyObject *filterspecs)
{
    lzma_ret lzret;

    if (filterspecs == Py_None) {
        lzma_options_lzma options;

        if (lzma_lzma_preset(&options, preset)) {
            PyErr_Format(Error, "Invalid compression preset: %d", preset);
            return -1;
        }
        lzret = lzma_alone_encoder(lzs, &options);
    } else {
        lzma_filter filters[LZMA_FILTERS_MAX + 1];

        if (parse_filter_chain_spec(filters, filterspecs) == -1)
            return -1;
        if (filters[0].id == LZMA_FILTER_LZMA1 &&
            filters[1].id == LZMA_VLI_UNKNOWN) {
            lzret = lzma_alone_encoder(lzs, filters[0].options);
        } else {
            PyErr_SetString(PyExc_ValueError,
                            "Invalid filter chain for FORMAT_ALONE - "
                            "must be a single LZMA1 filter");
            lzret = LZMA_PROG_ERROR;
        }
        free_filter_chain(filters);
    }
    if (PyErr_Occurred() || catch_lzma_error(lzret))
        return -1;
    else
        return 0;
}

static int
Compressor_init_raw(lzma_stream *lzs, PyObject *filterspecs)
{
    lzma_filter filters[LZMA_FILTERS_MAX + 1];
    lzma_ret lzret;

    if (filterspecs == Py_None) {
        PyErr_SetString(PyExc_ValueError,
                        "Must specify filters for FORMAT_RAW");
        return -1;
    }
    if (parse_filter_chain_spec(filters, filterspecs) == -1)
        return -1;
    lzret = lzma_raw_encoder(lzs, filters);
    free_filter_chain(filters);
    if (catch_lzma_error(lzret))
        return -1;
    else
        return 0;
}

static int
Compressor_init(Compressor *self, PyObject *args, PyObject *kwargs)
{
    static char *arg_names[] = {"format", "check", "preset", "filters", NULL};
    int format = FORMAT_XZ;
    int check = -1;
    uint32_t preset = LZMA_PRESET_DEFAULT;
    PyObject *preset_obj = Py_None;
    PyObject *filterspecs = Py_None;

    if (!PyArg_ParseTupleAndKeywords(args, kwargs,
                                     "|iiOO:LZMACompressor", arg_names,
                                     &format, &check, &preset_obj,
                                     &filterspecs))
        return -1;

    if (format != FORMAT_XZ && check != -1 && check != LZMA_CHECK_NONE) {
        PyErr_SetString(PyExc_ValueError,
                        "Integrity checks are only supported by FORMAT_XZ");
        return -1;
    }

    if (preset_obj != Py_None && filterspecs != Py_None) {
        PyErr_SetString(PyExc_ValueError,
                        "Cannot specify both preset and filter chain");
        return -1;
    }

    if (preset_obj != Py_None)
        if (!uint32_converter(preset_obj, &preset))
            return -1;

#ifdef WITH_THREAD
    self->lock = PyThread_allocate_lock();
    if (self->lock == NULL) {
        PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
        return -1;
    }
#endif

    self->flushed = 0;
    switch (format) {
        case FORMAT_XZ:
            if (check == -1)
                check = LZMA_CHECK_CRC64;
            if (Compressor_init_xz(&self->lzs, check, preset, filterspecs) != 0)
                break;
            return 0;

        case FORMAT_ALONE:
            if (Compressor_init_alone(&self->lzs, preset, filterspecs) != 0)
                break;
            return 0;

        case FORMAT_RAW:
            if (Compressor_init_raw(&self->lzs, filterspecs) != 0)
                break;
            return 0;

        default:
            PyErr_Format(PyExc_ValueError,
                         "Invalid container format: %d", format);
            break;
    }

#ifdef WITH_THREAD
    PyThread_free_lock(self->lock);
    self->lock = NULL;
#endif
    return -1;
}

static void
Compressor_dealloc(Compressor *self)
{
    lzma_end(&self->lzs);
#ifdef WITH_THREAD
    if (self->lock != NULL)
        PyThread_free_lock(self->lock);
#endif
    Py_TYPE(self)->tp_free((PyObject *)self);
}

static PyMethodDef Compressor_methods[] = {
    {"compress", (PyCFunction)Compressor_compress, METH_VARARGS,
     Compressor_compress_doc},
    {"flush", (PyCFunction)Compressor_flush, METH_NOARGS,
     Compressor_flush_doc},
    {NULL}
};

PyDoc_STRVAR(Compressor_doc,
"LZMACompressor(format=FORMAT_XZ, check=-1, preset=None, filters=None)\n"
"\n"
"Create a compressor object for compressing data incrementally.\n"
"\n"
"format specifies the container format to use for the output. This can\n"
"be FORMAT_XZ (default), FORMAT_ALONE, or FORMAT_RAW.\n"
"\n"
"check specifies the integrity check to use. For FORMAT_XZ, the default\n"
"is CHECK_CRC64. FORMAT_ALONE and FORMAT_RAW do not suport integrity\n"
"checks; for these formats, check must be omitted, or be CHECK_NONE.\n"
"\n"
"The settings used by the compressor can be specified either as a\n"
"preset compression level (with the 'preset' argument), or in detail\n"
"as a custom filter chain (with the 'filters' argument). For FORMAT_XZ\n"
"and FORMAT_ALONE, the default is to use the PRESET_DEFAULT preset\n"
"level. For FORMAT_RAW, the caller must always specify a filter chain;\n"
"the raw compressor does not support preset compression levels.\n"
"\n"
"preset (if provided) should be an integer in the range 0-9, optionally\n"
"OR-ed with the constant PRESET_EXTREME.\n"
"\n"
"filters (if provided) should be a sequence of dicts. Each dict should\n"
"have an entry for \"id\" indicating the ID of the filter, plus\n"
"additional entries for options to the filter.\n"
"\n"
"For one-shot compression, use the compress() function instead.\n");

static PyTypeObject Compressor_type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_lzma.LZMACompressor",             /* tp_name */
    sizeof(Compressor),                 /* tp_basicsize */
    0,                                  /* tp_itemsize */
    (destructor)Compressor_dealloc,     /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_reserved */
    0,                                  /* tp_repr */
    0,                                  /* tp_as_number */
    0,                                  /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    0,                                  /* tp_str */
    0,                                  /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
    Compressor_doc,                     /* tp_doc */
    0,                                  /* tp_traverse */
    0,                                  /* tp_clear */
    0,                                  /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    0,                                  /* tp_iter */
    0,                                  /* tp_iternext */
    Compressor_methods,                 /* tp_methods */
    0,                                  /* tp_members */
    0,                                  /* tp_getset */
    0,                                  /* tp_base */
    0,                                  /* tp_dict */
    0,                                  /* tp_descr_get */
    0,                                  /* tp_descr_set */
    0,                                  /* tp_dictoffset */
    (initproc)Compressor_init,          /* tp_init */
    0,                                  /* tp_alloc */
    PyType_GenericNew,                  /* tp_new */
};


/* LZMADecompressor class. */

static PyObject *
decompress(Decompressor *d, uint8_t *data, size_t len)
{
    size_t data_size = 0;
    PyObject *result;

    result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
    if (result == NULL)
        return NULL;
    d->lzs.next_in = data;
    d->lzs.avail_in = len;
    d->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result);
    d->lzs.avail_out = PyBytes_GET_SIZE(result);
    for (;;) {
        lzma_ret lzret;

        Py_BEGIN_ALLOW_THREADS
        lzret = lzma_code(&d->lzs, LZMA_RUN);
        data_size = (char *)d->lzs.next_out - PyBytes_AS_STRING(result);
        Py_END_ALLOW_THREADS
        if (catch_lzma_error(lzret))
            goto error;
        if (lzret == LZMA_GET_CHECK || lzret == LZMA_NO_CHECK)
            d->check = lzma_get_check(&d->lzs);
        if (lzret == LZMA_STREAM_END) {
            d->eof = 1;
            if (d->lzs.avail_in > 0) {
                Py_CLEAR(d->unused_data);
                d->unused_data = PyBytes_FromStringAndSize(
                        (char *)d->lzs.next_in, d->lzs.avail_in);
                if (d->unused_data == NULL)
                    goto error;
            }
            break;
        } else if (d->lzs.avail_in == 0) {
            break;
        } else if (d->lzs.avail_out == 0) {
            if (grow_buffer(&result) == -1)
                goto error;
            d->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result) + data_size;
            d->lzs.avail_out = PyBytes_GET_SIZE(result) - data_size;
        }
    }
    if (data_size != PyBytes_GET_SIZE(result))
        if (_PyBytes_Resize(&result, data_size) == -1)
            goto error;
    return result;

error:
    Py_XDECREF(result);
    return NULL;
}

PyDoc_STRVAR(Decompressor_decompress_doc,
"decompress(data) -> bytes\n"
"\n"
"Provide data to the decompressor object. Returns a chunk of\n"
"decompressed data if possible, or b\"\" otherwise.\n"
"\n"
"Attempting to decompress data after the end of the stream is\n"
"reached raises an EOFError. Any data found after the end of the\n"
"stream is ignored, and saved in the unused_data attribute.\n");

static PyObject *
Decompressor_decompress(Decompressor *self, PyObject *args)
{
    Py_buffer buffer;
    PyObject *result = NULL;

    if (!PyArg_ParseTuple(args, "y*:decompress", &buffer))
        return NULL;

    ACQUIRE_LOCK(self);
    if (self->eof)
        PyErr_SetString(PyExc_EOFError, "Already at end of stream");
    else
        result = decompress(self, buffer.buf, buffer.len);
    RELEASE_LOCK(self);
    PyBuffer_Release(&buffer);
    return result;
}

static int
Decompressor_init_raw(lzma_stream *lzs, PyObject *filterspecs)
{
    lzma_filter filters[LZMA_FILTERS_MAX + 1];
    lzma_ret lzret;

    if (parse_filter_chain_spec(filters, filterspecs) == -1)
        return -1;
    lzret = lzma_raw_decoder(lzs, filters);
    free_filter_chain(filters);
    if (catch_lzma_error(lzret))
        return -1;
    else
        return 0;
}

static int
Decompressor_init(Decompressor *self, PyObject *args, PyObject *kwargs)
{
    static char *arg_names[] = {"format", "memlimit", "filters", NULL};
    const uint32_t decoder_flags = LZMA_TELL_ANY_CHECK | LZMA_TELL_NO_CHECK;
    int format = FORMAT_AUTO;
    uint64_t memlimit = UINT64_MAX;
    PyObject *memlimit_obj = Py_None;
    PyObject *filterspecs = Py_None;
    lzma_ret lzret;

    if (!PyArg_ParseTupleAndKeywords(args, kwargs,
                                     "|iOO:LZMADecompressor", arg_names,
                                     &format, &memlimit_obj, &filterspecs))
        return -1;

    if (memlimit_obj != Py_None) {
        if (format == FORMAT_RAW) {
            PyErr_SetString(PyExc_ValueError,
                            "Cannot specify memory limit with FORMAT_RAW");
            return -1;
        }
        memlimit = PyLong_AsUnsignedLongLong(memlimit_obj);
        if (PyErr_Occurred())
            return -1;
    }

    if (format == FORMAT_RAW && filterspecs == Py_None) {
        PyErr_SetString(PyExc_ValueError,
                        "Must specify filters for FORMAT_RAW");
        return -1;
    } else if (format != FORMAT_RAW && filterspecs != Py_None) {
        PyErr_SetString(PyExc_ValueError,
                        "Cannot specify filters except with FORMAT_RAW");
        return -1;
    }

#ifdef WITH_THREAD
    self->lock = PyThread_allocate_lock();
    if (self->lock == NULL) {
        PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
        return -1;
    }
#endif

    self->check = LZMA_CHECK_UNKNOWN;
    self->unused_data = PyBytes_FromStringAndSize(NULL, 0);
    if (self->unused_data == NULL)
        goto error;

    switch (format) {
        case FORMAT_AUTO:
            lzret = lzma_auto_decoder(&self->lzs, memlimit, decoder_flags);
            if (catch_lzma_error(lzret))
                break;
            return 0;

        case FORMAT_XZ:
            lzret = lzma_stream_decoder(&self->lzs, memlimit, decoder_flags);
            if (catch_lzma_error(lzret))
                break;
            return 0;

        case FORMAT_ALONE:
            self->check = LZMA_CHECK_NONE;
            lzret = lzma_alone_decoder(&self->lzs, memlimit);
            if (catch_lzma_error(lzret))
                break;
            return 0;

        case FORMAT_RAW:
            self->check = LZMA_CHECK_NONE;
            if (Decompressor_init_raw(&self->lzs, filterspecs) == -1)
                break;
            return 0;

        default:
            PyErr_Format(PyExc_ValueError,
                         "Invalid container format: %d", format);
            break;
    }

error:
    Py_CLEAR(self->unused_data);
#ifdef WITH_THREAD
    PyThread_free_lock(self->lock);
    self->lock = NULL;
#endif
    return -1;
}

static void
Decompressor_dealloc(Decompressor *self)
{
    lzma_end(&self->lzs);
    Py_CLEAR(self->unused_data);
#ifdef WITH_THREAD
    if (self->lock != NULL)
        PyThread_free_lock(self->lock);
#endif
    Py_TYPE(self)->tp_free((PyObject *)self);
}

static PyMethodDef Decompressor_methods[] = {
    {"decompress", (PyCFunction)Decompressor_decompress, METH_VARARGS,
     Decompressor_decompress_doc},
    {NULL}
};

PyDoc_STRVAR(Decompressor_check_doc,
"ID of the integrity check used by the input stream.");

PyDoc_STRVAR(Decompressor_eof_doc,
"True if the end-of-stream marker has been reached.");

PyDoc_STRVAR(Decompressor_unused_data_doc,
"Data found after the end of the compressed stream.");

static PyMemberDef Decompressor_members[] = {
    {"check", T_INT, offsetof(Decompressor, check), READONLY,
     Decompressor_check_doc},
    {"eof", T_BOOL, offsetof(Decompressor, eof), READONLY,
     Decompressor_eof_doc},
    {"unused_data", T_OBJECT_EX, offsetof(Decompressor, unused_data), READONLY,
     Decompressor_unused_data_doc},
    {NULL}
};

PyDoc_STRVAR(Decompressor_doc,
"LZMADecompressor(format=FORMAT_AUTO, memlimit=None, filters=None)\n"
"\n"
"Create a decompressor object for decompressing data incrementally.\n"
"\n"
"format specifies the container format of the input stream. If this is\n"
"FORMAT_AUTO (the default), the decompressor will automatically detect\n"
"whether the input is FORMAT_XZ or FORMAT_ALONE. Streams created with\n"
"FORMAT_RAW cannot be autodetected.\n"
"\n"
"memlimit can be specified to limit the amount of memory used by the\n"
"decompressor. This will cause decompression to fail if the input\n"
"cannot be decompressed within the given limit.\n"
"\n"
"filters specifies a custom filter chain. This argument is required for\n"
"FORMAT_RAW, and not accepted with any other format. When provided,\n"
"this should be a sequence of dicts, each indicating the ID and options\n"
"for a single filter.\n"
"\n"
"For one-shot decompression, use the decompress() function instead.\n");

static PyTypeObject Decompressor_type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_lzma.LZMADecompressor",           /* tp_name */
    sizeof(Decompressor),               /* tp_basicsize */
    0,                                  /* tp_itemsize */
    (destructor)Decompressor_dealloc,   /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_reserved */
    0,                                  /* tp_repr */
    0,                                  /* tp_as_number */
    0,                                  /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    0,                                  /* tp_str */
    0,                                  /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
    Decompressor_doc,                   /* tp_doc */
    0,                                  /* tp_traverse */
    0,                                  /* tp_clear */
    0,                                  /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    0,                                  /* tp_iter */
    0,                                  /* tp_iternext */
    Decompressor_methods,               /* tp_methods */
    Decompressor_members,               /* tp_members */
    0,                                  /* tp_getset */
    0,                                  /* tp_base */
    0,                                  /* tp_dict */
    0,                                  /* tp_descr_get */
    0,                                  /* tp_descr_set */
    0,                                  /* tp_dictoffset */
    (initproc)Decompressor_init,        /* tp_init */
    0,                                  /* tp_alloc */
    PyType_GenericNew,                  /* tp_new */
};


/* Module-level functions. */

PyDoc_STRVAR(is_check_supported_doc,
"is_check_supported(check_id) -> bool\n"
"\n"
"Test whether the given integrity check is supported.\n"
"\n"
"Always returns True for CHECK_NONE and CHECK_CRC32.\n");

static PyObject *
is_check_supported(PyObject *self, PyObject *args)
{
    int check_id;

    if (!PyArg_ParseTuple(args, "i:is_check_supported", &check_id))
        return NULL;

    return PyBool_FromLong(lzma_check_is_supported(check_id));
}


PyDoc_STRVAR(_encode_filter_properties_doc,
"_encode_filter_properties(filter) -> bytes\n"
"\n"
"Return a bytes object encoding the options (properties) of the filter\n"
"specified by *filter* (a dict).\n"
"\n"
"The result does not include the filter ID itself, only the options.\n");

static PyObject *
_encode_filter_properties(PyObject *self, PyObject *args)
{
    PyObject *filterspec;
    lzma_filter filter;
    lzma_ret lzret;
    uint32_t encoded_size;
    PyObject *result = NULL;

    if (!PyArg_ParseTuple(args, "O:_encode_filter_properties", &filterspec))
        return NULL;

    if (parse_filter_spec(&filter, filterspec) == NULL)
        return NULL;

    lzret = lzma_properties_size(&encoded_size, &filter);
    if (catch_lzma_error(lzret))
        goto error;

    result = PyBytes_FromStringAndSize(NULL, encoded_size);
    if (result == NULL)
        goto error;

    lzret = lzma_properties_encode(
            &filter, (uint8_t *)PyBytes_AS_STRING(result));
    if (catch_lzma_error(lzret))
        goto error;

    PyMem_Free(filter.options);
    return result;

error:
    Py_XDECREF(result);
    PyMem_Free(filter.options);
    return NULL;
}


PyDoc_STRVAR(_decode_filter_properties_doc,
"_decode_filter_properties(filter_id, encoded_props) -> dict\n"
"\n"
"Return a dict describing a filter with ID *filter_id*, and options\n"
"(properties) decoded from the bytes object *encoded_props*.\n");

static PyObject *
_decode_filter_properties(PyObject *self, PyObject *args)
{
    Py_buffer encoded_props;
    lzma_filter filter;
    lzma_ret lzret;
    PyObject *result = NULL;

    if (!PyArg_ParseTuple(args, "O&y*:_decode_filter_properties",
                          lzma_vli_converter, &filter.id, &encoded_props))
        return NULL;

    lzret = lzma_properties_decode(
            &filter, NULL, encoded_props.buf, encoded_props.len);
    PyBuffer_Release(&encoded_props);
    if (catch_lzma_error(lzret))
        return NULL;

    result = build_filter_spec(&filter);

    /* We use vanilla free() here instead of PyMem_Free() - filter.options was
       allocated by lzma_properties_decode() using the default allocator. */
    free(filter.options);
    return result;
}


/* Module initialization. */

static PyMethodDef module_methods[] = {
    {"is_check_supported", (PyCFunction)is_check_supported,
     METH_VARARGS, is_check_supported_doc},
    {"_encode_filter_properties", (PyCFunction)_encode_filter_properties,
     METH_VARARGS, _encode_filter_properties_doc},
    {"_decode_filter_properties", (PyCFunction)_decode_filter_properties,
     METH_VARARGS, _decode_filter_properties_doc},
    {NULL}
};

static PyModuleDef _lzmamodule = {
    PyModuleDef_HEAD_INIT,
    "_lzma",
    NULL,
    -1,
    module_methods,
    NULL,
    NULL,
    NULL,
    NULL,
};

/* Some of our constants are more than 32 bits wide, so PyModule_AddIntConstant
   would not work correctly on platforms with 32-bit longs. */
static int
module_add_int_constant(PyObject *m, const char *name, PY_LONG_LONG value)
{
    PyObject *o = PyLong_FromLongLong(value);
    if (o == NULL)
        return -1;
    if (PyModule_AddObject(m, name, o) == 0)
        return 0;
    Py_DECREF(o);
    return -1;
}

#define ADD_INT_PREFIX_MACRO(m, macro) \
    module_add_int_constant(m, #macro, LZMA_ ## macro)

PyMODINIT_FUNC
PyInit__lzma(void)
{
    PyObject *m;

    empty_tuple = PyTuple_New(0);
    if (empty_tuple == NULL)
        return NULL;

    m = PyModule_Create(&_lzmamodule);
    if (m == NULL)
        return NULL;

    if (PyModule_AddIntMacro(m, FORMAT_AUTO) == -1 ||
        PyModule_AddIntMacro(m, FORMAT_XZ) == -1 ||
        PyModule_AddIntMacro(m, FORMAT_ALONE) == -1 ||
        PyModule_AddIntMacro(m, FORMAT_RAW) == -1 ||
        ADD_INT_PREFIX_MACRO(m, CHECK_NONE) == -1 ||
        ADD_INT_PREFIX_MACRO(m, CHECK_CRC32) == -1 ||
        ADD_INT_PREFIX_MACRO(m, CHECK_CRC64) == -1 ||
        ADD_INT_PREFIX_MACRO(m, CHECK_SHA256) == -1 ||
        ADD_INT_PREFIX_MACRO(m, CHECK_ID_MAX) == -1 ||
        ADD_INT_PREFIX_MACRO(m, CHECK_UNKNOWN) == -1 ||
        ADD_INT_PREFIX_MACRO(m, FILTER_LZMA1) == -1 ||
        ADD_INT_PREFIX_MACRO(m, FILTER_LZMA2) == -1 ||
        ADD_INT_PREFIX_MACRO(m, FILTER_DELTA) == -1 ||
        ADD_INT_PREFIX_MACRO(m, FILTER_X86) == -1 ||
        ADD_INT_PREFIX_MACRO(m, FILTER_IA64) == -1 ||
        ADD_INT_PREFIX_MACRO(m, FILTER_ARM) == -1 ||
        ADD_INT_PREFIX_MACRO(m, FILTER_ARMTHUMB) == -1 ||
        ADD_INT_PREFIX_MACRO(m, FILTER_SPARC) == -1 ||
        ADD_INT_PREFIX_MACRO(m, FILTER_POWERPC) == -1 ||
        ADD_INT_PREFIX_MACRO(m, MF_HC3) == -1 ||
        ADD_INT_PREFIX_MACRO(m, MF_HC4) == -1 ||
        ADD_INT_PREFIX_MACRO(m, MF_BT2) == -1 ||
        ADD_INT_PREFIX_MACRO(m, MF_BT3) == -1 ||
        ADD_INT_PREFIX_MACRO(m, MF_BT4) == -1 ||
        ADD_INT_PREFIX_MACRO(m, MODE_FAST) == -1 ||
        ADD_INT_PREFIX_MACRO(m, MODE_NORMAL) == -1 ||
        ADD_INT_PREFIX_MACRO(m, PRESET_DEFAULT) == -1 ||
        ADD_INT_PREFIX_MACRO(m, PRESET_EXTREME) == -1)
        return NULL;

    Error = PyErr_NewExceptionWithDoc(
            "_lzma.LZMAError", "Call to liblzma failed.", NULL, NULL);
    if (Error == NULL)
        return NULL;
    Py_INCREF(Error);
    if (PyModule_AddObject(m, "LZMAError", Error) == -1)
        return NULL;

    if (PyType_Ready(&Compressor_type) == -1)
        return NULL;
    Py_INCREF(&Compressor_type);
    if (PyModule_AddObject(m, "LZMACompressor",
                           (PyObject *)&Compressor_type) == -1)
        return NULL;

    if (PyType_Ready(&Decompressor_type) == -1)
        return NULL;
    Py_INCREF(&Decompressor_type);
    if (PyModule_AddObject(m, "LZMADecompressor",
                           (PyObject *)&Decompressor_type) == -1)
        return NULL;

    return m;
}
back to top