1

I'm writing C code for python (Python C API), and I noticed that python is not releasing the memory of the file, I'm wondering if the issue is in my code.

I want to simplify as much as passable, but I hope that no details will be missing.

The file is a binary file with buffers, first 4 bytes is the buffer size, then the buffer.

The binary file (big_file.comp):

du ~/Desktop/TEST_FILES/big_file.comp
4175416 ~/Desktop/TEST_FILES/big_file.comp

The python code (test.py):

#!/usr/bin/env python3

from struct import unpack_from
from psutil import Process
from os import getpid
import decomplib


def file_handler(file_name):
    with open(file_name, 'rb') as reader:
        while True:
            next_4_bytes = reader.read(4)
            if next_4_bytes == b'':
                break
            next_size, *_ = unpack_from("I", next_4_bytes)
            buffer = reader.read(next_size)
            yield buffer, next_size


def main():
    args = _parse_args()
    decompress = decomplib.Decompress()
    for buf, buf_size in file_handler(args.file):
        for msg in decompress.decompress_buffer(buf, buf_size):
            print(msg)


if __name__ == "__main__":
    pid = getpid()
    ps = Process(pid)
    main()
    print(ps.memory_info())

Some of the C code simplified:

#include <Python.h>
#include "structmember.h"

typedef struct {
    PyObject_HEAD
    uint32_t arr_size;
} DecompressObject;


static int Decompress_init(DecompressObject *self, PyObject *args, PyObject *kwds){
    return 0;
}

static PyObject* Decompress_handle_buffer(DecompressObject* self, PyObject* args){
    uint32_t buf_size = 0;
    uint8_t *buf = NULL;

    // get buffer and buffer length from python function
    if(!PyArg_ParseTuple(args, "y*i", &buf, &buf_size)){
        PyErr_SetString(PyExc_Exception, "Failed to parse function arguments");
        return NULL;
    }

    self->arr_size = 10;
    Py_XINCREF(self);
    return (PyObject *) self;
}

static PyObject* Decompress_next(DecompressObject *self, PyObject *Py_UNUSED(ignored)){
    static uint32_t seq_index = 0;
    if (seq_index < self->arr_size) {
        seq_index++;
        Py_RETURN_NONE;
    }
    seq_index = 0;
    return NULL;
}

static void Decompress_dealloc(DecompressObject *self){
    Py_TYPE(self)->tp_free((PyObject *) self);
}


static PyMethodDef Decompress_methods[] = {
    {"decompress_buffer", (PyCFunction) Decompress_handle_buffer, METH_VARARGS, "Decompress a buffer to asc data."},
    {NULL}  /* Sentinel */
};

static PyTypeObject DecompressType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    .tp_name = "decomplib.Decompress",
    .tp_doc = "Decompress object",
    .tp_basicsize = sizeof(DecompressObject),
    .tp_itemsize = 0,
    .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
    .tp_alloc = PyType_GenericAlloc,
    .tp_new = PyType_GenericNew,
    .tp_iter = PyObject_SelfIter,
    .tp_init = (initproc) Decompress_init,
    .tp_dealloc = (destructor) Decompress_dealloc,
    .tp_iternext = (iternextfunc) Decompress_next,
    .tp_methods = Decompress_methods,
};

static PyModuleDef Decompressmodule = {
    PyModuleDef_HEAD_INIT,
    .m_name = "decomplib",
    .m_doc = "Decompress an compressed file.",
    .m_size = -1,
};


PyMODINIT_FUNC PyInit_decomplib(void){
    PyObject *d;
    if (PyType_Ready(&DecompressType) < 0)
        return NULL;

    d = PyModule_Create(&Decompressmodule);
    if (d == NULL)
        return NULL;

    Py_INCREF(&DecompressType);
    if (PyModule_AddObject(d, "Decompress", (PyObject *) &DecompressType) < 0) {
        Py_DECREF(&DecompressType);
        Py_DECREF(d);
        return NULL;
    }

    return d;
}

As a result, I got the following output:

./test.py -f ~/Desktop/TEST_CAN_OPT/big_fie.comp
None
None
None
...
None
None
None
pmem(rss=4349915136, vms=4412583936, shared=6270976, text=2867200, lib=0, data=4344135680, dirty=0)

While playing around I noticed that if I change in the C function Decompress_handle_buffer the call to the function PyArg_ParseTuple the second argument from "y*i" to "Si", Python do cleanup the memory...

./test.py -f ~/Desktop/TEST_CAN_OPT/big_fie.comp
None
None
None
...
None
None
None
pmem(rss=22577152, vms=84869120, shared=6361088, text=2867200, lib=0, data=16420864, dirty=0)

However, The buffer is NOT correctly read.
Any ideas?!

Extra Info:

  • I'm using a virtual machine (VMware Workstation 15)
  • OS Ubuntu 18.4
  • Python 3.6.9

    1 Answer 1

    1

    y* does not correspond to uint8_t like you're using it. As stated in the documentation, it fills a Py_buffer struct that you're supposed to provide.

    You need to actually provide a Py_buffer, and when you're done with it, you need to release the buffer with PyBuffer_Release.

    Sign up to request clarification or add additional context in comments.

    1 Comment

    Thanks a lot, I don't know how I missed that.

    Your Answer

    By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

    Start asking to get answers

    Find the answer to your question by asking.

    Ask question

    Explore related questions

    See similar questions with these tags.