Python classes which can be used as arrays with any number of bits per index

Question

https://github.com/speedrun-program/small_array/blob/main/small_array.py

As a personal project, I wanted to try to make Snake Game in pygame where the grid can be arbitrarily large. I made these classes so I could make larger grids before running out of memory.

class nbitndarray:
    "a class which lets you use a byte array as if it was an arbitrarily deeply nested "
    "array with each index being any number of bits wide"
    
    def __init__(self, dimensions, bits_per_index):
        self.bits_per_index = bits_per_index
        self.dimensions = tuple(dimensions)
        
        if any(d <= 0 for d in dimensions):
            raise ValueError("all dimensions must be greater than 0")
        elif bits_per_index <= 0:
            raise ValueError("bits_per_index must be greater than 0")
        elif not dimensions:
            raise ValueError("no dimensions given")
        
        total_bits = bits_per_index
        for d in dimensions:
            total_bits *= d
        
        total_bytes = (total_bits // 8) + (total_bits % 8 != 0)
        self._array = bytearray(total_bytes)
    
    
    def _index_generator(self, position):
        for depth, (pos, dimension_length) in enumerate(zip(position, self.dimensions)):
            actual_position = pos if pos >= 0 else dimension_length + pos
            if not 0 <= actual_position < dimension_length:
                raise IndexError(f"attempted to access index {pos} of dimension {depth}, which is length {dimension_length}")
            
            yield actual_position
    
    
    def _get_actual_position(self, position): # returns (start_byte, start_bit)
        if (len(position) != len(self.dimensions)):
            raise ValueError(
                f"position argument had {len(position)} dimensions, "
                f"but self.dimensions has {len(self.dimensions)} dimensions"
            )
        
        index_generator = self._index_generator(position)
        which_bit = next(index_generator)
        for i, pos in enumerate(index_generator, 1):
            which_bit = (which_bit * self.dimensions[i]) + pos
        which_bit *= self.bits_per_index
        
        return divmod(which_bit, 8)
    
    
    def zero_bytes(self):
        self._array.__init__(len(self._array))
    
    
    def get(self, position):
        which_byte, byte_start_position = self._get_actual_position(position)
        bits_left_to_read = self.bits_per_index
        current_bit_position = min(8 - byte_start_position, bits_left_to_read)
        
        # reading first byte
        value = (self._array[which_byte] >> byte_start_position) & ((1 << current_bit_position) - 1)
        bits_left_to_read -= current_bit_position
        which_byte += 1
        
        # reading middle byte(s)
        while bits_left_to_read >= 8:
            value += self._array[which_byte] << current_bit_position
            bits_left_to_read -= 8
            current_bit_position += 8
            which_byte += 1
        
        #reading last byte
        if bits_left_to_read > 0:
            value += (self._array[which_byte] & ((1 << bits_left_to_read) - 1)) << current_bit_position
        
        return value
    
    
    def set(self, position, new_value):
        if new_value > ((1 << self.bits_per_index) - 1) or new_value < 0:
            raise ValueError(f"value must be in range(0, {((1 << self.bits_per_index) - 1)})")
        
        which_byte, byte_start_position = self._get_actual_position(position)
        bits_left_to_set = self.bits_per_index
        
        # setting first byte
        bits_left_in_first_byte = min(8 - byte_start_position, bits_left_to_set)
        first_byte_start_bits = self._array[which_byte] & ((1 << byte_start_position) - 1)
        current_value_to_write = new_value & ((1 << bits_left_in_first_byte) - 1)
        self._array[which_byte] >>= byte_start_position + bits_left_in_first_byte
        self._array[which_byte] <<= bits_left_in_first_byte
        self._array[which_byte] += current_value_to_write
        self._array[which_byte] <<= byte_start_position
        self._array[which_byte] += first_byte_start_bits
        new_value >>= bits_left_in_first_byte
        bits_left_to_set -= bits_left_in_first_byte
        which_byte += 1
        
        # setting middle byte(s)
        while bits_left_to_set >= 8:
            current_value_to_write = new_value & 0b11111111
            self._array[which_byte] = new_value
            new_value >>= 8
            bits_left_to_set -= 8
            which_byte += 1
        
        # setting last bit
        if bits_left_to_set > 0:
            self._array[which_byte] >>= bits_left_to_set
            self._array[which_byte] <<= bits_left_to_set
            self._array[which_byte] += new_value


class nbitarray(nbitndarray):
    "a class which lets you use a byte array as if it was an "
    "array with each index being any number of bits wide"
    
    def __init__(self, bits_per_index, initial_length=0):
        if bits_per_index <= 0:
            raise ValueError("bits_per_index must be greater than 0")
        elif initial_length < 0:
            raise ValueError("initial_length must be greater than or equal to 0")
        
        self.bits_per_index = bits_per_index
        self.length = initial_length
        
        total_bits = bits_per_index * initial_length
        total_bytes = (total_bits // 8) + (total_bits % 8 != 0)
        self._array = bytearray(total_bytes)
    
    
    def _get_actual_position(self, idx):
        non_negative_idx = idx if idx >= 0 else self.length + idx
        
        if not 0 <= non_negative_idx < self.length:
            raise IndexError(f"attempted to access index {idx} of nbitarray, which is length {self.length}")
        
        return divmod(non_negative_idx * self.bits_per_index, 8)
    
    
    def append(self, new_value):
        # allocating more space if necessary
        total_bits = self.bits_per_index * (self.length + 1)
        total_bytes = (total_bits // 8) + (total_bits % 8 != 0)
        while len(self._array) < total_bytes:
            self._array.append(0)
        
        self.length += 1
        self.set(-1, new_value)

If capacity is the one thing you're optimising for, you'll want to persist to the hard drive and use compression — Reinderien
– Reinderien, Commented Sep 6, 2022 at 2:45
Much much more important than bit packing is using a sparse matrix. It seems like you're not doing that, although I've only given the code a cursory look-over. — Reinderien
– Reinderien, Commented Sep 6, 2022 at 2:47

AJNeufeld · Accepted Answer · 2022-09-06 17:36:24Z

PEP-8

Your code does not conform to PEP 8: The Style Guide for Python Code

In particular, CapitalizedWords should be used for ClassNames. This means class nbitndarray should be (perhaps) class NBitNDArray.

Doc Strings

Your doc-strings are not created properly.

class nbitndarray:
    "a class which lets you use a byte array as if it was an arbitrarily deeply nested "
    "array with each index being any number of bits wide"
    
    ...

If you type help(nbitndarray) in an interactive shell, you'll get:

>>> help(nbitndarray)
Help on class nbitndarray in module __main__:

class nbitndarray(builtins.object)
 |  a class which lets you use a byte array as if it was an arbitrarily deeply nested
 |  
 |  Data descriptors defined here:
 |  ...

Notice that the description ends abruptly at "deeply nested". The remainder of the the intended docstring, "array with each index being any number of bits wide" is missing.

The docstring must be created as a single string. For multi-line docstrings (or simply, for every docstring), use a triple-quoted """...""" string:

class nbitndarray:
    """a class which lets you use a byte array as if it was an arbitrarily deeply nested
       array with each index being any number of bits wide"""

    ...

Inheritance

Nowhere are you calling the constructor for your parent class:

class nbitarray(nbitndarray):
    
    def __init__(self, bits_per_index, initial_length=0):
        ...
        super().__init__(dimensions, bits_per_index)   # <--- MISSING
        ...

This must be done for proper subclassing.

Bit packing

You are trying to pack a (multi-dimensional) array into consecutive bits of a bytearray.

It would be simpler to pack the array into a bitarray, and leave the bitarray responsible for the splitting consecutive bits across byte boundaries.

First, install the bitarray module: pip install --upgrade bitarray

Once the bitarray module has been installed, it is easily used for packing & unpacking integers into the bitarray. For example, a packed 2x3 array of 5-bit values could be represented by a 30 bit bitarray:

>>> import bitarray, bitarray.util
>>> ba = bitarray.bitarray([0]*2*3*5)
>>> for i in range(6):
       ba[5*i:5*i+5] = bitarray.util.int2ba(i*2, 5)

>>> ba
bitarray('000000001000100001100100001010')
>>> for i in range(6):
        print(bitarray.util.ba2int(ba[5*i:5*i+5]))
    
0
2
4
6
8
10

Your multi-dimensional array could convert the dimensions into a 1-dimensional index, and the the 1-dimensional index could then index into the bitarray.

In terms of inheritance, I would reverse your class structure. The 1-d array should be the base class, and the multiple dimension array would be the subclass.

class NBitArray:
    def __init__(self, bits, length):
        self._bits = bitarray.bitarray(bits * length)
        self._bits.setall(0)
        ...
    ...

class NBitNDArray(NBitArray):
    def __init__(self, bits, dimensions):
        length = ...
        super().__init__(bits, length)
        ...
    ...

Stack Exchange Network

Python classes which can be used as arrays with any number of bits per index

1 Answer 1

PEP-8

Doc Strings

Inheritance

Bit packing

You must log in to answer this question.

Hot Network Questions

Python classes which can be used as arrays with any number of bits per index

1 Answer 1

PEP-8

Doc Strings

Inheritance

Bit packing

You must log in to answer this question.

Related

Hot Network Questions