3

I'm creating a tool that analyses code used within definition of user defined function, for example:

Test 1 ✅

def help_fun(a, b): return a * b
def test1(numbers):
    r1, r2 = np.multiply.reduce(numbers), sum(numbers)/len(numbers)
    r = math.sin(help_fun(r1, r2))
    return math.sqrt(r)

I understand, however, how to interpret results of dis.dis(test1):

3           0 LOAD_GLOBAL              0 (np)
            2 LOAD_ATTR                1 (multiply)
            4 LOAD_METHOD              2 (reduce)
            6 LOAD_FAST                0 (numbers)
            8 CALL_METHOD              1
           10 LOAD_GLOBAL              3 (sum)
           ...
5          46 LOAD_GLOBAL              5 (math)
           48 LOAD_METHOD              8 (sqrt)
           50 LOAD_FAST                3 (r)
           52 CALL_METHOD              1
           54 RETURN_VALUE

My expect output is:

{0: {'functions': ['sum', 'len', 'help_fun'], 
     'methods': ['np.multiply.reduce', 'math.sin', 'math.sqrt']}} #frame 0: scope of test1

In order to collect function and method names, I implement a wrapper for contents of stack of disassemler and use a specific way to extract these names from stack archive.

import types
import dis
def get_frames(code):
    '''given <function>.__code__ instance, iterate each code frame
    >>> [type(x) for x in get_frames(test1.__code__)]
    [code]
    '''
    yield code
    for c in code.co_consts:
        if isinstance(c, types.CodeType):
            yield from get_frames(c)
            break

def get_calls(instructions, stack):
    '''get called functions and methods in CALL_FUNCTION and CALL_METHOD opnames'''
    functions, methods = [], []
    for idx, instr in enumerate(instructions):
        if instr.opname == 'CALL_FUNCTION':
            functions.append(stack[idx - 1][- 1 - instr.arg])
        elif instr.opname == 'CALL_METHOD':
            methods.append(stack[idx - 1][- 1 - instr.arg])
    return {'functions': functions, 'methods': methods}

def get_stack(instructions):
    '''Wrapper for stack contents'''
    stack = []
    for n in instructions:
        if n.opname in ('LOAD_FAST', 'LOAD_GLOBAL', 'LOAD_CONST'):
            stack.append(n.argrepr) #global var
        elif n.opname in ('LOAD_METHOD', 'LOAD_ATTR'):
            stack[-1] = f'{stack[-1]}.{n.argrepr}'
        elif n.opname in ('CALL_FUNCTION', 'CALL_METHOD'):
            args = stack[-n.arg:]
            del stack[-n.arg:]
            stack[-1] = f'{stack[-1]}({", ".join(args)})'
        elif n.opname == 'BINARY_TRUE_DIVIDE':
            stack[-2:] = [' / '.join(stack[-2:])]
        elif n.opname == 'STORE_FAST':
            del stack[-1]
        elif n.opname == 'ROT_TWO':
            stack[-1], stack[-2] = stack[-2], stack[-1]
        elif n.opname == 'GET_ITER':
            stack[-1] = f'iter({stack[-1]})'
        yield stack.copy()

code = list(get_frames(test1.__code__))
out = dict()
for i, c in enumerate(code):
    instructions = dis.Bytecode(c)
    stack = list(get_stack(instructions))
    out[i] = get_calls(instructions, stack)
out
>>> {0: {'functions': ['sum', 'len', 'help_fun'], 'methods': ['np.multiply.reduce', 'math.sin', 'math.sqrt']}}

In my approach names of functions and methods are extracted from stack column of table:

|   line | opname             |   arg | argrepr   | stack                                                    |
|--------|--------------------|-------|-----------|----------------------------------------------------------|
|      3 | LOAD_GLOBAL        |     0 | np        | np                                                       |
|        | LOAD_ATTR          |     1 | multiply  | np.multiply                                              |
|        | LOAD_METHOD        |     2 | reduce    | np.multiply.reduce                                       |
|        | LOAD_FAST          |     0 | numbers   | np.multiply.reduce, numbers                              |
|        | CALL_METHOD        |     1 |           | np.multiply.reduce(numbers)                              |
|        | LOAD_GLOBAL        |     3 | sum       | np.multiply.reduce(numbers), sum                         |
|        | LOAD_FAST          |     0 | numbers   | np.multiply.reduce(numbers), sum, numbers                |
|        | CALL_FUNCTION      |     1 |           | np.multiply.reduce(numbers), sum(numbers)                |
|        | LOAD_GLOBAL        |     4 | len       | np.multiply.reduce(numbers), sum(numbers), len           |
|        | LOAD_FAST          |     0 | numbers   | np.multiply.reduce(numbers), sum(numbers), len, numbers  |
|        | CALL_FUNCTION      |     1 |           | np.multiply.reduce(numbers), sum(numbers), len(numbers)  |
|        | BINARY_TRUE_DIVIDE |       |           | np.multiply.reduce(numbers), sum(numbers) / len(numbers) |
|        | ROT_TWO            |       |           | sum(numbers) / len(numbers), np.multiply.reduce(numbers) |
|        | STORE_FAST         |     1 | r1        | sum(numbers) / len(numbers)                              |
|        | STORE_FAST         |     2 | r2        |                                                          |
|      4 | LOAD_GLOBAL        |     5 | math      | math                                                     |
|        | LOAD_METHOD        |     6 | sin       | math.sin                                                 |
|        | LOAD_GLOBAL        |     7 | help_fun  | math.sin, help_fun                                       |
|        | LOAD_FAST          |     1 | r1        | math.sin, help_fun, r1                                   |
|        | LOAD_FAST          |     2 | r2        | math.sin, help_fun, r1, r2                               |
|        | CALL_FUNCTION      |     2 |           | math.sin, help_fun(r1, r2)                               |
|        | CALL_METHOD        |     1 |           | math.sin(help_fun(r1, r2))                               |
|        | STORE_FAST         |     3 | r         |                                                          |
|      5 | LOAD_GLOBAL        |     5 | math      | math                                                     |
|        | LOAD_METHOD        |     8 | sqrt      | math.sqrt                                                |
|        | LOAD_FAST          |     3 | r         | math.sqrt, r                                             |
|        | CALL_METHOD        |     1 |           | math.sqrt(r)                                             |
|        | RETURN_VALUE       |       |           | math.sqrt(r)                                             |

However, things get more complicated if other kind of opnames in my instructions are included. For instance, I'm not sure about behaviour of stack if there are any list comprehensions used. Getting names of methods and functions might display incorrect results:

Test 2 ❌

(<listcomp> is not a function but my wrapper things it is)

def test2(x): 
    return [[math.sqrt(m) for m in list(n)] for n in x]

Expected output:

{0: {'functions': [], 'methods': []}, #frame 0: scope of test2
 1: {'functions': ['list'], 'methods': [], #frame 1: scope of <listcomp>
 2: {'functions': [], 'methods': ['math.sqrt']}} #frame 2: scope of <listcomp>

Output of current code:

{0: {'functions': ["'test6.<locals>.<listcomp>'"], 'methods': []},
 1: {'functions': ['list', "'test6.<locals>.<listcomp>.<listcomp>'"], 'methods': []}, 
 2: {'functions': [], 'methods': ['math.sqrt']}}

Test 3 ❌

It might also crash sometimes because I am not sure what's happening with stack:

def test3(x,y):
    return [pow(i,j) for i,j in zip(x,y)]

Expected output:

{0: {'functions': ['zip'], 'methods': []},
 1: {'functions': ['pow'], 'methods': []}}

It crashes after second STORE_FAST command tries to pop item from empty stack. The instructions of second scope looks like:

|   line | opname          |   arg | argrepr   | stack   |
|--------|-----------------|-------|-----------|---------|
|    104 | BUILD_LIST      |     0 |           |         |
|        | LOAD_FAST       |     0 | .0        | .0      |
|        | FOR_ITER        |    18 | to 24     | .0      |
|        | UNPACK_SEQUENCE |     2 |           | .0      |
|        | STORE_FAST      |     1 | i         |         |
|        | STORE_FAST      |     2 | j         | ???     | ### stuck here
|        | LOAD_GLOBAL     |     0 | pow       | ???     |
|        | LOAD_FAST       |     1 | i         | ???     |
|        | LOAD_FAST       |     2 | j         | ???     |
|        | CALL_FUNCTION   |     2 |           | ???     |
|        | LIST_APPEND     |     2 |           | ???     |
|        | JUMP_ABSOLUTE   |     4 |           | ???     |
|        | RETURN_VALUE    |       |           | ???     |

Is there any easier way to get names of methods and functions used inside a caller? Are there any better ways to get archive of stack? I know, my implementation of get_stack is poor at the moment, I'm looking for a different approach or better documentation of stack control.

Remarks

  • I can add more tests and wrapper corrections if needed
  • Attributes (or methods) of class instances also involves, e.g. ls.append
7
  • What output are you expecting for test2? Commented Dec 12, 2022 at 11:26
  • 1
    And what output would you expect for a = math.sqrt; a(9)? Commented Dec 12, 2022 at 11:46
  • @Hack5 Nice catch. For any test I'm running list(get_frames(test.__code__)) on my script for further processing of test (as it is shown in my code). If I try to assign a = math.sqrt instead of function definition, a becomes builtin_function_or_method, not a function. It does not have attribute __code__. Hence I do not expect to further process global variables like a = math.sqrt. Updated a question. Commented Dec 12, 2022 at 14:40
  • Could you clarify your question? Your approach to viewing the stack seems valid, but this looks like a potential XY problem. What are you actually trying to do? You may not need to do any disassembly at all. And what is wrong with your existing code? If you filter out unwanted invocations of listcomps, it works perfectly on my machine. Commented Dec 12, 2022 at 15:47
  • At the moment I'm trying to find names of methods and functions used within scope of a caller. I'm convinced that viewing stack items solves it and share my efforts. Disassembling won't necessarily be the only way for solving it. If someone finds a different way, I'll accept that answer too. Commented Dec 12, 2022 at 18:09

1 Answer 1

2
+100

If you want your code to be more portable across Python versions, you should consider using ast. Although the AST does change across versions, it usually does so in ways that cause easy-to-understand errors.

To fix your code, you will need to implement a lot more operations. I did this by ignoring most things and just applying their stack effect (the number of elements placed or removed from the stack).

import dis
import inspect


def iterate_stack(instructions):
    stack = []
    called = []
    for instruction in instructions:
        old_stack_len = len(stack)
        if instruction.opname == "ROT_TWO":
            stack[-1], stack[-2] = stack[-2], stack[-1]
        elif instruction.opname == "ROT_THREE":
            stack[-1], stack[-2], stack[-3] = stack[-2], stack[-3], stack[-1]
        elif instruction.opname == "ROT_FOUR":
            stack[-1], stack[-2], stack[-3], stack[-4] = stack[-2], stack[-3], stack[-4], stack[-1]
        elif instruction.opname == "DUP_TOP":
            stack.append(stack[-1])
        elif instruction.opname == "DUP_TOP_TWO":
            stack.extend(stack[-2:])
        elif instruction.opname == "LOAD_ASSERTION_ERROR":
            stack.append("AssertionError")
        elif instruction.opname == "LOAD_NAME":
            stack.append(instruction.argrepr)
        elif instruction.opname == "LOAD_ATTR" or instruction.opname == "LOAD_METHOD":
            if stack[-1]:
                stack[-1] = stack[-1] + "." + instruction.argrepr
        elif instruction.opname == "LOAD_GLOBAL":
            stack.append(instruction.argrepr)
        elif instruction.opname == "LOAD_FAST" or instruction.opname == "LOAD_CLOSURE" or instruction.opname == "LOAD_DEREF" or instruction.opname == "LOAD_CLASSDEREF":
            if inspect.iscode(instruction.argval):
                stack.append(None)
            else:
                stack.append(instruction.argrepr)
        elif instruction.opname == "CALL_FUNCTION":
            args = stack[-instruction.arg:]
            del stack[-instruction.arg:]
            if stack[-1] is not None:
                called.append(f'{stack[-1]}({", ".join(args)})')
                stack.pop()
        elif instruction.opname == "CALL_FUNCTION_KW":
            # TODO get the names of keyword arguments
            called.append(stack[-1 - instruction.arg])
            del stack[-1 - instruction.arg:]
            stack.append(None)
        elif instruction.opname == "CALL_FUNCTION_EX":
            # TODO get the arguments
            if instruction.arg & 0x01:
                stack.pop()
            stack.pop()
            called.append(stack.pop())
        elif instruction.opname == "CALL_METHOD":
            # TODO get the arguments
            called.append(stack[-2 - instruction.arg])
            del stack[-2 - instruction.arg:]
            stack.append(None)
        elif instruction.opname == "ROT_N":
            tos = stack.pop()
            stack.insert(1 - instruction.arg, tos)
        stack_effect = dis.stack_effect(instruction.opcode, instruction.arg)
        while old_stack_len + stack_effect < len(stack):
            stack.pop()
        while old_stack_len + stack_effect > len(stack):
            stack.append(None)
    return called


def get_frames(code):
    yield code
    for c in code.co_consts:
        if inspect.iscode(c):
            yield from get_frames(c)


def help_fun(a, b):
    return a * b


def test1(numbers):
    r1, r2 = np.multiply.reduce(numbers), sum(numbers)/len(numbers)
    r = math.sin(help_fun(r1, r2))
    return math.sqrt(r)


def test2(x):
    return [[math.sqrt(m) for m in list(n)] for n in x]

def test3(x,y):
    return [pow(i,j) for i,j in zip(x,y)]




def main():
    code = list(get_frames(test1.__code__))
    out = dict()
    for i, c in enumerate(code):
        instructions = dis.get_instructions(c)
        out[i] = iterate_stack(instructions)
    print(out)
main()

This gives the expected result for all three of your examples, under Python 3.10.8 (main, Nov 14 2022, 00:00:00) [GCC 12.2.1 20220819 (Red Hat 12.2.1-2)] on linux. It will not work on Python 3.11 because bytecode changes every version.

I would really recommend against using this code, because it will break all the time. You should probably just build a pylint checker, or at least use a library like astroid.

Sign up to request clarification or add additional context in comments.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.