Here's some timeit code that compares the speeds of 3 different ways of converting a 2D Python list of small integers to a 1D Numpy array.
The straightforward way is to simply pass the whole list to Numpy's array function, and then flatten the resulting 2D array to 1D via its .flatten method. That's the approach taken by grid_to_array.
The other ways that I tested both use the numpy.append function. grid_rows_to_array builds the array row by row, grid_items_to_array builds it item by item. As you might guess, this last approach is very slow. Even for a 10x10 list it's around 50 times slower than grid_to_array. For larger lists, its speed is truly glacial.
#!/usr/bin/env python3
''' Compare the speeds of various functions that convert a
2D integer list to a 1D Numpy array.
See https://stackoverflow.com/q/44512661/4014959
Written by PM 2Ring 2017.06.13
'''
import numpy as np
from timeit import Timer
def make_grid(n):
    ''' Make a 2D list of integers

        Return a list of n rows, each a list of n consecutive integers,
        so that the rows read in order hold 0 .. n*n - 1.
        A zero or negative n gives an empty grid.
    '''
    # Iterate over row indices instead of using n as a range() step:
    # a step of 0 makes range() raise ValueError when n == 0.
    return [list(range(r * n, (r + 1) * n)) for r in range(n)]
# The functions to test
def grid_to_array(g):
    ''' Let Numpy build an array from the entire grid in one call,
        then flatten that array to 1D
    '''
    whole = np.array(g)
    return whole.flatten()
def grid_rows_to_array(g):
    ''' Create a 1D array from the 1st row of the grid,
        then append all the other rows to it, row by row

        g is an iterable of rows. Returns a Numpy array holding the
        items of every row, in order. A grid with no rows gives an
        empty array, the same result grid_to_array gives for it.
    '''
    # An iterator that yields the rows
    it = iter(g)
    try:
        first_row = next(it)
    except StopIteration:
        # No rows at all: hand back an empty array rather than letting
        # StopIteration escape to the caller
        return np.array([])

    a = np.array(first_row)
    for row in it:
        # np.append returns a new array each time, it never grows `a`
        # in place
        a = np.append(a, row)
    return a
def grid_items_to_array(g):
    ''' Create an array from the 1st item of the grid,
        then append all the other items to it, item by item

        g is an iterable of rows, each an iterable of items. Returns a
        1D Numpy array holding every item, in row order. A grid with no
        items gives an empty array, the same result grid_to_array gives
        for it.
    '''
    # A generator that yields the items
    gen = (u for row in g for u in row)
    try:
        first_item = next(gen)
    except StopIteration:
        # No items at all: hand back an empty array rather than letting
        # StopIteration escape to the caller
        return np.array([])

    # Wrap the 1st item in a list so the result is 1D even when the grid
    # holds only that one item; np.array(scalar) would be 0-dimensional
    a = np.array([first_item])
    for u in gen:
        # np.append returns a new array each time, it never grows `a`
        # in place
        a = np.append(a, u)
    return a
# Every conversion function that verify and time_test run
funcs = (grid_to_array, grid_rows_to_array, grid_items_to_array)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def verify(size):
    ''' Verify that all the functions give the same result

        Builds a grid with make_grid(size) and prints its rows, then
        runs every function in funcs on it, printing each function's
        name and result. Prints 'all ok' and returns True if every
        result equals the first one, otherwise prints 'Error!' and
        returns False.
    '''
    print('Grid rows, size=', size)
    grid = make_grid(size)
    for row in grid:
        print(row)
    print()

    results = []
    for func in funcs:
        print(func.__name__)
        a = func(grid)
        print(a, '\n')
        results.append(a)

    # Test that all arrays are identical. np.array_equal compares shapes
    # as well as items, so a result of the wrong shape or length counts
    # as a mismatch instead of being broadcast (or raising) inside ==
    first, *results = results
    rc = all(np.array_equal(first, u) for u in results)
    print('all ok' if rc else 'Error!')
    return rc
def time_test(loops, reps):
    ''' Time every function in funcs and print one line of stats each

        Each function is called as fname(grid), with grid and the
        function imported from __main__. It is timed reps times, each
        timing covering loops calls. A function's timings are listed in
        ascending order, and the functions themselves are printed in
        the sorted order of those timing lists.
    '''
    stats = []
    for func in funcs:
        fname = func.__name__
        timer = Timer(fname + '(grid)', 'from __main__ import grid, ' + fname)
        stats.append((sorted(timer.repeat(reps, loops)), fname))

    for times, fname in sorted(stats):
        print('{:20} {}'.format(fname, times))
# Check on a small grid that the functions agree with each other
verify(5)

# Do the timing tests
reps = 3
loops = 128
for i in range(6):
    # The grid size doubles on every pass: 10, 20, 40, ...
    size = 10 << i
    # time_test imports this global from __main__, so it must keep the
    # name grid
    grid = make_grid(size)
    print('\n{0}: Size={1}, Loops={2}'.format(i, size, loops))
    time_test(loops, reps)
    # Halve the loop count for the next, larger grid
    loops //= 2
output
Grid rows, size= 5
[0, 1, 2, 3, 4]
[5, 6, 7, 8, 9]
[10, 11, 12, 13, 14]
[15, 16, 17, 18, 19]
[20, 21, 22, 23, 24]
grid_to_array
[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24]
grid_rows_to_array
[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24]
grid_items_to_array
[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24]
all ok
0: Size=10, Loops=128
grid_to_array [0.007200194995675702, 0.01232630800222978, 0.014947812996979337]
grid_rows_to_array [0.047056858995347284, 0.04709799499687506, 0.047196302002703305]
grid_items_to_array [0.4025771490050829, 0.40894141500029946, 0.4112219040034688]
1: Size=20, Loops=64
grid_to_array [0.009614694004994817, 0.01012843299395172, 0.015028686997538898]
grid_rows_to_array [0.0559136279989616, 0.05666288200154668, 0.08348615399881965]
grid_items_to_array [0.8405412119973334, 0.8444936599989887, 0.8777696510005626]
2: Size=40, Loops=32
grid_to_array [0.018018493996351026, 0.019878495004377328, 0.023297555999306496]
grid_rows_to_array [0.06234459100232925, 0.06246465100412024, 0.06281185599800665]
grid_items_to_array [1.7235709279993898, 1.7587534330014023, 1.8295516790021793]
3: Size=80, Loops=16
grid_to_array [0.029057150000880938, 0.03226659999927506, 0.03832875600346597]
grid_rows_to_array [0.08397659000183921, 0.08470724899962079, 0.0852264919958543]
grid_items_to_array [3.7167786709978827, 3.8199421919998713, 4.046950017000199]
4: Size=160, Loops=8
grid_to_array [0.06241827599296812, 0.06254585299757309, 0.07094153799698688]
grid_rows_to_array [0.14922553599899402, 0.1522045050005545, 0.15509943600045517]
grid_items_to_array [11.045316871000978, 11.064624926999386, 12.66077643600147]
5: Size=320, Loops=4
grid_to_array [0.13331966300029308, 0.1439433339983225, 0.15014286800578702]
grid_rows_to_array [0.6806031250016531, 0.685745176000637, 0.7797461770023801]
grid_items_to_array [162.62937470299948, 162.73040329200012, 167.80105410800024]
These timings were obtained using Python 3.6.0 on a rather ancient 2GHz single core 32 bit machine with 2GB of RAM, running a Debian derivative of Linux. YMMV.
list instead of an array. In fact, you cannot append to a numpy array in-place. You can make a whole new array, though. The major limitation of a numpy array is that it is fixed-size. c is not an array, it is a list. What array loop? If you want to make a numpy.ndarray out of list c then you only need to do a = np.array(c, np.int16)