I've read that functools.partial(fn, *args, **kw) saves overhead compared to lambda: fn(*args, **kw). But is it also advantageous over stmt + globals?
timeit.Timer(partial(fn, *args, **kw)).repeat(200, 5)
vs
timeit.Timer(stmt='fn(*args, **kw)',
globals={'fn': fn, 'args': args, 'kw': kw})
or even, though docs suggest they're interchangeable,
timeit.Timer(stmt='fn(*args, **kw)',
globals={'args': args, 'kw': kw},
setup='from __main__ import fn')
I've not found much material on this. For what order of magnitude of fn's duration (e.g. milliseconds) does the choice make a non-negligible difference? Are they all interchangeable past a certain duration, including for parallel (not Python multiprocessing, just multi-core math by e.g. numpy) and (optional question) GPU benchmarking? Besides using the command line, is any one approach generally preferred?
A use case
For reference: a costly def setup, then we bench inside def main over several configurations. This isn't about "micro-benchmarking" (e.g. x += 1), but about code that takes on the order of milliseconds.
Here's a dummy self-contained example (try live); answers are free to invoke other cases. I've since edited it to run on the order of microseconds (by changing x's size) - here are the results for partial vs the "long form":
- Very fast CPU: 1.61 µs vs 1.67 µs (i7-13700HX)
- Very slow CPU: 3.93 µs vs 3.92 µs (replit's)
# -*- coding: utf-8 -*-
import numpy as np
from timeit import Timer
from functools import partial
# Benchmark switch read by main(): a truthy value times a functools.partial
# callable, a falsy value times the equivalent statement string with an
# explicit globals dict.
USE_PARTIAL = 1
#%% Timer, setup, target function --------------------------------------------
def setup():
    """Build the benchmark fixtures.

    Returns:
        A tuple ``(objs_all, k_bool)``. ``objs_all`` is a list with one
        ``(x, fkw)`` pair per ``j_bool`` value (``False`` first, then
        ``True``): ``x`` is a three-level nested list whose leaves are
        ``np.random.randn(1, 1)`` arrays and ``fkw`` is ``{'j_bool': j_bool}``.
        ``k_bool`` is the initial flag value, ``False``.
    """
    # Extents of the three nesting levels of `x`; all 1 in this example.
    n_outer = n_mid = n_inner = 1

    configs = []
    for flag in (False, True):
        nested = [
            [[np.random.randn(1, 1) for _ in range(n_inner)]
             for _ in range(n_mid)]
            for _ in range(n_outer)
        ]
        configs.append((nested, {'j_bool': flag}))
    return configs, False
def my_func(x, i_bool, j_bool=False, k_bool=True):
    """Dummy workload: flag flips, nested loops, list appends, then ``np.array``.

    ``x`` is walked as a three-level nested sequence. The outer extent is
    ``len(x)``; the two inner extents are taken from ``x[0]`` and ``x[0][0]``.
    The items of every leaf ``x[i][j][k]`` are spliced into the innermost list
    of the rebuilt structure, which is then converted with ``np.array``.

    The boolean arguments are only flipped locally; they do not influence the
    returned array.
    """
    rebuilt = []
    for i, plane in enumerate(x):
        # NOTE: `i + 1 % 2` parses as `i + (1 % 2)`, i.e. `i + 1`, so this (and
        # the two analogous tests below) is always truthy for these indices.
        # Kept as-is so the benchmarked workload stays unchanged.
        if i + 1 % 2:
            i_bool = not i_bool
        rows = []
        rebuilt.append(rows)
        for j in range(len(x[0])):
            if j + 1 % 2:
                j_bool = not j_bool
            cells = []
            rows.append(cells)
            for k in range(len(x[0][0])):
                if k + 1 % 2:
                    k_bool = not k_bool
                cells.extend(plane[j][k])
    return np.array(rebuilt)
#%% Bench funcs --------------------------------------------------------------
def main(objs_all, k_bool, n_iters=100, n_repeats=10000):
    """Time ``my_func`` over every configuration and print the mean best time.

    For each ``(x, fkw)`` pair in ``objs_all`` and each combination of
    ``i_bool`` and ``negate_k_bool`` (both over ``(False, True)``), the call
    ``my_func(x, i_bool, k_bool=k_bool, **fkw)`` is timed with
    ``timeit.Timer``. The per-call time of a configuration is the minimum
    over ``n_repeats`` repeats of ``n_iters`` calls, divided by ``n_iters``.
    The printed value is the average of those per-call times over all timed
    configurations (``0.0`` when ``objs_all`` is empty).

    Args:
        objs_all: Iterable of ``(x, fkw)`` pairs; ``fkw`` is a dict of extra
            keyword arguments for ``my_func``.
        k_bool: Initial value of the ``k_bool`` flag passed to ``my_func``.
        n_iters: Calls per timing repeat (``number`` for ``Timer.repeat``).
        n_repeats: Number of timing repeats per configuration.
    """
    total = 0
    n_configs = 0
    for x, fkw in objs_all:
        for i_bool in (False, True):
            for negate_k_bool in (False, True):
                if negate_k_bool:
                    # The flip is not undone, so it carries over into the
                    # configurations that follow.
                    k_bool = not k_bool
                if USE_PARTIAL:
                    timer = Timer(
                        partial(my_func, x, i_bool, k_bool=k_bool, **fkw))
                else:
                    # Statement-string form; `my_func` is imported from the
                    # running script, so this branch expects the file to be
                    # executed as `__main__`.
                    timer = Timer(
                        'my_func(x, i_bool, k_bool=k_bool, **fkw)',
                        globals={'x': x, 'i_bool': i_bool, 'k_bool': k_bool,
                                 'fkw': fkw},
                        setup='from __main__ import my_func',
                    )
                total += min(timer.repeat(n_repeats, n_iters)) / n_iters
                n_configs += 1
    # Average over the configurations actually timed rather than a fixed 8,
    # so the result stays correct if `objs_all` changes length.
    print(total / n_configs if n_configs else 0.0)
#%% Execute ------------------------------------------------------------------
if __name__ == '__main__':
    # Guarded so that importing this module does not start the benchmark;
    # running the file as a script behaves as before.
    args = setup()
    main(*args)