Some of the other answers contain mistakes, so I have summarized the working approaches below. A complete answer follows.
Prepare the dataset. The pandas version used here is 1.1.5.
import numpy as np
import pandas as pd
import timeit
# Report which pandas release is running (this walkthrough used 1.1.5).
print(pd.__version__)
# 1.1.5
# Sample frame: three float columns, one row per minute.
# The timestamps stay plain strings and the index is named 'ts'.
df = pd.DataFrame(
    data={
        'x': [0.120117, 0.117188, 0.119141, 0.116211, 0.119141],
        'y': [0.987305, 0.984375, 0.987305, 0.984375, 0.983398],
        'z': [0.116211, 0.122070, 0.119141, 0.120117, 0.118164],
    },
    index=pd.Index(
        [
            '2014-05-15 10:38',
            '2014-05-15 10:39',
            '2014-05-15 10:40',
            '2014-05-15 10:41',
            '2014-05-15 10:42',
        ],
        name='ts',
    ),
    columns=['x', 'y', 'z'],
)
# x y z
# ts
# 2014-05-15 10:38 0.120117 0.987305 0.116211
# 2014-05-15 10:39 0.117188 0.984375 0.122070
# 2014-05-15 10:40 0.119141 0.987305 0.119141
# 2014-05-15 10:41 0.116211 0.984375 0.120117
# 2014-05-15 10:42 0.119141 0.983398 0.118164
Solution 01.
Return a pd.Series from the function passed to apply.
def myfunc1(args):
    """Compute the derived values e, f and g for one row.

    Args:
        args: Row values in positional order (a pandas Series row, list,
            tuple, ...). Only the first three entries are used; any further
            entries are ignored.

    Returns:
        pd.Series containing [e, f, g], where e = x + 2*y, f = y*z + 1 and
        g = z + x*y for the first three entries x, y, z.
    """
    # Unpack by position instead of writing args[0], args[1], args[2]:
    # integer keys on a label-indexed Series are deprecated as positional
    # lookups in newer pandas releases.
    x, y, z, *_ = args
    e = x + 2*y
    f = y*z + 1
    g = z + x * y
    return pd.Series([e, f, g])
# Apply myfunc1 to every row (axis=1); the Series it returns per row
# supply the values for the new columns e, f and g.
df[['e', 'f', 'g']] = df.apply(myfunc1, axis=1)
# x y z e f g
# ts
# 2014-05-15 10:38 0.120117 0.987305 0.116211 2.094727 1.114736 0.234803
# 2014-05-15 10:39 0.117188 0.984375 0.122070 2.085938 1.120163 0.237427
# 2014-05-15 10:40 0.119141 0.987305 0.119141 2.093751 1.117629 0.236770
# 2014-05-15 10:41 0.116211 0.984375 0.120117 2.084961 1.118240 0.234512
# 2014-05-15 10:42 0.119141 0.983398 0.118164 2.085937 1.116202 0.235327
# Time 10000 repetitions of the row-wise apply on its own.
t1 = timeit.timeit(
    stmt='df.apply(myfunc1, axis=1)',
    number=10000,
    globals={'df': df, 'myfunc1': myfunc1},
)
print(round(t1, 3), 'seconds')
# 14.571 seconds
Solution 02.
Pass result_type='expand' to apply so that the returned list is expanded into columns.
def myfunc2(args):
    """Compute the derived values e, f and g for one row as a plain list.

    Args:
        args: Row values in positional order (a pandas Series row, list,
            tuple, ...). Only the first three entries are used; any further
            entries are ignored.

    Returns:
        The list [e, f, g], where e = x + 2*y, f = y*z + 1 and g = z + x*y
        for the first three entries x, y, z.
    """
    # Unpack by position instead of writing args[0], args[1], args[2]:
    # integer keys on a label-indexed Series are deprecated as positional
    # lookups in newer pandas releases.
    x, y, z, *_ = args
    e = x + 2*y
    f = y*z + 1
    g = z + x * y
    return [e, f, g]
# Apply myfunc2 to every row (axis=1); result_type='expand' turns each
# returned list into columns, which fill the new columns e, f and g.
df[['e', 'f', 'g']] = df.apply(
    myfunc2,
    axis=1,
    result_type='expand',
)
# x y z e f g
# ts
# 2014-05-15 10:38 0.120117 0.987305 0.116211 2.094727 1.114736 0.234803
# 2014-05-15 10:39 0.117188 0.984375 0.122070 2.085938 1.120163 0.237427
# 2014-05-15 10:40 0.119141 0.987305 0.119141 2.093751 1.117629 0.236770
# 2014-05-15 10:41 0.116211 0.984375 0.120117 2.084961 1.118240 0.234512
# 2014-05-15 10:42 0.119141 0.983398 0.118164 2.085937 1.116202 0.235327
# Time 10000 repetitions of the expanding apply on its own.
t2 = timeit.timeit(
    stmt="df.apply(myfunc2, axis=1, result_type='expand')",
    number=10000,
    globals={'df': df, 'myfunc2': myfunc2},
)
print(round(t2, 3), 'seconds')
# 9.907 seconds
Solution 03.
If you want to make it faster, use np.vectorize. Note that when using np.vectorize the function cannot take a single args argument; each value is passed as a separate argument.
def myfunc3(args0, args1, args2):
    """Return [e, f, g] derived from three separate input values.

    e = args0 + 2*args1, f = args1*args2 + 1, g = args2 + args0*args1.
    """
    return [
        args0 + 2*args1,
        args1*args2 + 1,
        args2 + args0 * args1,
    ]
# Call myfunc3 element-wise over the x, y and z columns. otypes=['O'] makes
# np.vectorize return an object array whose elements are the returned lists;
# np.vstack stacks those lists into a 2-D array with one row per input row.
# np.vstack is used instead of np.row_stack, which is a deprecated alias of it.
df[['e', 'f', 'g']] = pd.DataFrame(
    np.vstack(np.vectorize(myfunc3, otypes=['O'])(df['x'], df['y'], df['z'])),
    index=df.index)
# x y z e f g
# ts
# 2014-05-15 10:38 0.120117 0.987305 0.116211 2.094727 1.114736 0.234803
# 2014-05-15 10:39 0.117188 0.984375 0.122070 2.085938 1.120163 0.237427
# 2014-05-15 10:40 0.119141 0.987305 0.119141 2.093751 1.117629 0.236770
# 2014-05-15 10:41 0.116211 0.984375 0.120117 2.084961 1.118240 0.234512
# 2014-05-15 10:42 0.119141 0.983398 0.118164 2.085937 1.116202 0.235327
# Time 10000 repetitions of the vectorized computation on its own.
t3 = timeit.timeit(
    "pd.DataFrame(np.vstack(np.vectorize(myfunc3, otypes=['O'])(df['x'], df['y'], df['z'])), index=df.index)",
    globals=dict(pd=pd, np=np, df=df, myfunc3=myfunc3), number=10000)
print(round(t3, 3), 'seconds')
# 1.598 seconds