# Code: benchmark of row-wise DataFrame.apply vs. column-wise Series.apply
import pandas as pd
import numpy as np
import time
# Benchmark fixture: 1000 rows x 5 columns named col1..col5, filled with
# random whole numbers drawn from [0, 100) and stored as floats.
df = pd.DataFrame(
    np.random.randint(0, 100, size=(1000, 5)).astype(float),
    columns=['col' + str(n) for n in range(1, 6)],
)
def apply_row(row):
    """Return a transformed copy of a single row.

    The following arithmetic is applied to the labelled entries:

    * ``col1`` -> ``col1 * 2 + 3``
    * ``col2`` -> ``col2 + 5``
    * ``col3`` -> ``col3 - 1``
    * ``col4`` -> ``col4 / 2``
    * ``col5`` -> ``col5 ** 2``

    Args:
        row: A mapping-like object indexed by the labels ``col1``..``col5``
            that provides ``.copy()``; in this script it is the Series that
            ``DataFrame.apply(..., axis=1)`` passes for each row.

    Returns:
        A new object of the same kind as ``row`` holding the transformed
        values. ``row`` itself is left unmodified.
    """
    # Work on a copy: pandas does not support functions that mutate the
    # object handed to them by DataFrame.apply, and writing into `row` could
    # otherwise leak changes back into the caller's data.
    result = row.copy()
    result['col1'] = row['col1'] * 2 + 3
    result['col2'] = row['col2'] + 5
    result['col3'] = row['col3'] - 1
    result['col4'] = row['col4'] / 2
    result['col5'] = row['col5'] ** 2
    return result
# Time the row-wise approach: apply_row is invoked once per row (axis=1).
# perf_counter() is used instead of time() because it is monotonic and offers
# the highest available resolution for measuring short durations.
start = time.perf_counter()
df.apply(apply_row, axis=1)  # return value is discarded; only elapsed time is reported
end = time.perf_counter()
print(f"Row-based time: {end - start} seconds")
# Time the column-wise approach: the same arithmetic as apply_row, but applied
# with Series.apply one column at a time; each result is written back into df.
# perf_counter() is used instead of time() because it is monotonic and offers
# the highest available resolution for measuring short durations.
start = time.perf_counter()
df['col1'] = df['col1'].apply(lambda x: x * 2 + 3)
df['col2'] = df['col2'].apply(lambda x: x + 5)
df['col3'] = df['col3'].apply(lambda x: x - 1)
df['col4'] = df['col4'].apply(lambda x: x / 2)
df['col5'] = df['col5'].apply(lambda x: x ** 2)
end = time.perf_counter()
print(f"Column-based time: {end - start} seconds")