Im wirklichen Leben führe ich die folgenden Operationen möglicherweise mehrmals aus, für mehr Funktionen, mehr Symbole und größere Körbe.
import polars as pl
from polars import col
from vega_datasets import data
df = pl.from_pandas(data.stocks())
list_symbols = df.select(col('symbol').unique()).to_series().to_list()
"""
[
"IBM",
"MSFT",
"AAPL",
"AMZN",
"GOOG"
]
"""
basket = pl.DataFrame(
{
"MSFT": ["AMZN", "GOOG"],
"AMZN": ["MSFT", "GOOG"],
"GOOG": ["AAPL", "IBM"],
"IBM": ["AMZN", "AAPL"],
"AAPL": ["AMZN", "IBM"],
}
).transpose(
include_header=True,
header_name="symbol",
column_names=["symbol_1", "symbol_2"],
)
- Aufeinanderfolgende Verknüpfungen
df
.join(basket, on='symbol', how='left')
# I've put an iterative function to do the successive joins when needed, assume more than 2 joins in real life
.join(df.select('date','symbol', col('price').name.suffix('_1')),
left_on = ['date', 'symbol_1'],
right_on = ['date', 'symbol'],
how='left',
)
.join(df.select('date','symbol', col('price').name.suffix('_2')),
left_on = ['date', 'symbol_2'],
right_on = ['date', 'symbol'],
how='left',
)
)
- Entpivotieren, verbinden, verschachtelte Spalte (zusammenfügen + wann)
# I normally use the lazyframe pivot implementation to wrok with lazyframe
df.pivot(index='date', on='symbol', values='price')
)
(
df.join(basket, on="symbol", how="left")
# alternative to the successive joins
.join(
df_pivot.select('date', pl.exclude('date').name.suffix('_price_to_drop')),
on="date",
how="left",
)
.with_columns(
*[
pl.coalesce(
pl.when(col(f'symbol_{i}')==symbol)
.then(col(f'{symbol}_price_to_drop'))
for symbol in list_symbols
).alias(f'price_{i}')
for i in [1,2]
]
)
.select(pl.exclude("^.*to_drop$"))
)
Beachten Sie, dass ich normalerweise mit Lazyframes arbeite. Dies ist im obigen Beispiel nicht der Fall.