Code: Select all
df
time bid ask time_msc flags wdayLab wday rowid
1: 2025-01-02 04:00:00 21036.48 21043.08 2025-01-02 04:00:00.888 134 Thu 5 1
2: 2025-01-02 04:00:00 21037.54 21043.27 2025-01-02 04:00:00.888 134 Thu 5 2
3: 2025-01-02 04:00:00 21036.52 21042.55 2025-01-02 04:00:00.888 134 Thu 5 3
4: 2025-01-02 04:00:00 21036.82 21041.75 2025-01-02 04:00:00.888 134 Thu 5 4
5: 2025-01-02 04:00:00 21036.79 21040.78 2025-01-02 04:00:00.891 134 Thu 5 5
6: 2025-01-02 04:00:00 21035.86 21039.95 2025-01-02 04:00:00.891 134 Thu 5 6
7: 2025-01-02 04:00:00 21036.05 21038.76 2025-01-02 04:00:00.891 134 Thu 5 7
8: 2025-01-02 04:00:00 21034.74 21038.33 2025-01-02 04:00:00.891 134 Thu 5 8
9: 2025-01-02 04:00:00 21034.72 21039.35 2025-01-02 04:00:00.892 134 Thu 5 9
10: 2025-01-02 04:00:00 21034.99 21038.08 2025-01-02 04:00:00.892 134 Thu 5 10
R -Daten
Code: Select all
library(data.table)
setDTthreads(detectCores() - 2) # no effect
df_joined = ask),
mult = "last", # Take the closest (most recent) match
# by = .EACHI, # Do it row-by-row
nomatch = NA, # Allow NA if no such row exists
#.(i.rowid, last_higher_row = x.rowid, last_higher = x.time, lastHigh = x.ask)
][, difference_from_previous_higher := ask_prevHi - ask]
Code: Select all
rowid ask time_msc rowid_prevHi ask_prevHi time bid i.time_msc flags
1: 1 21043.08 NA NA 2025-01-02 04:00:00 21036.48 2025-01-02 04:00:00.888 134
2: 2 21043.27 NA NA 2025-01-02 04:00:00 21037.54 2025-01-02 04:00:00.888 134
3: 3 21042.55 2025-01-02 04:00:00.888 2 21043.27 2025-01-02 04:00:00 21036.52 2025-01-02 04:00:00.888 134
4: 4 21041.75 2025-01-02 04:00:00.888 3 21042.55 2025-01-02 04:00:00 21036.82 2025-01-02 04:00:00.888 134
5: 5 21040.78 2025-01-02 04:00:00.888 4 21041.75 2025-01-02 04:00:00 21036.79 2025-01-02 04:00:00.891 134
6: 6 21039.95 2025-01-02 04:00:00.891 5 21040.78 2025-01-02 04:00:00 21035.86 2025-01-02 04:00:00.891 134
7: 7 21038.76 2025-01-02 04:00:00.891 6 21039.95 2025-01-02 04:00:00 21036.05 2025-01-02 04:00:00.891 134
8: 8 21038.33 2025-01-02 04:00:00.891 7 21038.76 2025-01-02 04:00:00 21034.74 2025-01-02 04:00:00.891 134
9: 9 21039.35 2025-01-02 04:00:00.891 6 21039.95 2025-01-02 04:00:00 21034.72 2025-01-02 04:00:00.892 134
10: 10 21038.08 2025-01-02 04:00:00.892 9 21039.35 2025-01-02 04:00:00 21034.99 2025-01-02 04:00:00.892 134
wdayLab wday difference_from_previous_higher
1: Thu 5 NA
2: Thu 5 NA
3: Thu 5 0.72
4: Thu 5 0.80
5: Thu 5 0.97
6: Thu 5 0.83
7: Thu 5 1.19
8: Thu 5 0.43
9: Thu 5 0.60
10: Thu 5 1.27
Ich habe eine Polars Implementierung in Python ausprobiert, aber obwohl Join_asof multiproziert ist, schnell und unterstützt die Backward -Strategie, die andere Ungleichheiten beim Verbinden nicht unterstützt.
Code: Select all
joined = df.join_asof(
df.select(['rowid', 'time_msc', 'ask']).with_columns([
pl.col('time_msc').alias('time_prevhi')
]),
on="time_msc",
strategy="backward",
suffix="_prevhi",
allow_exact_matches=False
).with_columns([
(pl.col('rowid')-pl.col('rowid_prevhi')).alias('ticksdiff_prevhi'),
(pl.col('ask')-pl.col('ask_prevhi')).alias('askdiff_prevhi'),
])
Code: Select all
shape: (10, 13)
┌─────┬─────┬─────┬───────┬────────┬──────┬───────┬────────┬───────┬───────┬───────┬───────┬───────┐
│ bid ┆ ask ┆ tim ┆ flags ┆ wdayLa ┆ wday ┆ rowid ┆ time ┆ rowid ┆ ask_p ┆ time_ ┆ ticks ┆ askdi │
│ --- ┆ --- ┆ e_m ┆ --- ┆ b ┆ --- ┆ --- ┆ --- ┆ _prev ┆ revhi ┆ prevh ┆ diff_ ┆ ff_pr │
│ f64 ┆ f64 ┆ sc ┆ i64 ┆ --- ┆ i64 ┆ i64 ┆ dateti ┆ hi ┆ --- ┆ i ┆ prevh ┆ evhi │
│ ┆ ┆ --- ┆ ┆ str ┆ ┆ ┆ me[μs] ┆ --- ┆ f64 ┆ --- ┆ i ┆ --- │
│ ┆ ┆ dat ┆ ┆ ┆ ┆ ┆ ┆ i64 ┆ ┆ datet ┆ --- ┆ f64 │
│ ┆ ┆ eti ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ime[m ┆ i64 ┆ │
│ ┆ ┆ me[ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ s] ┆ ┆ │
│ ┆ ┆ ms] ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │
╞═════╪═════╪═════╪═══════╪════════╪══════╪═══════╪════════╪═══════╪═══════╪═══════╪═══════╪═══════╡
│ 210 ┆ 210 ┆ 202 ┆ 134 ┆ Thu ┆ 5 ┆ 1 ┆ 2025-0 ┆ null ┆ null ┆ null ┆ null ┆ null │
│ 36. ┆ 43. ┆ 5-0 ┆ ┆ ┆ ┆ ┆ 1-02 ┆ ┆ ┆ ┆ ┆ │
│ 48 ┆ 08 ┆ 1-0 ┆ ┆ ┆ ┆ ┆ 00:00: ┆ ┆ ┆ ┆ ┆ │
│ ┆ ┆ 2 ┆ ┆ ┆ ┆ ┆ 00 ┆ ┆ ┆ ┆ ┆ │
│ ┆ ┆ 00: ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │
│ ┆ ┆ 00: ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │
│ ┆ ┆ 00. ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │
│ ┆ ┆ 888 ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │
│ 210 ┆ 210 ┆ 202 ┆ 134 ┆ Thu ┆ 5 ┆ 2 ┆ 2025-0 ┆ 1 ┆ 21043 ┆ 2025- ┆ 1 ┆ 0.19 │
│ 37. ┆ 43. ┆ 5-0 ┆ ┆ ┆ ┆ ┆ 1-02 ┆ ┆ .08 ┆ 01-02 ┆ ┆ │
│ 54 ┆ 27 ┆ 1-0 ┆ ┆ ┆ ┆ ┆ 00:00: ┆ ┆ ┆ 00:00 ┆ ┆ │
│ ┆ ┆ 2 ┆ ┆ ┆ ┆ ┆ 00 ┆ ┆ ┆ :00.8 ┆ ┆ │
│ ┆ ┆ 00: ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 88 ┆ ┆ │
│ ┆ ┆ 00: ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │
│ ┆ ┆ 00. ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │
│ ┆ ┆ 889 ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │
│ 210 ┆ 210 ┆ 202 ┆ 134 ┆ Thu ┆ 5 ┆ 3 ┆ 2025-0 ┆ 1 ┆ 21043 ┆ 2025- ┆ 2 ┆ -0.53 │
│ 36. ┆ 42. ┆ 5-0 ┆ ┆ ┆ ┆ ┆ 1-02 ┆ ┆ .08 ┆ 01-02 ┆ ┆ │
│ 52 ┆ 55 ┆ 1-0 ┆ ┆ ┆ ┆ ┆ 00:00: ┆ ┆ ┆ 00:00 ┆ ┆ │
│ ┆ ┆ 2 ┆ ┆ ┆ ┆ ┆ 00 ┆ ┆ ┆ :00.8 ┆ ┆ │
│ ┆ ┆ 00: ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 88 ┆ ┆ │
Code: Select all
jw = df.join_where( df.select(['rowid', 'time_msc', 'ask']), pl.col("rowid") > pl.col("rowid_prevhi"), pl.col("ask") > pl.col("ask_prevhi"), suffix="_prevhi",)