by Anonymous » 17 Aug 2025, 02:59
Wenn ich eine horizontale kumulative Summe verwende, entsteht eine Spalte namens "literal", die im Schema erhalten bleibt, auch wenn sie mit drop entfernt wird.
import polars as pl
def test_literal_bug():
    """Reproduce the stray "literal" column seen with ``pl.cum_sum_horizontal``.

    Builds a small frame, applies a horizontal cumulative sum lazily, unnests
    the resulting "cum_sum" struct and renames the time columns. The schema is
    printed at each stage, and the function reports whether collecting the
    lazy query raises ``ColumnNotFoundError``, both before (v1) and after (v2)
    dropping "literal" and concatenating horizontally with another frame.
    """
    print("Polars version:", pl.__version__)

    # Create simple test data
    df = pl.DataFrame({
        "A": [1, 2, 3],
        "T0": [0.1, 0.2, 0.3],
        "T1": [0.4, 0.5, 0.6],
        "T2": [0.7, 0.8, 0.9],
    })
    time_cols = ["T0", "T1", "T2"]
    print("Original columns:", df.columns)
    print("Time columns:", time_cols)

    lazy_df = df.lazy()
    print("Schema before cumsum:", lazy_df.collect_schema().names())

    result = (
        lazy_df.select(pl.cum_sum_horizontal(time_cols))
        .unnest("cum_sum")
        .rename({col: f"C{col}" for col in time_cols})
    )
    print("Schema after cumsum:", result.collect_schema().names())

    # v1: collecting is expected to fail with
    # ColumnNotFoundError: "literal" not found
    try:
        result.collect()
    except pl.exceptions.ColumnNotFoundError as e:
        print(f"v1: BUG REPRODUCED: {e}")
    else:
        print("v1: No bug reproduced")

    # v2: drop the "literal" column explicitly, then concatenate horizontally
    # with an unrelated frame to check whether the error persists.
    result_2 = result.drop("literal")
    result_2 = pl.concat(
        [pl.LazyFrame({"B": [1, 2, 3]}), result_2],
        how="horizontal",
    )
    print("Schema after drop and concat:", result_2.collect_schema().names())

    try:
        result_2.collect()
    except pl.exceptions.ColumnNotFoundError as e:
        print(f"v2: BUG REPRODUCED: {e}")
    else:
        print("v2: No bug reproduced")
# Run the reproduction only when executed as a script, not on import.
if __name__ == "__main__":
    test_literal_bug()
</code>
Ausgabe: </p>
Polars version: 1.31.0
Original columns: ['A', 'T0', 'T1', 'T2']
Time columns: ['T0', 'T1', 'T2']
Schema before cumsum: ['A', 'T0', 'T1', 'T2']
Schema after cumsum: ['CT0', 'CT1', 'CT2', 'literal']
v1: BUG REPRODUCED: "literal" not found
Schema after drop and concat: ['B', 'CT0', 'CT1', 'CT2']
v2: BUG REPRODUCED: "literal" not found
</code>
Was ist los? Mache ich etwas falsch oder ist es ein Fehler?
Wenn ich eine horizontale kumulative Summe verwende, entsteht eine Spalte namens "literal", die im Schema erhalten bleibt, auch wenn sie mit drop entfernt wird.
import polars as pl
def test_literal_bug():
    """Reproduce the stray "literal" column seen with ``pl.cum_sum_horizontal``.

    Builds a small frame, applies a horizontal cumulative sum lazily, unnests
    the resulting "cum_sum" struct and renames the time columns. The schema is
    printed at each stage, and the function reports whether collecting the
    lazy query raises ``ColumnNotFoundError``, both before (v1) and after (v2)
    dropping "literal" and concatenating horizontally with another frame.
    """
    print("Polars version:", pl.__version__)

    # Create simple test data
    df = pl.DataFrame({
        "A": [1, 2, 3],
        "T0": [0.1, 0.2, 0.3],
        "T1": [0.4, 0.5, 0.6],
        "T2": [0.7, 0.8, 0.9],
    })
    time_cols = ["T0", "T1", "T2"]
    print("Original columns:", df.columns)
    print("Time columns:", time_cols)

    lazy_df = df.lazy()
    print("Schema before cumsum:", lazy_df.collect_schema().names())

    result = (
        lazy_df.select(pl.cum_sum_horizontal(time_cols))
        .unnest("cum_sum")
        .rename({col: f"C{col}" for col in time_cols})
    )
    print("Schema after cumsum:", result.collect_schema().names())

    # v1: collecting is expected to fail with
    # ColumnNotFoundError: "literal" not found
    try:
        result.collect()
    except pl.exceptions.ColumnNotFoundError as e:
        print(f"v1: BUG REPRODUCED: {e}")
    else:
        print("v1: No bug reproduced")

    # v2: drop the "literal" column explicitly, then concatenate horizontally
    # with an unrelated frame to check whether the error persists.
    result_2 = result.drop("literal")
    result_2 = pl.concat(
        [pl.LazyFrame({"B": [1, 2, 3]}), result_2],
        how="horizontal",
    )
    print("Schema after drop and concat:", result_2.collect_schema().names())

    try:
        result_2.collect()
    except pl.exceptions.ColumnNotFoundError as e:
        print(f"v2: BUG REPRODUCED: {e}")
    else:
        print("v2: No bug reproduced")
# Run the reproduction only when executed as a script, not on import.
if __name__ == "__main__":
    test_literal_bug()
</code>
Ausgabe: </p>
Polars version: 1.31.0
Original columns: ['A', 'T0', 'T1', 'T2']
Time columns: ['T0', 'T1', 'T2']
Schema before cumsum: ['A', 'T0', 'T1', 'T2']
Schema after cumsum: ['CT0', 'CT1', 'CT2', 'literal']
v1: BUG REPRODUCED: "literal" not found
Schema after drop and concat: ['B', 'CT0', 'CT1', 'CT2']
v2: BUG REPRODUCED: "literal" not found
</code>
Was ist los? Mache ich etwas falsch oder ist es ein Fehler?