Hier ist eine vereinfachte Version meines Codes:
Code: Select all
import functools
import gc
import time
from datetime import datetime

import psutil
import pyarrow as pa
import pyarrow.compute as pc
import pyarrow.parquet as pq
def print_memory_usage():
    """Print the current process's resident set size (RSS) in MB."""
    rss_bytes = psutil.Process().memory_info().rss
    print(f"Memory Usage: {rss_bytes / 1024 / 1024:.2f} MB")
def mem_and_time(func):
    """Decorator that reports wall-clock time and memory usage per call.

    After each invocation of the wrapped function it prints the elapsed
    time and the process RSS (via print_memory_usage), then returns the
    wrapped function's result unchanged.
    """
    @functools.wraps(func)  # keep __name__/__doc__ of the wrapped function
    def wrapper(*args, **kwargs):
        # perf_counter is monotonic and higher-resolution than time.time(),
        # so it is the correct clock for measuring durations.
        start = time.perf_counter()
        result = func(*args, **kwargs)
        elapsed = time.perf_counter() - start
        print(f"Execution Time: {elapsed:.6f} seconds")
        print_memory_usage()
        return result
    return wrapper
@mem_and_time
def test_func_pyarrow(path="/Users/test.parquet"):
    """Read a Parquet file into an Arrow table and release it immediately.

    Used to demonstrate that RSS keeps growing across calls even though the
    table is deleted — the growth also occurs with pd.read_parquet or
    pl.read_parquet, pointing at the Arrow memory pool rather than a
    specific reader API.

    Args:
        path: Parquet file to read; the default preserves the original
            hard-coded location.

    Returns:
        None.
    """
    table = pq.read_table(path)
    del table
    gc.collect()
    # jemalloc is only one possible allocator backend; on builds without it
    # this call raises NotImplementedError, so guard it instead of crashing.
    try:
        pa.jemalloc_set_decay_ms(0)
    except NotImplementedError:
        pass
    # Ask the default memory pool to hand unused bytes back to the OS.
    pool = pa.default_memory_pool()
    pool.release_unused()
    return None
if __name__ == "__main__":
    # Repeat the read/release cycle so the RSS trend becomes visible;
    # the pause between runs gives the allocator time to return memory.
    for run in range(1000):
        test_func_pyarrow()
        time.sleep(20)
</code>
Beim Ausführen dieses Skripts zeigt die Speicherverbrauchsausgabe eine stetige Erhöhung:
Execution Time: 0.985749 seconds
Memory Usage: 4542.09 MB
Execution Time: 0.873830 seconds
Memory Usage: 5926.19 MB
...
Execution Time: 0.774829 seconds
Memory Usage: 7985.73 MB
</code>
Wie kann ich dieses Problem beheben? Es geschieht auch, wenn ich pd.read_parquet verwende.