Code:
import multiprocessing as mp
import numpy as np
import tqdm
class CustomClass:
    """Container for the large, read-only resources shared with pool workers.

    Built once in the parent process and handed to each worker via the pool
    initializer so it is pickled only once per worker, not once per task.
    """

    def __init__(self, data):
        """Store the payload.

        BUG FIX: the original spelled the constructor ``__init`` — without the
        trailing underscores it is an ordinary method, so ``CustomClass(data)``
        never ran it.

        NOTE(review): the original comment says this "assigns multiple huge
        vectors and objects" (~1GB); this placeholder just stores *data* —
        restore the real initialization here.
        """
        self.data = data
class ParallelComputation:
    """Fans worker_task out over many rows with a multiprocessing pool.

    Windows-safe pattern: the ~1GB object is NOT captured in the task
    arguments; it is sent once per worker through the pool *initializer*
    and read back from module globals inside each worker process.
    """

    ...  # remaining attributes/methods elided in the original snippet

    def compute(self, n_jobs=-1):
        """Run the parallel computation and return the assembled result matrix.

        Parameters
        ----------
        n_jobs : int, optional
            Number of worker processes; ``-1`` (default) or ``None`` means
            "use all available cores".

        Fixes over the original:
        - added the missing ``self`` parameter (it is called as a method),
        - ``CustomCLass`` typo -> ``CustomClass``,
        - ``mp.Pool`` does not accept ``processes=-1``; map -1/None to ``None``,
        - ``import tqdm`` binds the module, so the progress bar is
          ``tqdm.tqdm(...)``, not ``tqdm(...)``,
        - ``n_rows`` and ``out`` were undefined; they are derived from the
          inputs here.
        """
        # NOTE(review): the original referenced a bare `data` name; presumably
        # the payload lives on the instance — confirm against the real ctor.
        custom_obj = CustomClass(getattr(self, "data", None))  # ~1GB of memory
        cfg = dict(some_config=True)
        # Placeholder task list: (row_index, row_payload) tuples.
        inputs = [(i, np.random.rand(1, 300)) for i in range(2)]
        n_rows = len(inputs)
        out = np.empty((n_rows, 300))
        # Pool(processes=None) uses os.cpu_count(); -1 is the sklearn-style alias.
        procs = None if n_jobs in (None, -1) else n_jobs
        # worker must be a top-level function: Windows uses the "spawn" start
        # method, which pickles the callable for each child process.
        worker = worker_task
        with mp.Pool(processes=procs,
                     initializer=_set_globals,
                     initargs=(custom_obj, cfg)) as pool:
            # chunksize=8 amortizes IPC overhead across small tasks.
            for result in tqdm.tqdm(pool.imap(worker, inputs, chunksize=8),
                                    total=n_rows):
                i, row = result
                out[i, :] = row
        return out
def worker_task(args):
    """Per-task worker executed inside the pool processes.

    Reads the big shared object and the config from module globals that were
    set by ``_set_globals`` (the pool initializer), so neither is re-pickled
    for every task — only ``args`` travels over the IPC channel.

    NOTE(review): the consumer unpacks each result as ``(i, row)`` and writes
    ``out[i, :] = row``, so this function is expected to return that tuple —
    the actual computation is elided in this snippet; confirm in the real code.
    """
    lib = _G_LIB  # the large CustomClass instance (one copy per worker process)
    cfg = _G_CFG  # shared configuration dict
    # some computation
def _set_globals(custom_obj, cfg):
global _G_LIB, _G_CFG
_G_LIB = custom_obj
_G_CFG = cfg
Code:
if __name__ == "__main__":
    # Required on Windows: multiprocessing uses the "spawn" start method,
    # which re-imports this module in every child process — without this
    # guard, pool creation would recurse endlessly.
    parallel = ParallelComputation(...)  # NOTE(review): `...` is a snippet placeholder for the real ctor args
    parallel.compute()
Wie kann ich Multiprocessing mit einem großen CustomClass-Objekt unter Windows effizient nutzen?
Mobile version