How do I stop pandas .aggregate(engine='numba') from downcasting uint64 to int64?


Post by Anonymous »

Is there a way to prevent the data from being downcast from uint64 to int64? The `NumbaTypeSafetyWarning` below correctly predicts a data-type overflow.

Code: Select all

C:\apps\mapFolding\.venv\Lib\site-packages\pandas\core\_numba\executor.py:82:
NumbaTypeSafetyWarning: unsafe cast from uint64 to int64. Precision may be lost.
na_positions[i] = np.array(na_pos)
Relevant code

Code: Select all
dictionaryCurveLocations: dict[int, int]

datatypeCurveLocations = numpy.uint64
datatypeDistinctCrossings = numpy.uint64

dataframeAnalyzed = pandas.DataFrame({
'analyzed': pandas.Series(name='analyzed'
, data=list(dictionaryCurveLocations.keys())
, dtype=datatypeCurveLocations)
, 'distinctCrossings': pandas.Series(name='distinctCrossings'
, data=list(dictionaryCurveLocations.values())
, dtype=datatypeDistinctCrossings)
}, dtype=datatypeCurveLocations
)

dataframeAnalyzed = dataframeAnalyzed.groupby('analyzed', sort=False)['distinctCrossings'
].aggregate('sum'
, engine='numba'
, engine_kwargs={'nogil': True, 'nopython': True, 'parallel': True}
).reset_index()
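For reference, a fallback sketch, assuming the numba engine is not strictly required here: the same aggregation with the default (Cython) groupby engine keeps the uint64 dtype and raises no warning.

Code: Select all

# Fallback sketch: identical aggregation without engine='numba'; the default
# groupby engine preserves the uint64 dtype of 'distinctCrossings'.
dataframeAnalyzed = dataframeAnalyzed.groupby('analyzed', sort=False)['distinctCrossings'
].aggregate('sum').reset_index()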
Environment

Code: Select all
(.venv) C:\apps\mapFolding>py -VV
Python 3.13.7 (tags/v3.13.7:bcee1c3, Aug 14 2025, 14:15:11) [MSC v.1944 64 bit (AMD64)]

(.venv) C:\apps\mapFolding>pip show numba
Name: numba
Version: 0.61.2

(.venv) C:\apps\mapFolding>pip show pandas
Name: pandas
Version: 2.3.2

(.venv) C:\apps\mapFolding>ver

Microsoft Windows [Version 10.0.26100.5074]
The complete module
If you clone the package, you must replace the working line with the function above.

Code: Select all

"""Count meanders with matrix transfer algorithm."""
from functools import cache
from gc import collect as goByeBye
import numpy
import pandas
import sys

# ----------------- environment configuration -------------------------------------------------------------------------
_bitWidthOfFixedSizeInteger: int = 64

_bitWidthOffsetCurveLocationsNecessary: int = 3 # `curveLocations` analysis may need 3 extra bits.

def _flipTheExtra_0b1(intWithExtra_0b1: numpy.uint64) -> numpy.uint64:
return numpy.uint64(intWithExtra_0b1 ^ walkDyckPath(int(intWithExtra_0b1)))

flipTheExtra_0b1AsUfunc = numpy.frompyfunc(_flipTheExtra_0b1, 1, 1)

def getLocatorGroupAlpha(bitWidth: int) -> int:
"""Compute an odd-parity bit-mask with `bitWidth` bits.

Notes
-----
In binary, `locatorGroupAlpha` has alternating 0s and 1s and ends with a 1, such as '101', '0101', and '10101'. The last
digit is in the 1's column, but programmers usually call it the "least significant bit" (LSB).  If we count the columns
from the right, the 1's column is column 1, the 2's column is column 2, the 4's column is column 3, and so on. When
counting this way, `locatorGroupAlpha` has 1s in the columns with odd index numbers. Mathematicians and programmers,
therefore, tend to call `locatorGroupAlpha` something like the "odd bit-mask", the "odd-parity numbers", or simply "odd
mask" or "odd numbers". In addition to "odd" being inherently ambiguous in this context, this algorithm also segregates
odd numbers from even numbers, so I avoid using "odd" and "even" in the names of these bit-masks.

"""
return sum(1 << bitPosition for bitPosition in range(0, bitWidth, 2))

def getLocatorGroupZulu(bitWidth: int) -> int:
"""Compute an even-parity bit-mask with `bitWidth` bits."""
return sum(1 << bitPosition for bitPosition in range(1, bitWidth, 2))

def getMAXIMUMcurveLocations(indexTransferMatrix: int) -> int:
"""Compute the maximum value of `curveLocations` for the current iteration of the transfer matrix."""
return 1 << ...

def outfitDictionaryCurveGroups(dictionaryCurveLocations: dict[int, int]) -> dict[tuple[int, int], int]:
"""Outfit `dictionaryCurveGroups` so it may manage the computations for one iteration of the transfer matrix.

`dictionaryCurveGroups` holds the input data, and `dictionaryCurveLocations` aggregates the output data as it is computed.

Parameters
----------
dictionaryCurveLocations : dict[int, int]
A dictionary of `curveLocations` to `distinctCrossings`.

Returns
-------
dictionaryCurveGroups : dict[tuple[int, int], int]
A dictionary of `(groupAlpha, groupZulu)` to `distinctCrossings`.
"""
bitWidth: int = max(dictionaryCurveLocations.keys()).bit_length()
locatorGroupAlpha: int = getLocatorGroupAlpha(bitWidth)
locatorGroupZulu: int = getLocatorGroupZulu(bitWidth)
return {(curveLocations & locatorGroupAlpha, (curveLocations & locatorGroupZulu) >> 1): distinctCrossings
for curveLocations, distinctCrossings in dictionaryCurveLocations.items()}

@cache
def walkDyckPath(intWithExtra_0b1: int) -> int:
"""Find the bit position for flipping paired curve endpoints in meander transfer matrices.

Parameters
----------
intWithExtra_0b1 : int
Binary representation of curve locations with an extra bit encoding parity information.

Returns
-------
flipExtra_0b1_Here : int
Bit mask indicating the position where the balance condition fails, formatted as 2^(2k).

3L33T H@X0R
------------
Binary search for first negative balance in shifted bit pairs. Returns 2^(2k) mask for
bit position k where cumulative balance counter transitions from non-negative to negative.

Mathematics
-----------
Implements the Dyck path balance verification algorithm from Jensen's transfer matrix
enumeration.  Computes the position where ∑(i=0 to k) (-1)^b_i < 0 for the first time,
where b_i are the bits of the input at positions 2i.

"""
findTheExtra_0b1: int = 0
flipExtra_0b1_Here: int = 1
while True:
flipExtra_0b1_Here  0
and ((max(dictionaryCurveLocations.keys()).bit_length() > bitWidthCurveLocationsMaximum)
or (max(dictionaryCurveLocations.values()).bit_length() > bitWidthDistinctCrossingsMaximum))):

indexTransferMatrix -= 1

MAXIMUMcurveLocations: int = getMAXIMUMcurveLocations(indexTransferMatrix)
dictionaryCurveGroups = outfitDictionaryCurveGroups(dictionaryCurveLocations)
dictionaryCurveLocations = {}
# TODO is `dictionaryCurveLocations.clear()` better for garbage collection?

for (groupAlpha, groupZulu), distinctCrossings in dictionaryCurveGroups.items():
groupAlphaCurves: bool = groupAlpha > 1
groupZuluCurves: bool = groupZulu > 1
groupAlphaIsEven = groupZuluIsEven = 0

# simple
curveLocationAnalysis = ((groupAlpha | (groupZulu  2) | (groupZulu  1) | (groupAlpha > 2) > 2)
if curveLocationAnalysis < MAXIMUMcurveLocations:
dictionaryCurveLocations[curveLocationAnalysis] = dictionaryCurveLocations.get(curveLocationAnalysis, 0) + distinctCrossings

return (indexTransferMatrix, dictionaryCurveLocations)

def countPandas(indexTransferMatrix: int, dictionaryCurveLocations: dict[int, int]) -> tuple[int, dict[int, int]]:
"""Count meanders with matrix transfer algorithm using pandas DataFrame.

indexTransferMatrix : int
The current index in the transfer matrix algorithm.
dictionaryCurveLocations : dict[int, int]
A dictionary of `curveLocations` to `distinctCrossings`.

Returns
-------
matrixMeandersState : tuple[int, dict[int, int]]
The state of the algorithm computation: the current `indexTransferMatrix`, `curveLocations`, and `distinctCrossings`.
"""
def aggregateCurveLocations(MAXIMUMcurveLocations: int) -> None:
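# Zero out 'analyzed' values at or above MAXIMUMcurveLocations, fold the remaining rows into dataframeAnalyzed via a groupby sum (default engine), then clear the 'analyzed' column.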
nonlocal dataframeAnalyzed, dataframeCurveLocations

dataframeCurveLocations.loc[dataframeCurveLocations['analyzed'] >= MAXIMUMcurveLocations, 'analyzed'] = 0

dataframeAnalyzed = pandas.concat([dataframeAnalyzed
, dataframeCurveLocations.loc[(dataframeCurveLocations['analyzed'] > 0), ['analyzed', 'distinctCrossings']].reset_index(
).groupby(by='analyzed', as_index=False, sort=True, group_keys=False)['distinctCrossings'
].aggregate('sum').reset_index()
], ignore_index=True)

dataframeCurveLocations.loc[:, 'analyzed'] = 0

def analyzeCurveLocationsAligned(MAXIMUMcurveLocations: int) ->  None:
"""Compute `curveLocations` from `groupAlpha` and `groupZulu` if at least one is an even number.

Before computing `curveLocations`, some values of `groupAlpha` and `groupZulu` are modified.

Warning
-------
This function deletes rows from `dataframeCurveLocations`. Always run this analysis last.

Formula
-------
```python
if groupAlpha > 1 and groupZulu > 1 and (groupAlphaIsEven or groupZuluIsEven):
curveLocations = (groupAlpha >> 2) | ((groupZulu >> 2)  1 and groupZulu > 1`, its 'dropModify' value will be between -20 and -8. If a row passes that check but fails
`(groupAlphaIsEven or groupZuluIsEven)`, meaning both values are odd numbers, its 'dropModify' value will be 0. Rows with
`['dropModify']  1:
curveLocations = ((1 - (groupAlpha & 1))  2)
```

Parameters
----------
MAXIMUMcurveLocations : int
Maximum value of `curveLocations` for the current iteration of `bridges`.
"""
nonlocal dataframeCurveLocations
dataframeCurveLocations.loc[:, 'analyzed'] = dataframeCurveLocations['groupAlpha']
dataframeCurveLocations.loc[:, 'analyzed'] &= 1 # (groupAlpha & 1)
dataframeCurveLocations.loc[:, 'analyzed'] = 1 - dataframeCurveLocations['analyzed'] # (1 - (groupAlpha ...))

dataframeCurveLocations.loc[dataframeCurveLocations['analyzed'] == 1, 'dropModify'] += groupAlphaAtEven # groupAlphaIsEven

dataframeCurveLocations.loc[:, 'analyzed'] *= 2**1 # ((groupAlpha ...)  2)
dataframeCurveLocations.loc[:, 'analyzed'] |= dataframeCurveLocations['groupAlpha'] # ... | (groupAlpha)
dataframeCurveLocations.loc[:, 'analyzed'] //= 2**2 # (... >> 2)
dataframeCurveLocations.loc[dataframeCurveLocations['groupAlpha']  1

aggregateCurveLocations(MAXIMUMcurveLocations)
computeCurveGroups(alpha=False)

def analyzeCurveLocationsSimple(MAXIMUMcurveLocations: int) -> None:
"""Compute curveLocations with the 'simple' bridges formula.

Formula
-------
```python
curveLocations = ((groupAlpha | (groupZulu  1:
curveLocations = (1 - (groupZulu & 1)) | (groupAlpha > 1)
```

Parameters
----------
MAXIMUMcurveLocations : int
Maximum value of `curveLocations` for the current iteration of `bridges`.
"""
nonlocal dataframeCurveLocations
dataframeCurveLocations.loc[:, 'analyzed'] = dataframeCurveLocations['groupZulu']
dataframeCurveLocations.loc[:, 'analyzed'] &= 1 # (groupZulu & 1)
dataframeCurveLocations.loc[:, 'analyzed'] = 1 - dataframeCurveLocations['analyzed'] # (1 - (groupZulu ...))

dataframeCurveLocations.loc[(dataframeCurveLocations['analyzed'] == 1), 'dropModify'] += groupZuluAtEven # groupZuluIsEven

dataframeCurveLocations.loc[:, 'groupAlpha'] *= 2**2 # (groupAlpha << 2)
dataframeCurveLocations.loc[:, 'analyzed'] |= dataframeCurveLocations['groupZulu'] # ... | (groupZulu)
dataframeCurveLocations.loc[:, 'analyzed'] //= 2**1 # (...  >> 1)
dataframeCurveLocations.loc[dataframeCurveLocations['groupZulu']  1

aggregateCurveLocations(MAXIMUMcurveLocations)
computeCurveGroups(zulu=False)

def computeCurveGroups(*, alpha: bool = True, zulu: bool = True) -> None:
"""Compute `groupAlpha` and `groupZulu` with 'bit-masks' on `curveLocations`.

Parameters
----------
alpha : bool = True
Should column `groupAlpha` be computed?

zulu : bool = True
Should column `groupZulu` be computed?

3L33T H@X0R
-----------
- `groupAlpha`: odd-parity bit-masked `curveLocations`
- `groupZulu`: even-parity bit-masked `curveLocations`
"""
nonlocal dataframeCurveLocations
bitWidth: int = int(dataframeCurveLocations['curveLocations'].max()).bit_length()
if alpha:
dataframeCurveLocations['groupAlpha'] = dataframeCurveLocations['curveLocations']
dataframeCurveLocations.loc[:, 'groupAlpha'] &= getLocatorGroupAlpha(bitWidth)
if zulu:
dataframeCurveLocations['groupZulu'] = dataframeCurveLocations['curveLocations']
dataframeCurveLocations.loc[:, 'groupZulu'] &= getLocatorGroupZulu(bitWidth)
dataframeCurveLocations.loc[:, 'groupZulu'] //= 2**1 # (groupZulu >> 1)

def outfitDataframeCurveLocations() -> None:
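# Recycle the frames: empty dataframeCurveLocations, refill it from dataframeAnalyzed, reset 'dropModify' and 'analyzed', empty dataframeAnalyzed, then recompute the bit-mask groups.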
nonlocal dataframeAnalyzed, dataframeCurveLocations
dataframeCurveLocations = dataframeCurveLocations.iloc[0:0]
dataframeCurveLocations['curveLocations'] = dataframeAnalyzed['analyzed']
dataframeCurveLocations['distinctCrossings'] = dataframeAnalyzed['distinctCrossings']
dataframeCurveLocations['dropModify'] = groupsHaveCurvesNotEven
dataframeCurveLocations['analyzed'] = 0
dataframeAnalyzed = dataframeAnalyzed.iloc[0:0]
computeCurveGroups()

dataframeAnalyzed = pandas.DataFrame({
'analyzed': pandas.Series(name='analyzed', data=list(dictionaryCurveLocations.keys()), dtype=datatypeCurveLocations)
, 'distinctCrossings': pandas.Series(name='distinctCrossings', data=list(dictionaryCurveLocations.values()), dtype=datatypeDistinctCrossings)
}, dtype=datatypeCurveLocations
)
del dictionaryCurveLocations

dataframeCurveLocations = pandas.DataFrame({
'curveLocations': pandas.Series(name='curveLocations', data=0, dtype=datatypeCurveLocations)
, 'groupAlpha': pandas.Series(name='groupAlpha', data=0, dtype=datatypeCurveLocations)
, 'groupZulu': pandas.Series(name='groupZulu', data=0, dtype=datatypeCurveLocations)
, 'analyzed': pandas.Series(name='analyzed', data=0, dtype=datatypeCurveLocations)
, 'distinctCrossings': pandas.Series(name='distinctCrossings', data=0, dtype=datatypeDistinctCrossings)
, 'dropModify': pandas.Series(name='dropModify', data=groupsHaveCurvesNotEven, dtype=numpy.int8)
}
)

while (indexTransferMatrix >  0
and (int(dataframeAnalyzed['analyzed'].max()).bit_length()  0:
bitWidthCurveLocations: int = max(dictionaryCurveLocations.keys()).bit_length()
bitWidthDistinctCrossings: int = max(dictionaryCurveLocations.values()).bit_length()

goByeBye()

if (bitWidthCurveLocations > bitWidthCurveLocationsMaximum) or (bitWidthDistinctCrossings > bitWidthDistinctCrossingsMaximum):
sys.stdout.write(f"countBigInt({indexTransferMatrix}).\t")
indexTransferMatrix, dictionaryCurveLocations = countBigInt(indexTransferMatrix, dictionaryCurveLocations)
else:
sys.stdout.write(f"countPandas({indexTransferMatrix}).\t")
indexTransferMatrix, dictionaryCurveLocations = countPandas(indexTransferMatrix, dictionaryCurveLocations)

return sum(dictionaryCurveLocations.values())

@cache
def A000682(n: int) -> int:
"""Compute A000682(n)."""
if n & 0b1:
curveLocations: int = 5
else:
curveLocations = 1
listCurveLocations: list[int] = [(curveLocations
