verbinden.
Code: Select all
import unicodedata
import string
def make_ascii(txt: str) -> str:
    """Return a case-folded copy of *txt* with diacritics stripped from ASCII letters.

    Processing steps:

    1. Leading and trailing whitespace is removed.
    2. The text is normalized to NFKD, which splits precomposed characters
       into a base character followed by combining marks
       (``"\u00e9"`` becomes ``"e"`` + U+0301) and expands compatibility
       characters (``"\u00bd"`` becomes ``"1\u20442"``, ``"\u2122"`` becomes ``"TM"``).
    3. Every combining mark whose base character is an ASCII letter is dropped.
       Marks attached to any other base character are kept.
    4. The result is case-folded (``"B"`` -> ``"b"``, ``"\u00df"`` -> ``"ss"``).

    Despite the name, the result is not guaranteed to be pure ASCII:
    characters that have no decomposition onto an ASCII letter (Greek
    letters, symbols, ...) pass through unchanged apart from case folding.

    Args:
        txt: The text to simplify.

    Returns:
        The simplified, case-folded text.
    """
    # Decomposition is required for the loop below: only after it do the
    # diacritics exist as separate combining characters that can be skipped.
    decomposed = unicodedata.normalize("NFKD", txt.strip())

    preserved = []
    latin_base = False  # True while the most recent base char is an ASCII letter
    for char in decomposed:
        if unicodedata.combining(char):
            # A combining mark never changes the current base character, so
            # several stacked marks on one Latin letter are all dropped.
            if not latin_base:
                preserved.append(char)
            continue
        # Not a combining mark, so this starts a new base character.
        preserved.append(char)
        latin_base = char in string.ascii_letters

    return "".join(preserved).casefold()
# Demo: a sample mixing accented Latin letters, symbols, Greek text,
# compatibility characters and explicit combining accents.
p1 = """´2´µEst陯‚™∑£´®´®´®†¥¨¨¨ˆøπåß∂ƒ©˙∆˚¬…ç√∫˜µεφυρος,÷rocafe\u0301'caféééßß½ààà'…A㊷cafe\N{COMBINING ACUTE ACCENT}4²'"""
# Blank line, then the raw sample.
print("", p1, sep="\n")
p2 = make_ascii(p1)
# The simplified text, followed by a blank line.
print(p2, end="\n\n")
# ´2´µEst陯‚™∑£´®´®´®†¥¨¨¨ˆøπåß∂ƒ©˙∆˚¬…ç√∫˜µεφυρος,÷rocafé'caféééßß½ààà'…A㊷café4²'
# ́2 ́μestetmæ‚tm∑£ ́® ́® ́®†¥ ̈ ̈ ̈ˆøπass∂ƒ© ̇∆ ̊¬...c√∫ ̃μεφυροσ,÷rocafe'cafeeessss1⁄2aaa'...a42cafe42'