# Code: Select all
import pymorphy2
from transformers import pipeline
# Module-level "text2text-generation" pipeline using the
# "cointegrated/rut5-base-paraphraser" model; created once when this module
# is loaded and reused by correct_text() for every call.
corrector = pipeline("text2text-generation", model="cointegrated/rut5-base-paraphraser")
# Shared pymorphy2 analyzer; correct_text() uses it to get each word's
# normal form and its nominative-case inflection.
morph = pymorphy2.MorphAnalyzer()
def _analyze_candidates(words):
    """Return ``(word, normal_form, nominative_word_or_None)`` for each word.

    Each word is parsed once with the module-level ``morph`` analyzer and only
    its first parse is used.
    """
    analyzed = []
    for word in words:
        parsed = morph.parse(word)[0]
        nominative = parsed.inflect({'nomn'})
        analyzed.append(
            (word, parsed.normal_form, nominative.word if nominative else None)
        )
    return analyzed


def correct_text(text: str) -> str:
    """Replace words of *text* with matching words from the model's rewrite.

    The text is passed to the module-level ``corrector`` pipeline and the
    ``'generated_text'`` of its first result is split on whitespace into
    candidate words. For every whitespace-separated word of the original
    text, the first candidate (in generated order) is chosen for which any of
    the following holds:

    * the candidate's normal form equals the original word's normal form;
    * the candidate's nominative-case form equals the original word's
      normal form;
    * the candidate's normal form is a substring of the original word, or
      the original word is a substring of the candidate's normal form.

    If no candidate matches, the original word is kept unchanged.

    Args:
        text: Input text; it is split on whitespace.

    Returns:
        The resulting words joined with single spaces. The output always has
        as many words as the input; an empty or whitespace-only input yields
        ``''`` without invoking the model.
    """
    original_words = text.split()
    if not original_words:
        # Nothing to align, so the result is '' regardless of what the model
        # would generate; skip the model call entirely.
        return ''

    result = corrector(text)
    # Analyze every generated word once up front instead of re-parsing the
    # whole candidate list for each original word.
    candidates = _analyze_candidates(result[0]['generated_text'].split())

    final_words = []
    for orig_word in original_words:
        orig_normal = morph.parse(orig_word)[0].normal_form
        # First candidate satisfying any of the matching rules wins; fall
        # back to the original word when none does.
        replacement = next(
            (
                word
                for word, normal, nominative in candidates
                if orig_normal == normal
                or nominative == orig_normal
                or normal in orig_word
                or orig_word in normal
            ),
            orig_word,
        )
        final_words.append(replacement)
    return ' '.join(final_words)