Label Studio zeigt die Labels und den OCR-Text nicht an

Anonymous · Post by **Anonymous** » 05 Apr 2025, 21:26

Ich bin neu in Label Studio und habe die Aufgabe, ein Projekt zu erstellen, das mit LayoutLM Daten aus Rechnungen mit unterschiedlichen Layouts extrahiert. Dazu kennzeichne ich zunächst jedes Wort mit O und korrigiere anschließend die Entitäten, die ich extrahieren möchte.
Ich habe ein seltsames Problem: Ich rufe die Vorhersage aus Label Studio auf, das Backend antwortet mit Status 200, und im Vorhersage-Tab wird auch eine Vorhersage angezeigt. Trotzdem kommt der `ocr_text` nicht im Frontend an, und die Labels werden ebenfalls nicht angezeigt.

import pytesseract
import json
from PIL import Image
import os
from flask import Flask, request, jsonify

class OCRNERModel:
    """Tesseract-based pre-annotation model for a Label Studio project.

    For every non-empty word Tesseract finds, the model emits a rectangle
    labelled ``"O"`` together with a transcription text area for the same
    region.
    """

    def setup(self):
        """Required setup endpoint for Label Studio"""
        print("ML Backend is set up successfully.")
        return jsonify({"status": "ok"})

    @staticmethod
    def _build_regions(ocr_data, img_width, img_height):
        """Convert Tesseract word boxes into Label Studio result items.

        Args:
            ocr_data: Dict of parallel lists with the keys ``text``, ``left``,
                ``top``, ``width`` and ``height`` (pixel units), as produced by
                ``pytesseract.image_to_data(..., output_type=Output.DICT)``.
            img_width: Width of the source image in pixels.
            img_height: Height of the source image in pixels.

        Returns:
            A flat list holding, for every non-empty word, one
            ``rectanglelabels`` item followed by one ``textarea`` item.
        """
        regions = []
        id_counter = 1
        for i, raw_text in enumerate(ocr_data["text"]):
            # Index every column with ``i``; the lists are parallel.
            text = str(raw_text).strip()
            if not text:
                continue

            # Coordinates are expressed as percentages of the image size.
            box = {
                "x": (ocr_data["left"][i] / img_width) * 100,
                "y": (ocr_data["top"][i] / img_height) * 100,
                "width": (ocr_data["width"][i] / img_width) * 100,
                "height": (ocr_data["height"][i] / img_height) * 100,
                "rotation": 0,
            }
            # NOTE(review): per the Label Studio OCR examples, items that
            # describe the same region share one id and each carries the
            # region geometry plus the original image size -- confirm against
            # the project's labeling config.
            region_id = str(id_counter)
            common = {
                "id": region_id,
                "to_name": "image",
                "original_width": img_width,
                "original_height": img_height,
                "image_rotation": 0,
            }

            regions.append({
                **common,
                "type": "rectanglelabels",
                "from_name": "label",
                "value": {**box, "rectanglelabels": ["O"]},
            })
            regions.append({
                **common,
                "type": "textarea",
                "from_name": "transcription",
                "value": {**box, "text": [text]},  # Must be a list
            })

            id_counter += 1
        return regions

    def predict(self, request_data, **kwargs):
        """Run OCR on the first task's image and return a prediction response.

        Only the first entry of ``request_data["tasks"]`` is processed.

        Args:
            request_data: Parsed JSON body of the predict request; ``None`` is
                treated like an empty request.
            **kwargs: Accepted for compatibility and ignored.

        Returns:
            A Flask JSON response on success, or a ``(response, status)``
            tuple with status 400 (no tasks), 404 (image not found on disk)
            or 500 (OCR failure).
        """
        tasks = (request_data or {}).get("tasks", [])
        if not tasks:
            return jsonify({"error": "No tasks found in request."}), 400

        task = tasks[0]
        task_data = task.get("data") or {}
        image_url = task_data.get("image")
        image_path = image_url

        # Convert relative URL to local path
        local_prefix = "/data/local-files/?d="
        if image_path and image_path.startswith(local_prefix):
            image_path = "/" + image_path[len(local_prefix):]

        if not image_path or not os.path.exists(image_path):
            return jsonify({"error": f"Image file not found: {image_path}"}), 404

        try:
            # The context manager closes the file handle once OCR is done.
            with Image.open(image_path) as img:
                img_width, img_height = img.size
                ocr_data = pytesseract.image_to_data(
                    img, output_type=pytesseract.Output.DICT
                )
                ocr_text = pytesseract.image_to_string(img).strip()
        except Exception as e:
            return jsonify({"error": f"OCR extraction failed: {str(e)}"}), 500

        predictions = self._build_regions(ocr_data, img_width, img_height)

        # NOTE(review): Label Studio presumably consumes only "results"; the
        # "data" echo is kept for backward compatibility and debugging and is
        # not expected to alter the task data shown in the frontend -- confirm.
        response = {
            "data": {
                "image": image_url,
                "ocr_text": ocr_text,
            },
            "results": [
                {
                    "model_version": "ocr-bbox-v1",
                    "result": predictions,
                }
            ],
        }

        print("Sending Response:", json.dumps(response, indent=2))
        return jsonify(response)

# Initialize Flask app and model instance
# Single shared model instance used by the route handlers.
model = OCRNERModel()
# Flask application object the routes are registered on.
app = Flask(__name__)

# Setup endpoint to configure the model backend
@app.route('/setup', methods=['POST'])
def setup():
    """Handle the POST /setup call.

    Returns:
        A JSON response with ``status`` and the ``model_version`` string that
        the predict response also reports.
    """
    # NOTE(review): Label Studio is expected to read "model_version" from the
    # setup response; "status" is kept so existing callers see no change.
    return jsonify({"status": "ok", "model_version": "ocr-bbox-v1"})

# Predict endpoint that Label Studio will call
@app.route('/predict', methods=['POST'])
def predict():
    """Handle POST /predict by delegating to the shared model instance.

    Returns:
        Whatever ``model.predict`` returns: a JSON response, or a
        ``(response, status)`` tuple on error.
    """
    # silent=True yields None instead of aborting on a missing or malformed
    # JSON body; falling back to {} lets model.predict answer with its own
    # "No tasks found" 400 rather than crashing on None.
    data = request.get_json(silent=True) or {}
    print(f"Received data: {json.dumps(data, indent=2)}")
    return model.predict(data)

# Health check endpoint for Label Studio to verify the backend is up
@app.route('/health', methods=['GET'])
def health():
    """Report that the backend process is up (GET /health)."""
    payload = {"status": "ok"}
    return jsonify(payload)

# Run the Flask application
if __name__ == "__main__":
    # Listen on all interfaces, port 8081.
    app.run(port=8081, host="0.0.0.0")
< /code>
Hier ist meine Annotationskonfiguration:

< /code>
Ich habe mir einige Beispiele und auch die Dokumentation angesehen, und dort funktioniert alles. Übersehe ich hier etwas?
Danke

Etikettstudio präsentiert nicht die Etiketten und den OCR -Text

Etikettstudio präsentiert nicht die Etiketten und den OCR -Text ⇐ Python

Quick Reply