YOLOv8-Handerkennung versagt nach der TensorFlow.js-Konvertierung im Nahbereich

Anonymous · Post by **Anonymous** » 27 Feb 2025, 05:29

Ich verwende YOLOv8 für die Echtzeit-Handerkennung in einer Web-App. Das Modell funktioniert in Python gut, aber nach der Konvertierung nach TensorFlow.js hat die Erkennung Probleme, wenn die Hand zu nah an der Webcam ist – manchmal wird die Hand gar nicht erkannt, manchmal wird die Begrenzungsbox falsch platziert. Das Problem scheint mit der Größenvariation (Skalierung) zusammenzuhängen, tritt jedoch erst nach der Konvertierung nach TensorFlow.js auf.

Code: Select all

import os
from ultralytics import YOLO
import shutil
import tensorflow as tf
from google.colab import files as colab_files

def find_saved_model(base_path):
    """Return the first directory under ``base_path`` containing ``saved_model.pb``.

    The tree is walked with ``os.walk``, so ``base_path`` itself is checked
    before any of its subdirectories.

    Args:
        base_path (str): Root directory of the export to search.

    Returns:
        str | None: The directory that holds ``saved_model.pb``, or ``None``
        when no such file is found (``os.walk`` yields nothing for a path
        that does not exist, so a missing ``base_path`` also gives ``None``).
    """
    for root, _dirs, filenames in os.walk(base_path):
        if 'saved_model.pb' in filenames:
            return root
    return None

def add_signatures(saved_model_dir, input_shape=(1, 640, 640, 3)):
    """Re-save a SavedModel with an explicit ``serving_default`` signature.

    The model is loaded, wrapped in a ``tf.function`` with a fixed float32
    input named ``images``, and saved back to the same directory with that
    concrete function registered as ``serving_default``.

    Args:
        saved_model_dir (str): Directory containing the SavedModel to patch.
        input_shape (tuple[int, ...]): Static input shape of the signature.
            Defaults to ``(1, 640, 640, 3)``, the shape that was previously
            hard-coded.

    Returns:
        str: ``saved_model_dir``, unchanged, so calls can be chained.
    """
    print("Adding signatures to SavedModel...")

    # Load the model
    model = tf.saved_model.load(saved_model_dir)

    # Wrapper with a fixed input signature so the converter sees a concrete
    # serving function.
    @tf.function(input_signature=[
        tf.TensorSpec(shape=list(input_shape), dtype=tf.float32, name='images')
    ])
    def serving_fn(images):
        # Second positional argument False = not training.
        # NOTE(review): the third argument (None) is presumably the Keras-style
        # `mask` parameter of the restored call signature - confirm.
        return model(images, False, None)

    concrete_func = serving_fn.get_concrete_function()

    # NOTE(review): this overwrites the directory the model was just loaded
    # from; saving to a fresh directory would be safer if this ever misbehaves.
    tf.saved_model.save(
        model,
        saved_model_dir,
        signatures={
            'serving_default': concrete_func
        }
    )

    print("Signatures added successfully")
    return saved_model_dir

def convert_to_tfjs(pt_model_path, output_dir):
    """
    Convert a PyTorch YOLO model to TensorFlow.js format

    Steps: export the checkpoint to a TensorFlow SavedModel, add a
    ``serving_default`` signature, run ``tensorflowjs_converter`` on it, zip
    the result and hand the zip to the Colab download helper.

    Args:
        pt_model_path (str): Path to the .pt file
        output_dir (str): Directory to save the converted model

    Raises:
        RuntimeError: If the exported SavedModel cannot be located, the
            converter exits with a non-zero status, or no ``model.json`` is
            produced.
        Exception: Any other error from the export/conversion is printed with
            debug information and re-raised unchanged.
    """
    # Local import keeps this block self-contained; the converter is invoked
    # with an argument list so paths with spaces/quotes need no shell quoting.
    import subprocess

    # Defined before the try so the error handler can test it safely.
    saved_model_dir = None
    try:
        # Ensure output directory exists
        os.makedirs(output_dir, exist_ok=True)

        # Load the model
        print(f"Loading YOLO model from {pt_model_path}...")
        model = YOLO(pt_model_path)

        # First export to TensorFlow format
        print("Exporting to TensorFlow format...")
        exported = model.export(
            format='saved_model',
            imgsz=640,
            half=False,
            simplify=True
        )

        # Candidate locations of the export, most specific first.
        # NOTE(review): Ultralytics is expected to return the export path and
        # to write "<stem>_saved_model" next to the weights - confirm against
        # the installed version. The old hard-coded location is kept last.
        candidates = []
        if isinstance(exported, (str, os.PathLike)):
            candidates.append(os.fspath(exported))
        stem = os.path.splitext(os.path.basename(pt_model_path))[0]
        weights_dir = os.path.dirname(os.path.abspath(pt_model_path))
        candidates.append(os.path.join(weights_dir, f"{stem}_saved_model"))
        candidates.append(os.path.join(os.getcwd(), "best_saved_model"))

        for candidate in candidates:
            saved_model_dir = find_saved_model(candidate)
            if saved_model_dir:
                break
        if not saved_model_dir:
            raise RuntimeError(
                f"Cannot find SavedModel directory; searched: {candidates}"
            )

        print(f"Found SavedModel at: {saved_model_dir}")

        # Add signatures to the model
        saved_model_dir = add_signatures(saved_model_dir)

        # Convert to TensorFlow.js
        print("Converting to TensorFlow.js format...")
        tfjs_target_dir = os.path.join(output_dir, 'tfjs_model')

        # Ensure clean target directory
        if os.path.exists(tfjs_target_dir):
            shutil.rmtree(tfjs_target_dir)
        os.makedirs(tfjs_target_dir)

        conversion_command = [
            "tensorflowjs_converter",
            "--input_format=tf_saved_model",
            "--output_format=tfjs_graph_model",
            "--saved_model_tags=serve",
            "--control_flow_v2=True",
            saved_model_dir,
            tfjs_target_dir,
        ]

        print(f"Running conversion command: {' '.join(conversion_command)}")
        result = subprocess.run(conversion_command, check=False)

        if result.returncode != 0:
            raise RuntimeError("TensorFlow.js conversion failed")

        # Verify conversion
        if not os.path.exists(os.path.join(tfjs_target_dir, 'model.json')):
            raise RuntimeError("TensorFlow.js conversion failed - model.json not found")

        print("Successfully converted model to TensorFlow.js format")
        print(f"Output saved to: {tfjs_target_dir}")

        # Print model files
        print("\nConverted model files:")
        for filename in os.listdir(tfjs_target_dir):
            print(f"- {filename}")

        # Create a zip file of the converted model; make_archive returns the
        # path of the archive it wrote.
        zip_path = shutil.make_archive(tfjs_target_dir, 'zip', tfjs_target_dir)

        # Download the zip file through the Colab helper
        colab_files.download(zip_path)

    except Exception as e:
        # Top-level boundary: print context for debugging, then re-raise.
        print(f"Error during conversion: {str(e)}")
        print("\nDebug information:")
        print(f"Current working directory: {os.getcwd()}")
        print(f"PT model exists: {os.path.exists(pt_model_path)}")
        if saved_model_dir is not None:
            print(f"SavedModel directory exists: {os.path.exists(saved_model_dir)}")
            if os.path.exists(saved_model_dir):
                print("SavedModel contents:")
                for root, _dirs, filenames in os.walk(saved_model_dir):
                    print(f"\nDirectory: {root}")
                    for filename in filenames:
                        print(f"  - {filename}")
        raise

# Usage (Colab): upload a .pt checkpoint through the browser dialog, then
# convert it; the first uploaded file name is used as the model path.
from google.colab import files as colab_files  # Use consistent naming
uploaded = colab_files.upload()
if not uploaded:
    # Without this check an empty/cancelled upload surfaced as a bare StopIteration.
    raise ValueError("No model file was uploaded; nothing to convert.")
pt_model_path = next(iter(uploaded))
output_dir = "converted_model"
convert_to_tfjs(pt_model_path, output_dir)
Meine Web-App zur Handposen-Erkennung:





Real-time Hand Pose Detection


/* Page: centered text on a light gray background. */
body {
    text-align: center;
    font-family: Arial, sans-serif;
    margin: 0;
    padding: 20px;
    background: #f0f0f0;
}

/* Fixed 640x480 stage; relative positioning anchors absolutely positioned children. */
.container {
    position: relative;
    width: 640px;
    height: 480px;
    margin: 20px auto;
}

/* Video and canvas are both pinned to the top-left corner so they overlap. */
video, canvas {
    position: absolute;
    left: 0;
    top: 0;
}

/* Blue action buttons. */
button {
    margin: 10px;
    padding: 10px 20px;
    font-size: 16px;
    cursor: pointer;
    background: #007bff;
    color: white;
    border: none;
    border-radius: 4px;
}

/* Darker blue while hovered. */
button:hover {
    background: #0056b3;
}

/* Status label shown as a white pill. */
#status {
    padding: 10px;
    background: #fff;
    border-radius: 4px;
    display: inline-block;
}



Real-time Hand Pose Detection (YOLOv8)
Load Model
Start Webcam
Model not loaded





// Kalman Filter Implementation
/**
 * Linear constant-velocity Kalman filter on flat, row-major arrays.
 *
 * The state vector holds `stateSize / 2` positions followed by the same
 * number of velocities (e.g. [x, y, vx, vy]); `stateSize` is therefore
 * expected to be even. A measurement observes the first `measurementSize`
 * state entries directly.
 *
 * Fixes over the previous version:
 *  - predict() propagates the covariance as F·P·Fᵀ + Q (was F·P + Q);
 *  - update() uses the full inverse of S for the gain (was a division by
 *    the diagonal of S only);
 *  - update() applies P ← P − K·H·P (the old code multiplied by P[i][j]
 *    instead of P[k][j], so cross terms were wrong);
 *  - measurements that are missing or non-finite are ignored instead of
 *    poisoning the state with NaN.
 */
class KalmanFilter {
    /**
     * @param {number} stateSize         Length of the state vector.
     * @param {number} measurementSize   Number of directly observed state entries.
     * @param {number} [processNoise]    Variance added to every diagonal entry of P per predict().
     * @param {number} [measurementNoise] Variance of each measurement component.
     */
    constructor(stateSize, measurementSize, processNoise = 0.001, measurementNoise = 0.1) {
        this.state = new Array(stateSize).fill(0);         // State vector [x, y, vx, vy]
        this.covariance = new Array(stateSize * stateSize).fill(0);
        this.processNoise = processNoise;
        this.measurementNoise = measurementNoise;
        this.stateSize = stateSize;
        this.measurementSize = measurementSize;

        // Initialize covariance matrix with high uncertainty
        for (let i = 0; i < stateSize; i++) {
            this.covariance[i * stateSize + i] = 1000;
        }
    }

    /** Row-major product of A (rows×inner) and B (inner×cols). */
    static _multiply(A, B, rows, inner, cols) {
        const out = new Array(rows * cols).fill(0);
        for (let i = 0; i < rows; i++) {
            for (let j = 0; j < cols; j++) {
                let sum = 0;
                for (let k = 0; k < inner; k++) {
                    sum += A[i * inner + k] * B[k * cols + j];
                }
                out[i * cols + j] = sum;
            }
        }
        return out;
    }

    /** Transpose of a row-major rows×cols matrix. */
    static _transpose(A, rows, cols) {
        const out = new Array(rows * cols).fill(0);
        for (let i = 0; i < rows; i++) {
            for (let j = 0; j < cols; j++) {
                out[j * rows + i] = A[i * cols + j];
            }
        }
        return out;
    }

    /**
     * Gauss-Jordan inverse of an n×n matrix with partial pivoting.
     * Returns null when the matrix is singular or contains NaN.
     */
    static _invert(A, n) {
        const a = A.slice();
        const inv = new Array(n * n).fill(0);
        for (let i = 0; i < n; i++) {
            inv[i * n + i] = 1;
        }

        for (let col = 0; col < n; col++) {
            // Pick the row with the largest pivot for numerical stability.
            let pivotRow = col;
            for (let r = col + 1; r < n; r++) {
                if (Math.abs(a[r * n + col]) > Math.abs(a[pivotRow * n + col])) {
                    pivotRow = r;
                }
            }
            const pivot = a[pivotRow * n + col];
            if (!(Math.abs(pivot) > 1e-12)) {
                return null; // also true for NaN
            }
            if (pivotRow !== col) {
                for (let c = 0; c < n; c++) {
                    const tmpA = a[col * n + c];
                    a[col * n + c] = a[pivotRow * n + c];
                    a[pivotRow * n + c] = tmpA;
                    const tmpI = inv[col * n + c];
                    inv[col * n + c] = inv[pivotRow * n + c];
                    inv[pivotRow * n + c] = tmpI;
                }
            }
            for (let c = 0; c < n; c++) {
                a[col * n + c] /= pivot;
                inv[col * n + c] /= pivot;
            }
            for (let r = 0; r < n; r++) {
                if (r === col) continue;
                const factor = a[r * n + col];
                for (let c = 0; c < n; c++) {
                    a[r * n + c] -= factor * a[col * n + c];
                    inv[r * n + c] -= factor * inv[col * n + c];
                }
            }
        }
        return inv;
    }

    /**
     * Time update: x ← F·x, P ← F·P·Fᵀ + Q.
     * @param {number} [dt] Seconds elapsed since the previous step.
     */
    predict(dt = 1/30) {
        const n = this.stateSize;
        const half = n / 2;

        // State transition matrix: position_i += dt * velocity_i
        const F = new Array(n * n).fill(0);
        for (let i = 0; i < half; i++) {
            F[i * n + i] = 1;
            F[i * n + (i + half)] = dt;
            F[(i + half) * n + (i + half)] = 1;
        }

        // Predict state
        this.state = KalmanFilter._multiply(F, this.state, n, n, 1);

        // Predict covariance
        const FP = KalmanFilter._multiply(F, this.covariance, n, n, n);
        const predicted = KalmanFilter._multiply(FP, KalmanFilter._transpose(F, n, n), n, n, n);

        // Add process noise
        for (let i = 0; i < n; i++) {
            predicted[i * n + i] += this.processNoise;
        }

        this.covariance = predicted;
    }

    /**
     * Measurement update with z = H·x + noise, where H selects the first
     * `measurementSize` state entries.
     * @param {number[]} measurement Observed values; ignored if too short or non-finite.
     */
    update(measurement) {
        const n = this.stateSize;
        const m = this.measurementSize;

        if (!measurement || measurement.length < m) return;
        for (let i = 0; i < m; i++) {
            if (!Number.isFinite(measurement[i])) return;
        }

        // Measurement matrix
        const H = new Array(m * n).fill(0);
        for (let i = 0; i < m; i++) {
            H[i * n + i] = 1;
        }
        const Ht = KalmanFilter._transpose(H, m, n);

        // Innovation covariance S = H·P·Hᵀ + R
        const PHt = KalmanFilter._multiply(this.covariance, Ht, n, n, m);
        const S = KalmanFilter._multiply(H, PHt, m, n, m);
        for (let i = 0; i < m; i++) {
            S[i * m + i] += this.measurementNoise;
        }

        const Sinv = KalmanFilter._invert(S, m);
        if (!Sinv) return; // singular S: keep the prediction

        // Kalman gain K = P·Hᵀ·S⁻¹
        const K = KalmanFilter._multiply(PHt, Sinv, n, m, m);

        // Update state with the innovation z − H·x
        const predictedMeasurement = KalmanFilter._multiply(H, this.state, m, n, 1);
        for (let i = 0; i < n; i++) {
            for (let j = 0; j < m; j++) {
                this.state[i] += K[i * m + j] * (measurement[j] - predictedMeasurement[j]);
            }
        }

        // Update covariance: P ← P − K·(H·P)
        const HP = KalmanFilter._multiply(H, this.covariance, m, n, n);
        const KHP = KalmanFilter._multiply(K, HP, n, m, n);
        const updated = new Array(n * n).fill(0);
        for (let i = 0; i < n * n; i++) {
            updated[i] = this.covariance[i] - KHP[i];
        }

        // Re-symmetrize to stop floating-point drift from accumulating.
        for (let i = 0; i < n; i++) {
            for (let j = i + 1; j < n; j++) {
                const avg = (updated[i * n + j] + updated[j * n + i]) / 2;
                updated[i * n + j] = avg;
                updated[j * n + i] = avg;
            }
        }
        this.covariance = updated;
    }

    /** @returns {number[]} The observed part of the state (first `measurementSize` entries). */
    getState() {
        return this.state.slice(0, this.measurementSize);
    }
}

// ---- Shared runtime state ------------------------------------------------
let model;                                        // set by loadModel()
let video = document.getElementById("video");     // webcam preview element
let canvas = document.getElementById("canvas");   // overlay the detections are drawn on
let ctx = canvas.getContext("2d");

// ---- Detection thresholds -------------------------------------------------
const CONF_THRESHOLD = 0.75;   // detections at or below this confidence are dropped
const IOU_THRESHOLD = 0.1;     // NMS suppresses boxes overlapping a kept box by at least this IoU

let isProcessing = false;      // re-entrancy guard for processVideoFrame()
let previousDetections = [];   // smoothed detections of the last processed frame

// ---- Temporal smoothing -----------------------------------------------------
// Bounding-box filter state: [x, y, w, h, vx, vy, vw, vh]
let bboxFilter = new KalmanFilter(8, 4, 0.005, 0.2);
// Keypoint filter state: [x, y, vx, vy]
let keypointFilter = new KalmanFilter(4, 2, 0.005, 0.2);
let lastFrameTime = performance.now();

// ---- Model input geometry ---------------------------------------------------
const MODEL_WIDTH = 640;
const MODEL_HEIGHT = 640;
const SCALE_FACTOR = 1.8;      // multiplier applied to decoded box width/height

/**
 * Load the converted graph model from the local dev server and mirror the
 * progress into the #status element. Failures are logged and shown in the
 * status text; they are not re-thrown.
 */
async function loadModel() {
    const showStatus = (text) => {
        document.getElementById("status").innerText = text;
    };

    try {
        showStatus("Loading model...");
        model = await tf.loadGraphModel('http://localhost:8000/model.json');
        showStatus("Model loaded!");
        console.log("Model loaded successfully");
    } catch (error) {
        console.error("Error loading model:", error);
        showStatus("Error loading model!");
    }
}

/**
 * Open the user-facing camera, attach it to the video element and start the
 * per-frame detection loop once the stream's metadata is available.
 *
 * Requires the model to be loaded first. Calling it again while a stream is
 * already attached is a no-op; previously each click opened another stream
 * and could start a second render loop.
 */
async function startWebcam() {
    if (!model) {
        alert("Please load the model first!");
        return;
    }

    // Already running: do not request a second stream / second frame loop.
    if (video.srcObject) {
        return;
    }

    try {
        const stream = await navigator.mediaDevices.getUserMedia({
            video: {
                width: { ideal: 640 },
                height: { ideal: 480 },
                facingMode: 'user'
            }
        });

        // Register the handler before attaching the stream so the event
        // cannot be missed.
        video.onloadedmetadata = () => {
            // play() returns a promise; surface a rejection instead of
            // leaving it unhandled.
            const playback = video.play();
            if (playback && typeof playback.catch === 'function') {
                playback.catch(err => console.error("Error accessing webcam:", err));
            }
            processVideoFrame();
        };
        video.srcObject = stream;
    } catch (err) {
        console.error("Error accessing webcam:", err);
        document.getElementById("status").innerText = "Error accessing webcam!";
    }
}

// Letterbox canvas reused across frames so a new <canvas> is not allocated
// on every animation frame.
let letterboxCanvas = null;

/**
 * Process one webcam frame: letterbox it to the model input size, run the
 * model, decode + smooth the detections, draw them, and schedule the next
 * frame.
 *
 * Returns immediately (without scheduling) when the model or video is not
 * ready or a frame is still in flight. Errors are logged; tensors created
 * for the frame are released in `finally`, so a failure in decoding or
 * drawing no longer leaks them.
 */
async function processVideoFrame() {
    if (!model || !video.videoWidth || isProcessing) return;

    let imgTensor = null;
    let predictions = null;

    try {
        isProcessing = true;

        if (!letterboxCanvas) {
            letterboxCanvas = document.createElement('canvas');
            letterboxCanvas.width = MODEL_WIDTH;
            letterboxCanvas.height = MODEL_HEIGHT;
        }
        const letterboxCtx = letterboxCanvas.getContext('2d');

        // Uniform scale that fits the whole frame inside the model input,
        // centered, with padding on the remaining sides.
        const scale = Math.min(MODEL_WIDTH / video.videoWidth, MODEL_HEIGHT / video.videoHeight);
        const scaledWidth = video.videoWidth * scale;
        const scaledHeight = video.videoHeight * scale;
        const offsetX = (MODEL_WIDTH - scaledWidth) / 2;
        const offsetY = (MODEL_HEIGHT - scaledHeight) / 2;

        // Pad with gray 114 rather than black: that is the border value the
        // Ultralytics LetterBox preprocessing uses, so the browser input
        // matches what the Python pipeline feeds the model.
        letterboxCtx.fillStyle = 'rgb(114, 114, 114)';
        letterboxCtx.fillRect(0, 0, MODEL_WIDTH, MODEL_HEIGHT);
        letterboxCtx.drawImage(video, offsetX, offsetY, scaledWidth, scaledHeight);

        // [1, H, W, 3] float tensor scaled to 0..1; intermediates are freed by tidy.
        imgTensor = tf.tidy(() => {
            return tf.browser.fromPixels(letterboxCanvas)
                .expandDims(0)
                .toFloat()
                .div(255.0);
        });

        predictions = await model.predict(imgTensor);

        const processedDetections = await processDetections(predictions, {
            offsetX,
            offsetY,
            scale,
            originalWidth: video.videoWidth,
            originalHeight: video.videoHeight
        });

        const smoothedDetections = smoothDetections(processedDetections);
        drawDetections(smoothedDetections);

        previousDetections = smoothedDetections;

    } catch (error) {
        console.error("Error in processing frame:", error);
    } finally {
        // Release this frame's tensors whether or not anything above threw.
        if (imgTensor) {
            imgTensor.dispose();
        }
        if (Array.isArray(predictions)) {
            predictions.forEach(p => p.dispose());
        } else if (predictions) {
            predictions.dispose();
        }

        isProcessing = false;
        requestAnimationFrame(processVideoFrame);
    }
}

/**
 * Decode the raw model output into normalized detections and run NMS.
 *
 * The output is indexed as `[batch][channel][detection]`; channels 0-3 are
 * read as box center x/y and width/height, channel 4 as confidence and
 * channels 5-6 as the first keypoint's x/y, all in letterboxed model-input
 * pixels.
 * NOTE(review): this channel layout is assumed from the indexing used here -
 * confirm it against the exported model's output shape.
 *
 * @param {tf.Tensor|tf.Tensor[]} predictionTensor Model output. If the model
 *        returns several tensors, the first one is decoded
 *        (presumably the detection head - verify for multi-output exports).
 * @param {{offsetX:number, offsetY:number, scale:number,
 *          originalWidth:number, originalHeight:number}} transformInfo
 *        Letterbox parameters used to map back to the source frame.
 * @returns {Promise<Array<{bbox:number[], confidence:number, keypoint:number[]}>>}
 *        Detections with center-format boxes and keypoints normalized by the
 *        source frame size, after NMS.
 * @throws {Error} If the output has fewer than the 7 channels read here.
 */
async function processDetections(predictionTensor, transformInfo) {
    // The caller treats the output as "tensor or array of tensors" when
    // disposing; accept both here instead of failing on `.array()`.
    const outputTensor = Array.isArray(predictionTensor) ? predictionTensor[0] : predictionTensor;
    if (!outputTensor) {
        return [];
    }

    const predictions = await outputTensor.array();

    if (!predictions.length || !predictions[0].length) {
        return [];
    }

    const channels = predictions[0];
    if (channels.length < 7) {
        throw new Error(`Unexpected model output: expected at least 7 channels, got ${channels.length}`);
    }
    const [cxRow, cyRow, wRow, hRow, confRow, kpxRow, kpyRow] = channels;

    const { offsetX, offsetY, scale, originalWidth, originalHeight } = transformInfo;
    const clamp01 = (value) => Math.max(0, Math.min(1, value));

    let detections = [];
    const numDetections = cxRow.length;

    for (let i = 0; i < numDetections; i++) {
        const confidence = confRow[i];
        if (!(confidence > CONF_THRESHOLD)) continue;

        // Undo the letterbox (remove padding, divide by scale) to get
        // source-frame pixels, then normalize by the frame size.
        const x = (cxRow[i] - offsetX) / scale / originalWidth;
        const y = (cyRow[i] - offsetY) / scale / originalHeight;
        const width = (wRow[i] / scale) * SCALE_FACTOR / originalWidth;
        const height = (hRow[i] / scale) * SCALE_FACTOR / originalHeight;
        const kp_x = (kpxRow[i] - offsetX) / scale / originalWidth;
        const kp_y = (kpyRow[i] - offsetY) / scale / originalHeight;

        // Centers and keypoints are clamped to the frame; sizes are left as decoded.
        detections.push({
            bbox: [clamp01(x), clamp01(y), width, height],
            confidence,
            keypoint: [clamp01(kp_x), clamp01(kp_y)]
        });
    }

    return applyNMS(detections);
}

// Filter pairs for detections beyond the first one. Slot 0 keeps using the
// module-level bboxFilter / keypointFilter.
const extraDetectionFilters = [];

/**
 * Temporally smooth this frame's detections with Kalman filters.
 *
 * Each detection slot (its index in `currentDetections`) has its own pair of
 * filters. Previously one shared pair was advanced and updated for every
 * detection in the frame, which mixed different detections into one state and
 * advanced the filter by N·dt per frame.
 * NOTE: association is by list position only, which is a coarse heuristic
 * when several detections swap order between frames.
 *
 * @param {Array<{bbox:number[], confidence:number, keypoint:number[]}>} currentDetections
 * @returns {Array<{bbox:number[], confidence:number, keypoint:number[]}>}
 *          Same length and order as the input, with filtered bbox/keypoint.
 */
function smoothDetections(currentDetections) {
    const currentTime = performance.now();
    const dt = (currentTime - lastFrameTime) / 1000; // Convert to seconds
    lastFrameTime = currentTime;

    return currentDetections.map((detection, index) => {
        let filters;
        if (index === 0) {
            filters = { bbox: bboxFilter, keypoint: keypointFilter };
        } else {
            if (!extraDetectionFilters[index - 1]) {
                // Same tuning as the module-level filters.
                extraDetectionFilters[index - 1] = {
                    bbox: new KalmanFilter(8, 4, 0.005, 0.2),
                    keypoint: new KalmanFilter(4, 2, 0.005, 0.2)
                };
            }
            filters = extraDetectionFilters[index - 1];
        }

        // Predict next state (once per frame for this slot)
        filters.bbox.predict(dt);
        filters.keypoint.predict(dt);

        // Update with new measurements
        const [x, y, width, height] = detection.bbox;
        filters.bbox.update([x, y, width, height]);

        const [kpX, kpY] = detection.keypoint;
        filters.keypoint.update([kpX, kpY]);

        return {
            bbox: filters.bbox.getState(),
            confidence: detection.confidence,
            keypoint: filters.keypoint.getState()
        };
    });
}

/**
 * Intersection-over-union of two center-format boxes.
 *
 * @param {number[]} box1 [centerX, centerY, width, height]
 * @param {number[]} box2 [centerX, centerY, width, height]
 * @returns {number} IoU in [0, 1]; 0 when the union area is not positive
 *          (e.g. two zero-area boxes), where the plain ratio would be NaN.
 */
function calculateIoU(box1, box2) {
    const [cx1, cy1, w1, h1] = box1;
    const [cx2, cy2, w2, h2] = box2;

    // Convert center/size to edge coordinates.
    const left1 = cx1 - w1 / 2, right1 = cx1 + w1 / 2;
    const top1 = cy1 - h1 / 2, bottom1 = cy1 + h1 / 2;
    const left2 = cx2 - w2 / 2, right2 = cx2 + w2 / 2;
    const top2 = cy2 - h2 / 2, bottom2 = cy2 + h2 / 2;

    const xOverlap = Math.max(0, Math.min(right1, right2) - Math.max(left1, left2));
    const yOverlap = Math.max(0, Math.min(bottom1, bottom2) - Math.max(top1, top2));

    const intersectionArea = xOverlap * yOverlap;
    const union = w1 * h1 + w2 * h2 - intersectionArea;

    // Guards 0/0 (and NaN inputs): treat degenerate boxes as non-overlapping.
    if (!(union > 0)) {
        return 0;
    }

    return intersectionArea / union;
}

/**
 * Greedy non-maximum suppression.
 *
 * Sorts `detections` in place by descending confidence, then keeps a
 * detection only if its IoU with every already-kept detection is below
 * IOU_THRESHOLD.
 *
 * @param {Array<{bbox:number[], confidence:number}>} detections
 * @returns {Promise<Array>} The kept detections, highest confidence first.
 */
async function applyNMS(detections) {
    detections.sort((first, second) => second.confidence - first.confidence);

    const kept = [];
    for (const candidate of detections) {
        const suppressed = kept.some(
            winner => calculateIoU(winner.bbox, candidate.bbox) >= IOU_THRESHOLD
        );
        if (!suppressed) {
            kept.push(candidate);
        }
    }

    return kept;
}

/**
 * Redraw the overlay: the current video frame, then for each detection a red
 * box, a blue dot at its keypoint and a red confidence label above the box.
 *
 * @param {Array<{bbox:number[], confidence:number, keypoint:number[]}>} detections
 *        Center-format boxes and keypoints normalized to 0..1.
 */
function drawDetections(detections) {
    const canvasW = canvas.width;
    const canvasH = canvas.height;

    ctx.clearRect(0, 0, canvasW, canvasH);
    ctx.drawImage(video, 0, 0, canvasW, canvasH);

    for (const { bbox, keypoint, confidence } of detections) {
        const [cx, cy, w, h] = bbox;
        const [kx, ky] = keypoint;

        // Normalized center/size -> top-left corner and size in canvas pixels.
        const left = (cx - w / 2) * canvasW;
        const top = (cy - h / 2) * canvasH;

        // Bounding box
        ctx.strokeStyle = 'red';
        ctx.lineWidth = 2;
        ctx.strokeRect(left, top, w * canvasW, h * canvasH);

        // Keypoint marker
        ctx.fillStyle = 'blue';
        ctx.beginPath();
        ctx.arc(kx * canvasW, ky * canvasH, 5, 0, 2 * Math.PI);
        ctx.fill();

        // Confidence label just above the box
        ctx.fillStyle = 'red';
        ctx.font = '14px Arial';
        ctx.fillText(`Conf: ${confidence.toFixed(2)}`, left, top - 5);
    }
}

// Expose both entry points as properties of window.
Object.assign(window, { loadModel, startWebcam });

Ausprobiert habe ich bereits, die Begrenzungsbox (Bounding Box) anzupassen sowie die IoU- und Konfidenzschwellen zu verändern.

YOLOv8-Handerkennung versagt nach der TensorFlow.js-Konvertierung im Nahbereich

YOLOv8-Handerkennung versagt nach der TensorFlow.js-Konvertierung im Nahbereich ⇐ JavaScript

Quick Reply