YOLOv8 hand detection fails at close range after TensorFlow.js conversion
Post by Anonymous » 27 Feb 2025, 05:29
I'm using YOLOv8 for real-time hand detection in a web app. The model works well in Python, but after converting it to TensorFlow.js the detection struggles when the hand is too close to the webcam: sometimes it misses the hand entirely, sometimes it misplaces the bounding box. The problem seems related to scale variation, but it only appears after the TensorFlow.js conversion.
Code:
import os
import shutil

import tensorflow as tf
from ultralytics import YOLO
from google.colab import files as colab_files


def find_saved_model(base_path):
    """Find the SavedModel directory in the export path."""
    for root, dirs, filenames in os.walk(base_path):
        if 'saved_model.pb' in filenames:
            return root
    return None


def add_signatures(saved_model_dir):
    """Load the SavedModel and add required signatures."""
    print("Adding signatures to SavedModel...")
    # Load the model
    model = tf.saved_model.load(saved_model_dir)

    # Create a wrapper function that matches the model's interface
    @tf.function(input_signature=[
        tf.TensorSpec(shape=[1, 640, 640, 3], dtype=tf.float32, name='images')
    ])
    def serving_fn(images):
        # Pass False for the training parameter
        return model(images, False, None)

    concrete_func = serving_fn.get_concrete_function()
    # Re-save the model with an explicit serving signature
    tf.saved_model.save(
        model,
        saved_model_dir,
        signatures={'serving_default': concrete_func}
    )
    print("Signatures added successfully")
    return saved_model_dir


def convert_to_tfjs(pt_model_path, output_dir):
    """
    Convert a PyTorch YOLO model to TensorFlow.js format.

    Args:
        pt_model_path (str): Path to the .pt file
        output_dir (str): Directory to save the converted model
    """
    saved_model_dir = None
    try:
        # Ensure output directory exists
        os.makedirs(output_dir, exist_ok=True)

        # Load the model
        print(f"Loading YOLO model from {pt_model_path}...")
        model = YOLO(pt_model_path)

        # First export to TensorFlow SavedModel format
        print("Exporting to TensorFlow format...")
        model.export(
            format='saved_model',
            imgsz=640,
            half=False,
            simplify=True
        )

        # Find the SavedModel directory
        saved_model_dir = find_saved_model(os.path.join(os.getcwd(), "best_saved_model"))
        if not saved_model_dir:
            raise Exception(f"Cannot find SavedModel directory in {os.path.dirname(pt_model_path)}")
        print(f"Found SavedModel at: {saved_model_dir}")

        # Add signatures to the model
        saved_model_dir = add_signatures(saved_model_dir)

        # Convert to TensorFlow.js
        print("Converting to TensorFlow.js format...")
        tfjs_target_dir = os.path.join(output_dir, 'tfjs_model')

        # Ensure a clean target directory
        if os.path.exists(tfjs_target_dir):
            shutil.rmtree(tfjs_target_dir)
        os.makedirs(tfjs_target_dir)

        # Run the converter CLI
        conversion_command = (
            f"tensorflowjs_converter "
            f"--input_format=tf_saved_model "
            f"--output_format=tfjs_graph_model "
            f"--saved_model_tags=serve "
            f"--control_flow_v2=True "
            f"'{saved_model_dir}' "
            f"'{tfjs_target_dir}'"
        )
        print(f"Running conversion command: {conversion_command}")
        result = os.system(conversion_command)
        if result != 0:
            raise Exception("TensorFlow.js conversion failed")

        # Verify conversion
        if not os.path.exists(os.path.join(tfjs_target_dir, 'model.json')):
            raise Exception("TensorFlow.js conversion failed - model.json not found")

        print("Successfully converted model to TensorFlow.js format")
        print(f"Output saved to: {tfjs_target_dir}")

        # Print model files
        print("\nConverted model files:")
        for filename in os.listdir(tfjs_target_dir):
            print(f"- {filename}")

        # Zip the converted model and download it from Colab
        zip_path = f"{tfjs_target_dir}.zip"
        shutil.make_archive(tfjs_target_dir, 'zip', tfjs_target_dir)
        colab_files.download(zip_path)
    except Exception as e:
        print(f"Error during conversion: {str(e)}")
        print("\nDebug information:")
        print(f"Current working directory: {os.getcwd()}")
        print(f"PT model exists: {os.path.exists(pt_model_path)}")
        if saved_model_dir and os.path.exists(saved_model_dir):
            print("SavedModel contents:")
            for root, dirs, filenames in os.walk(saved_model_dir):
                print(f"\nDirectory: {root}")
                for filename in filenames:
                    print(f"  - {filename}")
        raise


# Usage
uploaded = colab_files.upload()
pt_model_path = next(iter(uploaded.keys()))
output_dir = "converted_model"
convert_to_tfjs(pt_model_path, output_dir)
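Before digging into the web app below, it is worth confirming what the converted graph actually expects and produces. A minimal check from the browser console (a sketch only; it assumes TensorFlow.js is already loaded on the page and that the model is served from the same localhost URL the app uses):

Code:
const m = await tf.loadGraphModel('http://localhost:8000/model.json');
console.log('inputs:', m.inputs.map(t => `${t.name} ${JSON.stringify(t.shape)}`));
console.log('outputs:', m.outputs.map(t => `${t.name} ${JSON.stringify(t.shape)}`));

If the input is not [1, 640, 640, 3], or the output layout differs from what processDetections below indexes, the close-range misses may come from the decoding rather than from the weights themselves.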
My hand pose detection web app:
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Real-time Hand Pose Detection</title>
<!-- TensorFlow.js runtime; the original include was lost in the paste, a CDN build is assumed here -->
<script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs"></script>
<style>
body {
    text-align: center;
    font-family: Arial, sans-serif;
    margin: 0;
    padding: 20px;
    background: #f0f0f0;
}
.container {
    position: relative;
    width: 640px;
    height: 480px;
    margin: 20px auto;
}
video, canvas {
    position: absolute;
    left: 0;
    top: 0;
}
button {
    margin: 10px;
    padding: 10px 20px;
    font-size: 16px;
    cursor: pointer;
    background: #007bff;
    color: white;
    border: none;
    border-radius: 4px;
}
button:hover {
    background: #0056b3;
}
#status {
    padding: 10px;
    background: #fff;
    border-radius: 4px;
    display: inline-block;
}
</style>
</head>
<body>
<!-- Markup reconstructed from the script below: ids and handlers match getElementById/window exports -->
<h1>Real-time Hand Pose Detection (YOLOv8)</h1>
<button onclick="loadModel()">Load Model</button>
<button onclick="startWebcam()">Start Webcam</button>
<div id="status">Model not loaded</div>
<div class="container">
    <video id="video" width="640" height="480" autoplay muted playsinline></video>
    <canvas id="canvas" width="640" height="480"></canvas>
</div>
<script>
// Kalman Filter Implementation (constant-velocity model, flattened row-major matrices)
class KalmanFilter {
    constructor(stateSize, measurementSize, processNoise = 0.001, measurementNoise = 0.1) {
        this.state = new Array(stateSize).fill(0); // e.g. [x, y, vx, vy]
        this.covariance = new Array(stateSize * stateSize).fill(0);
        this.processNoise = processNoise;
        this.measurementNoise = measurementNoise;
        this.stateSize = stateSize;
        this.measurementSize = measurementSize;
        // Initialize covariance matrix with high uncertainty
        for (let i = 0; i < stateSize; i++) {
            this.covariance[i * stateSize + i] = 1000;
        }
    }

    predict(dt = 1 / 30) {
        const n = this.stateSize;
        // State transition matrix F: each position integrates its velocity
        const F = new Array(n * n).fill(0);
        for (let i = 0; i < n / 2; i++) {
            F[i * n + i] = 1;
            F[i * n + (i + n / 2)] = dt;
            F[(i + n / 2) * n + (i + n / 2)] = 1;
        }
        // Predict state: x' = F x
        const newState = new Array(n).fill(0);
        for (let i = 0; i < n; i++) {
            for (let j = 0; j < n; j++) {
                newState[i] += F[i * n + j] * this.state[j];
            }
        }
        this.state = newState;
        // Predict covariance: P' = F P F^T + Q
        const FP = new Array(n * n).fill(0);
        for (let i = 0; i < n; i++) {
            for (let j = 0; j < n; j++) {
                for (let k = 0; k < n; k++) {
                    FP[i * n + j] += F[i * n + k] * this.covariance[k * n + j];
                }
            }
        }
        const newCovariance = new Array(n * n).fill(0);
        for (let i = 0; i < n; i++) {
            for (let j = 0; j < n; j++) {
                for (let k = 0; k < n; k++) {
                    newCovariance[i * n + j] += FP[i * n + k] * F[j * n + k]; // F^T
                }
            }
        }
        // Add process noise on the diagonal
        for (let i = 0; i < n; i++) {
            newCovariance[i * n + i] += this.processNoise;
        }
        this.covariance = newCovariance;
    }

    update(measurement) {
        const n = this.stateSize;
        const m = this.measurementSize;
        // H selects the first m state entries, so H P is the first m rows of P
        // and P H^T is the first m columns.
        // Innovation covariance S = H P H^T + R (top-left m x m block of P)
        const S = new Array(m * m).fill(0);
        for (let i = 0; i < m; i++) {
            for (let j = 0; j < m; j++) {
                S[i * m + j] = this.covariance[i * n + j];
            }
            S[i * m + i] += this.measurementNoise;
        }
        // Kalman gain K = P H^T S^{-1}, dividing by the diagonal of S only
        // (an approximation that ignores cross terms)
        const K = new Array(n * m).fill(0);
        for (let i = 0; i < n; i++) {
            for (let j = 0; j < m; j++) {
                K[i * m + j] = this.covariance[i * n + j] / S[j * m + j];
            }
        }
        // Innovation y = z - H x
        const innovation = new Array(m).fill(0);
        for (let i = 0; i < m; i++) {
            innovation[i] = measurement[i] - this.state[i];
        }
        // State update: x' = x + K y
        for (let i = 0; i < n; i++) {
            for (let j = 0; j < m; j++) {
                this.state[i] += K[i * m + j] * innovation[j];
            }
        }
        // Covariance update: P' = (I - K H) P = P - K (H P)
        const newCovariance = this.covariance.slice();
        for (let i = 0; i < n; i++) {
            for (let j = 0; j < n; j++) {
                for (let k = 0; k < m; k++) {
                    newCovariance[i * n + j] -= K[i * m + k] * this.covariance[k * n + j];
                }
            }
        }
        this.covariance = newCovariance;
    }

    getState() {
        return this.state.slice(0, this.measurementSize);
    }
}
let model;
let video = document.getElementById("video");
let canvas = document.getElementById("canvas");
let ctx = canvas.getContext("2d");
const CONF_THRESHOLD = 0.75;
const IOU_THRESHOLD = 0.1;
let isProcessing = false;
let previousDetections = [];
// Initialize Kalman filters
let bboxFilter = new KalmanFilter(8, 4, 0.005, 0.2); // State: [x, y, w, h, vx, vy, vw, vh]
let keypointFilter = new KalmanFilter(4, 2, 0.005, 0.2); // State: [x, y, vx, vy]
let lastFrameTime = performance.now();
// Model input size constants
const MODEL_WIDTH = 640;
const MODEL_HEIGHT = 640;
const SCALE_FACTOR = 1.8;
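// Note (aside, not in the original app): bboxFilter and keypointFilter above are
// shared by every detection in a frame, so with two hands in view both
// trajectories get blended into one filter state. A hypothetical per-track
// variant would keep one filter pair per tracked hand, keyed by a track id:
const trackFilters = new Map(); // trackId -> { bbox, keypoint } KalmanFilter pair

function getTrackFilters(trackId) {
    if (!trackFilters.has(trackId)) {
        trackFilters.set(trackId, {
            bbox: new KalmanFilter(8, 4, 0.005, 0.2),
            keypoint: new KalmanFilter(4, 2, 0.005, 0.2)
        });
    }
    return trackFilters.get(trackId);
}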
async function loadModel() {
    try {
        document.getElementById("status").innerText = "Loading model...";
        model = await tf.loadGraphModel('http://localhost:8000/model.json');
        document.getElementById("status").innerText = "Model loaded!";
        console.log("Model loaded successfully");
    } catch (error) {
        console.error("Error loading model:", error);
        document.getElementById("status").innerText = "Error loading model!";
    }
}
async function startWebcam() {
    if (!model) {
        alert("Please load the model first!");
        return;
    }
    try {
        const stream = await navigator.mediaDevices.getUserMedia({
            video: {
                width: { ideal: 640 },
                height: { ideal: 480 },
                facingMode: 'user'
            }
        });
        video.srcObject = stream;
        video.onloadedmetadata = () => {
            video.play();
            processVideoFrame();
        };
    } catch (err) {
        console.error("Error accessing webcam:", err);
        document.getElementById("status").innerText = "Error accessing webcam!";
    }
}
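// Sketch (assumption, not in the original app): getUserMedia's { ideal: ... }
// constraints are not guaranteed, so the fixed 640x480 canvas can disagree with
// the actual stream size. Calling this from onloadedmetadata keeps the overlay
// aligned with the video:
function syncCanvasToVideo() {
    canvas.width = video.videoWidth;
    canvas.height = video.videoHeight;
}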
async function processVideoFrame() {
    if (!model || !video.videoWidth || isProcessing) return;
    try {
        isProcessing = true;
        // Letterbox the frame into the 640x640 model input on an offscreen canvas
        const offscreenCanvas = document.createElement('canvas');
        offscreenCanvas.width = MODEL_WIDTH;
        offscreenCanvas.height = MODEL_HEIGHT;
        const offscreenCtx = offscreenCanvas.getContext('2d');
        const scale = Math.min(MODEL_WIDTH / video.videoWidth, MODEL_HEIGHT / video.videoHeight);
        const scaledWidth = video.videoWidth * scale;
        const scaledHeight = video.videoHeight * scale;
        const offsetX = (MODEL_WIDTH - scaledWidth) / 2;
        const offsetY = (MODEL_HEIGHT - scaledHeight) / 2;
        offscreenCtx.fillStyle = 'black';
        offscreenCtx.fillRect(0, 0, MODEL_WIDTH, MODEL_HEIGHT);
        offscreenCtx.drawImage(video, offsetX, offsetY, scaledWidth, scaledHeight);
        // Normalize to [0, 1] float and add the batch dimension
        const imgTensor = tf.tidy(() => {
            return tf.browser.fromPixels(offscreenCanvas)
                .expandDims(0)
                .toFloat()
                .div(255.0);
        });
        const predictions = await model.predict(imgTensor);
        imgTensor.dispose();
        const processedDetections = await processDetections(predictions, {
            offsetX,
            offsetY,
            scale,
            originalWidth: video.videoWidth,
            originalHeight: video.videoHeight
        });
        const smoothedDetections = smoothDetections(processedDetections);
        drawDetections(smoothedDetections);
        previousDetections = smoothedDetections;
        // Free the prediction tensor(s)
        if (Array.isArray(predictions)) {
            predictions.forEach(p => p.dispose());
        } else {
            predictions.dispose();
        }
    } catch (error) {
        console.error("Error in processing frame:", error);
    } finally {
        isProcessing = false;
        requestAnimationFrame(processVideoFrame);
    }
}
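// Sketch (assumption): graphs exported with --control_flow_v2=True can contain
// control-flow ops, for which GraphModel.predict() throws and tfjs requires
// executeAsync() instead. A defensive wrapper for the predict call above:
async function runModel(input) {
    try {
        return model.predict(input);
    } catch (e) {
        return await model.executeAsync(input);
    }
}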
async function processDetections(predictionTensor, transformInfo) {
    // Assumes output shape [1, 7, N]: cx, cy, w, h, confidence, kp_x, kp_y,
    // all in 640x640 letterboxed model-input pixels
    const tensor = Array.isArray(predictionTensor) ? predictionTensor[0] : predictionTensor;
    const predictions = await tensor.array();
    if (!predictions.length || !predictions[0].length) {
        return [];
    }
    let detections = [];
    const numDetections = predictions[0][0].length;
    for (let i = 0; i < numDetections; i++) {
        const confidence = predictions[0][4][i];
        if (confidence > CONF_THRESHOLD) {
            // Undo the letterbox offset and scale back to source-video pixels
            let x = (predictions[0][0][i] - transformInfo.offsetX) / transformInfo.scale;
            let y = (predictions[0][1][i] - transformInfo.offsetY) / transformInfo.scale;
            // SCALE_FACTOR is an empirical widening of the predicted box
            let width = (predictions[0][2][i] / transformInfo.scale) * SCALE_FACTOR;
            let height = (predictions[0][3][i] / transformInfo.scale) * SCALE_FACTOR;
            let kp_x = (predictions[0][5][i] - transformInfo.offsetX) / transformInfo.scale;
            let kp_y = (predictions[0][6][i] - transformInfo.offsetY) / transformInfo.scale;
            // Normalize to [0, 1] relative to the source video and clamp
            x = Math.max(0, Math.min(1, x / transformInfo.originalWidth));
            y = Math.max(0, Math.min(1, y / transformInfo.originalHeight));
            width = width / transformInfo.originalWidth;
            height = height / transformInfo.originalHeight;
            kp_x = Math.max(0, Math.min(1, kp_x / transformInfo.originalWidth));
            kp_y = Math.max(0, Math.min(1, kp_y / transformInfo.originalHeight));
            detections.push({
                bbox: [x, y, width, height],
                confidence,
                keypoint: [kp_x, kp_y]
            });
        }
    }
    return applyNMS(detections);
}
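// One-off sanity probe (assumed shapes): confirms the output really is
// [1, 7, N] before trusting the index math in processDetections.
async function probeOutputShape() {
    const probe = tf.zeros([1, MODEL_HEIGHT, MODEL_WIDTH, 3]);
    const out = await model.executeAsync(probe);
    const tensors = Array.isArray(out) ? out : [out];
    console.log('output shapes:', tensors.map(t => t.shape));
    probe.dispose();
    tensors.forEach(t => t.dispose());
}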
function smoothDetections(currentDetections) {
    const currentTime = performance.now();
    const dt = (currentTime - lastFrameTime) / 1000; // convert ms to seconds
    lastFrameTime = currentTime;
    // Note: the two global filters are shared by every detection in the frame
    // (see the per-track sketch above for a multi-hand variant)
    return currentDetections.map(detection => {
        // Predict next state
        bboxFilter.predict(dt);
        keypointFilter.predict(dt);
        // Update with new measurements
        const [x, y, width, height] = detection.bbox;
        bboxFilter.update([x, y, width, height]);
        const [kpX, kpY] = detection.keypoint;
        keypointFilter.update([kpX, kpY]);
        // Get filtered states
        const filteredBbox = bboxFilter.getState();
        const filteredKeypoint = keypointFilter.getState();
        return {
            bbox: filteredBbox,
            confidence: detection.confidence,
            keypoint: filteredKeypoint
        };
    });
}
function calculateIoU(box1, box2) {
    // Boxes are [center_x, center_y, width, height], normalized
    const [x1, y1, w1, h1] = box1;
    const [x2, y2, w2, h2] = box2;
    const x1min = x1 - w1 / 2;
    const x1max = x1 + w1 / 2;
    const y1min = y1 - h1 / 2;
    const y1max = y1 + h1 / 2;
    const x2min = x2 - w2 / 2;
    const x2max = x2 + w2 / 2;
    const y2min = y2 - h2 / 2;
    const y2max = y2 + h2 / 2;
    const xOverlap = Math.max(0, Math.min(x1max, x2max) - Math.max(x1min, x2min));
    const yOverlap = Math.max(0, Math.min(y1max, y2max) - Math.max(y1min, y2min));
    const intersectionArea = xOverlap * yOverlap;
    const union = w1 * h1 + w2 * h2 - intersectionArea;
    return union > 0 ? intersectionArea / union : 0; // guard against zero-area boxes
}
async function applyNMS(detections) {
    detections.sort((a, b) => b.confidence - a.confidence);
    const selected = [];
    const active = new Set(Array(detections.length).keys());
    for (let i = 0; i < detections.length; i++) {
        if (!active.has(i)) continue;
        selected.push(detections[i]);
        for (let j = i + 1; j < detections.length; j++) {
            if (!active.has(j)) continue;
            const iou = calculateIoU(detections[i].bbox, detections[j].bbox);
            if (iou >= IOU_THRESHOLD) active.delete(j);
        }
    }
    return selected;
}
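// Quick numeric check of how aggressive IOU_THRESHOLD = 0.1 is: two boxes that
// barely overlap still suppress one another.
// A: center (0.30, 0.5), 0.2 x 0.2; B: center (0.45, 0.5), 0.2 x 0.2
// intersection = 0.05 * 0.2 = 0.01, union = 0.07, IoU ~ 0.14 >= 0.1 -> suppressed
console.log(calculateIoU([0.30, 0.5, 0.2, 0.2], [0.45, 0.5, 0.2, 0.2]));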
function drawDetections(detections) {
    ctx.clearRect(0, 0, canvas.width, canvas.height);
    ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
    detections.forEach(detection => {
        const [x, y, width, height] = detection.bbox;
        const [keypointX, keypointY] = detection.keypoint;
        // Convert normalized coordinates to pixel values
        const boxX = (x - width / 2) * canvas.width;
        const boxY = (y - height / 2) * canvas.height;
        const boxWidth = width * canvas.width;
        const boxHeight = height * canvas.height;
        // Draw bounding box
        ctx.strokeStyle = 'red';
        ctx.lineWidth = 2;
        ctx.strokeRect(boxX, boxY, boxWidth, boxHeight);
        // Draw keypoint
        const kpX = keypointX * canvas.width;
        const kpY = keypointY * canvas.height;
        ctx.fillStyle = 'blue';
        ctx.beginPath();
        ctx.arc(kpX, kpY, 5, 0, 2 * Math.PI);
        ctx.fill();
        // Draw confidence score
        ctx.fillStyle = 'red';
        ctx.font = '14px Arial';
        ctx.fillText(`Conf: ${detection.confidence.toFixed(2)}`, boxX, boxY - 5);
    });
}
window.loadModel = loadModel;
window.startWebcam = startWebcam;
</script>
</body>
</html>
Something I have already tried is adjusting the bounding boxes and tuning the IoU and confidence thresholds.