Programmiererforum

Posted: **15 May 2025, 01:40**

Ich habe ein Torchvision-Modell, das ohne Klassifizierungskopf für eine Ähnlichkeitssuche verwendet wird. Ich habe es als Torchvision-Modell gespeichert. Wenn ich es in Python für die Ähnlichkeitssuche verwende, liefert es das richtige Ergebnis; in Kotlin erhalte ich dagegen nicht die gleichen Ausgaben, obwohl ich darauf geachtet habe, die gleichen Bilder zu verwenden. Ich vermute …

Code: Select all

# Inference device and the preprocessing pipeline applied to every input image.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
image_transform = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        # Per-channel mean / std used to normalize the tensor.
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

def load_trained_model():
    """Load the serialized model from MODEL_FILE onto `device` and switch it to eval mode.

    Returns:
        The loaded module, ready for inference.
    """
    scripted_model = torch.jit.load(MODEL_FILE, map_location=device)
    scripted_model.eval()
    return scripted_model

def extract_features(pil_img, model):
    """Run `model` on a single image and return its features as a float32 NumPy array.

    Args:
        pil_img: Image accepted by `image_transform` (presumably a PIL image — confirm at caller).
        model: Callable model that maps a batched tensor to a feature tensor.

    Returns:
        NumPy array of dtype float32; outputs with more than two dimensions are
        flattened to (batch, -1) first.
    """
    with torch.no_grad():
        # Add the batch dimension the model expects and move the input to the inference device.
        batch = image_transform(pil_img).unsqueeze(0).to(device)
        embedding = model(batch)
        if embedding.dim() > 2:
            embedding = embedding.view(embedding.size(0), -1)
    return embedding.cpu().numpy().astype(np.float32)
enter code here

@app.post("/extract_features")
async def extract_image_features(image: UploadFile = File(...)):
try:
image_bytes = await image.read()
with Image.open(BytesIO(image_bytes)) as img:
processed_img = img.convert("RGB")

raw_features = extract_features(processed_img, model)

Kotlin:
/**
 * Converts [bitmap] into a normalized float32 input tensor.
 *
 * Steps: ensure ARGB_8888 config, resize via [resizeWithAspectRatio] with target 256,
 * center-crop to 224x224, then convert to a tensor using per-channel mean/std normalization.
 *
 * @param bitmap source image; it is copied only when its config is not ARGB_8888.
 * @return the tensor produced by `TensorImageUtils.bitmapToFloat32Tensor`.
 */
fun preprocessImage(bitmap: Bitmap): Tensor {
    val argbBitmap = when (bitmap.config) {
        Bitmap.Config.ARGB_8888 -> bitmap
        else -> bitmap.copy(Bitmap.Config.ARGB_8888, true)
    }

    val cropped = centerCrop(resizeWithAspectRatio(argbBitmap, 256), 224, 224)

    val channelMean = floatArrayOf(0.485f, 0.456f, 0.406f)
    val channelStd = floatArrayOf(0.229f, 0.224f, 0.225f)
    return TensorImageUtils.bitmapToFloat32Tensor(cropped, channelMean, channelStd)
}

/**
 * Scales [bitmap] so that its shorter edge equals [targetSize] while preserving the aspect ratio.
 *
 * The longer edge is computed as `targetSize * long / short`, truncated toward zero, which is
 * how torchvision's `Resize(int)` derives its output size.
 *
 * The previous implementation computed the new dimensions but then always scaled to a fixed
 * 256x256 square, distorting every non-square image before the center crop.
 *
 * @param bitmap source image.
 * @param targetSize desired length of the shorter edge in pixels; must be positive.
 * @return the scaled bitmap.
 * @throws IllegalArgumentException if [targetSize] is not positive.
 */
private fun resizeWithAspectRatio(bitmap: Bitmap, targetSize: Int): Bitmap {
    require(targetSize > 0) { "targetSize must be positive, was $targetSize" }

    val width = bitmap.width
    val height = bitmap.height

    val shortEdge = minOf(width, height)
    val longEdge = maxOf(width, height)

    // Long arithmetic avoids Int overflow for very large bitmaps; integer division truncates.
    val scaledLongEdge = (targetSize.toLong() * longEdge / shortEdge).toInt()

    val newWidth = if (width <= height) targetSize else scaledLongEdge
    val newHeight = if (width <= height) scaledLongEdge else targetSize

    // Bilinear filtering is the default of Bitmap.scale.
    // NOTE(review): PIL-based Resize also antialiases when downscaling, which Android's
    // bilinear filter presumably does not — small numeric differences versus the Python
    // pipeline may remain for large source images; confirm by comparing input tensors.
    return bitmap.scale(newWidth, newHeight)
}

/**
 * Crops a region of at most [targetWidth] x [targetHeight] from the center of [bitmap].
 *
 * When the bitmap is smaller than the requested size along an axis, the crop starts at 0 on
 * that axis and is limited to the bitmap's own extent, so the result may be smaller than requested.
 *
 * @param bitmap source image.
 * @param targetWidth requested crop width in pixels.
 * @param targetHeight requested crop height in pixels.
 * @return the cropped bitmap.
 */
private fun centerCrop(bitmap: Bitmap, targetWidth: Int, targetHeight: Int): Bitmap {
    val srcWidth = bitmap.width
    val srcHeight = bitmap.height

    // Clamp the top-left corner to the bitmap bounds.
    val left = ((srcWidth - targetWidth) / 2).coerceAtLeast(0)
    val top = ((srcHeight - targetHeight) / 2).coerceAtLeast(0)

    // Never request more pixels than remain to the right of / below the corner.
    val cropWidth = minOf(targetWidth, srcWidth - left)
    val cropHeight = minOf(targetHeight, srcHeight - top)

    return Bitmap.createBitmap(bitmap, left, top, cropWidth, cropHeight)
}

/**
 * Runs the loaded model on [bitmap] and returns the output tensor's data as a flat float array.
 *
 * @param bitmap image to preprocess with [preprocessImage] and feed to the model.
 * @return the model output as a [FloatArray].
 * @throws IllegalStateException if the model has not been loaded yet.
 */
fun extractFeatures(bitmap: Bitmap): FloatArray {
    val loadedModel = checkNotNull(model) { "Model not loaded. Call loadModel() first." }

    val input = IValue.from(preprocessImage(bitmap))

    return loadedModel
        .forward(input)
        .toTensor()
        .dataAsFloatArray
}

Die ersten 5 Ausgaben aus Kotlin:
[0.7993497, 0.30109355, 0.32214138, 0.47712356, 0.5185487]

Python:
[ 1.2595854  -0.07939269 -0.3717999   0.22528967  0.12919804]

Warum sind die Ausgaben so unterschiedlich?

Programmiererforum

PyTorch – Torchvision-Modell liefert in Python und Kotlin unterschiedliche Ergebnisse

PyTorch – Torchvision-Modell liefert in Python und Kotlin unterschiedliche Ergebnisse