Příklady

Zde je jednoduchý úryvek kódu pro zahrnutí knihoven a načtení modelu.

#include <iostream>
#include <opencv2/opencv.hpp>
#include <torch/script.h>
#include <torch/torch.h>

int main()
{
    std::string modelPath = "model.torchscript";
    std::cout << "Trying to load model from: " << modelPath << std::endl;
    torch::jit::script::Module model = torch::jit::load(modelPath);
}
#include <iostream>
#include <opencv2/opencv.hpp>
#include <torch/script.h>
#include <torch/torch.h>

/// Example: runs a TorchScript detection model on a single image and prints
/// the detections.
///
/// Loads "model.torchscript" and "output.png", resizes the image to 640x640,
/// runs one forward pass and prints every box whose best class score is at
/// least the confidence threshold.
///
/// @return 0 on success; 1 when the image cannot be read, the model output has
///         an unexpected shape, or LibTorch throws a c10::Error.
int main()
{
    try {
        const std::string modelPath = "model.torchscript";
        const std::string imagePath = "output.png";
        std::cout << "Trying to load model from: " << modelPath << std::endl;
        torch::jit::script::Module model = torch::jit::load(modelPath);
        model.eval();

        cv::Mat img = cv::imread(imagePath);
        if (img.empty()) {
            std::cerr << "Failed to load image" << std::endl;
            return 1;  // signal the failure to the caller instead of exiting with 0
        }

        // Preprocessing: fixed 640x640 input, BGR -> RGB, float values scaled by 1/255.
        cv::resize(img, img, cv::Size(640, 640));
        cv::cvtColor(img, img, cv::COLOR_BGR2RGB);
        img.convertTo(img, CV_32F, 1.0/255.0);

        // from_blob only wraps img.data (no copy, no ownership); the
        // permute(...).contiguous() below materialises the NCHW layout.
        torch::Tensor input = torch::from_blob(img.data, {1,640,640,3}, torch::kFloat32);
        input = input.permute({0,3,1,2}).contiguous();

        // Inference only: do not record autograd history for the forward pass.
        torch::NoGradGuard no_grad;

        std::vector<torch::jit::IValue> inputs = { input };
        at::Tensor output = model.forward(inputs).toTensor();

        if (output.sizes() == torch::IntArrayRef({1, 28, 8400})) {
            // {1, 28, 8400} -> {8400, 28}: one row per candidate detection.
            torch::Tensor detections = output.squeeze(0).transpose(0,1).contiguous();
            const float conf_threshold = 0.1f;

            for (int i = 0; i < detections.size(0); ++i) {
                auto row = detections[i];
                // The first four values are read as the box centre (x, y) and its size (w, h).
                float x = row[0].item<float>();
                float y = row[1].item<float>();
                float w = row[2].item<float>();
                float h = row[3].item<float>();

                // Everything after the four box values is treated as per-class scores.
                int numClasses = detections.size(1) - 4;
                torch::Tensor class_logits = row.slice(0, 4, 4 + numClasses);
                auto max_result = class_logits.max(0, /*keepdim=*/false);

                torch::Tensor max_values = std::get<0>(max_result);
                torch::Tensor max_indices = std::get<1>(max_result);

                float confidence = max_values.item<float>();
                int class_id = max_indices.item<int>();

                if (confidence < conf_threshold) continue;

                // Convert centre/size to corner coordinates.
                float x1 = x - w/2.f, y1 = y - h/2.f, x2 = x + w/2.f, y2 = y + h/2.f;
                std::cout << "Detection: ["<<x1<<","<<y1<<","<<x2<<","<<y2
                          << "] confidence="<<confidence<<" class="<<class_id<< std::endl;
            }
        } else {
            std::cout << "Unexpected output shape: " << output.sizes() << std::endl;
            return 1;  // the model does not match what this example expects
        }
    } catch (const c10::Error& e) {
        std::cerr << "TorchScript exception: " << e.what() << std::endl;
        return 1;  // previously fell through to `return 0`
    }
    return 0;
}
  • Musíte nastavit vstupní velikost obrázku podle modelu. Můj model byl trénován na 640×640.
    cv::resize(img, img, cv::Size(640, 640));
  • Zde kontrolujeme tvar výstupu: 1 obrázek v dávce, 28 hodnot na detekci (4 souřadnice boxu + 24 tříd) a 8400 kandidátních detekcí.
    if (output.sizes() == torch::IntArrayRef({ 1, 28, 8400 })) {
    • [0–3] jsou souřadnice detekovaného boxu: x a y středu boxu, šířka, výška.
    • [4–N] jsou pravděpodobnosti tříd — jak si je model jistý, která třída je v boxu.
  • Zde vybíráme nejpravděpodobnější třídu.
    int numClasses = detections.size(1) - 4;
    torch::Tensor class_logits = row.slice(0, 4, 4 + numClasses);
    auto max_result = class_logits.max(0, /*keepdim=*/false);
    
    torch::Tensor max_values = std::get<0>(max_result);
    torch::Tensor max_indices = std::get<1>(max_result);
    
    float confidence = max_values.item<float>();
    int class_id = max_indices.item<int>();
  • Zde odfiltrujeme detekce s nízkou jistotou.
    if (confidence < conf_threshold) continue;