栏目分类:
子分类:
返回
名师互学网用户登录
快速导航关闭
当前搜索
当前分类
子分类
实用工具
热门搜索
名师互学网 > IT > 软件开发 > 后端开发 > Python

openvino opencl 输入实现

Python 更新时间: 发布时间: IT归档 最新发布 模块sitemap 名妆网 法律咨询 聚返吧 英语巴士网 伯小乐 网商动力

openvino opencl 输入实现

// reference from https://docs.openvinotoolkit.org/latest/openvino_docs_IE_DG_supported_plugins_GPU_RemoteBlob_API.html

#pragma once  // NOTE(review): unusual in a TU that defines main(); kept from the original, harmless.

// NOTE(review): the original include targets were stripped by HTML scraping
// (bare "#include" lines); reconstructed from the OpenVINO GPU RemoteBlob
// sample this code is derived from (see reference URL above).

// C++ standard library
#include <chrono>
#include <condition_variable>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <map>
#include <memory>
#include <mutex>
#include <string>
#include <vector>

// Inference Engine
#include <inference_engine.hpp>
#include <gpu/gpu_context_api_ocl.hpp>

// OpenCV (image load / resize)
#include <opencv2/opencv.hpp>

#include "classification_results.h"

// OpenCL
#include "CL/cl.h"
#include "CL/cl2.hpp"

// NOTE(review): file-scope using-directives are kept to match the original
// sample's style; avoid this pattern in headers.
using namespace std;
using namespace InferenceEngine;
using namespace std::chrono;

// Global image buffer: filled by loadjpg(), consumed twice in main()
// (once for the regular blob, once for the OpenCL staging buffer).
cv::Mat jpg;

static void loadjpg(const char * jpgname, int width, int height)
{
    //loadimage(&jpg, jpgname);//
    jpg = cv::imread(jpgname);
    cout << "load image: " << jpgname << " resize: w=" << width << " h=" << height << endl;
    //resize to 640*480
    cv::resize(jpg, jpg, cv::Size(width, height), 0, 0, cv::INTER_CUBIC);
}

int main(int argc, char *argv[]) {
    try {

        string FLAGS_d = "GPU";
        string FLAGS_m = "C:\Users\jgu23\documents\Intel\OpenVINO\openvino_models\ir\public\squeezenet1.1\FP16\squeezenet1.1.xml";
        string labelFileName = "C:\Users\jgu23\documents\Intel\OpenVINO\openvino_models\ir\public\squeezenet1.1\FP16\squeezenet1.1.labels";
        string FLAGS_i = "C:\Program Files (x86)\IntelSWTools\openvino\deployment_tools\demo\car.png";
        int FLAGS_nt = 10;

        cout << "starting" << endl;
        const Version *IEversion;
        IEversion = GetInferenceEngineVersion();
        cout << "InferenceEngine: API version " << IEversion->apiVersion.major << "." << IEversion->apiVersion.minor << endl;
        cout << "InferenceEngine: Build : " << IEversion->buildNumber << endl << endl;

        // --------------------------- 1. Load inference engine -------------------------------------
        cout << "Creating Inference Engine" << endl;

        Core ie;
        // -----------------------------------------------------------------------------------------------------

                // --------------------------- 2. Read IR Generated by ModelOptimizer (.xml and .bin files) ------------
        cout << "Loading network files" << endl;

        
        CNNNetwork network = ie.ReadNetwork(FLAGS_m);
        cout << "network layer count: " << network.layerCount() << endl;
        // -----------------------------------------------------------------------------------------------------

                // --------------------------- 3. Configure input & output ---------------------------------------------

            // --------------------------- Prepare input blobs -----------------------------------------------------
        cout << "Preparing input blobs" << endl;

        
        InputsDataMap inputInfo(network.getInputsInfo());
        if (inputInfo.size() != 1) throw std::logic_error("Sample supports topologies with 1 input only");

        auto inputInfoItem = *inputInfo.begin();

        
        inputInfoItem.second->setPrecision(Precision::U8);
        inputInfoItem.second->setLayout(Layout::NCHW);

        //cout << FLAGS_i << endl;
        loadjpg(FLAGS_i.c_str(), inputInfoItem.second->getTensorDesc().getDims()[3],
            inputInfoItem.second->getTensorDesc().getDims()[2]);

        if (jpg.data == NULL)
        {
            cout << "Valid input images were not found!" << endl;
        }

        
        network.setBatchSize(1);
        size_t batchSize = network.getBatchSize();
        cout << "Batch size is " << std::to_string(batchSize) << endl;


        // --------------------------- 4. Loading model to the device ------------------------------------------
        cout << "Loading model to the device: " << FLAGS_d << endl;
        ExecutableNetwork executable_network = ie.LoadNetwork(network, FLAGS_d);
        // -----------------------------------------------------------------------------------------------------

        // --------------------------- 5. Create infer request -------------------------------------------------
        cout << "Create infer request" << endl;
        InferRequest inferRequest_regular = executable_network.CreateInferRequest();
        // -----------------------------------------------------------------------------------------------------

        // --------------------------- 6. Prepare input --------------------------------------------------------
        for (auto & item : inputInfo) {
            Blob::Ptr inputBlob = inferRequest_regular.GetBlob(item.first);
            SizeVector dims = inputBlob->getTensorDesc().getDims();
            
            size_t num_channels = dims[1];
            size_t image_size = dims[3] * dims[2];

            MemoryBlob::Ptr minput = as(inputBlob);
            if (!minput) {
                cout << "We expect MemoryBlob from inferRequest_regular, but by fact we were not able to cast inputBlob to MemoryBlob" << endl;
                return 1;
            }
            // locked memory holder should be alive all time while access to its buffer happens
            auto minputHolder = minput->wmap();

            auto data = minputHolder.as::value_type *>();
            unsigned char* pixels = (unsigned char*)(jpg.data);

            cout << "image_size = " << image_size << endl;
            
            for (size_t pid = 0; pid < image_size; pid++) {
                
                for (size_t ch = 0; ch < num_channels; ++ch) {
                    
                    data[ch * image_size + pid] = pixels[pid*num_channels + ch];
                }
            }
        }


        // --------------------------- 7. Do inference ---------------------------------------------------------
#if 0
        //for async inference
        size_t numIterations = 10;
        size_t curIteration = 0;
        std::condition_variable condVar;

        inferRequest_regular.SetCompletionCallback(
            [&] {
            curIteration++;
            cout << "Completed " << curIteration << " async request execution" << endl;
            if (curIteration < numIterations) {
                
                inferRequest_regular.StartAsync();
            }
            else {
                
                condVar.notify_one();
            }
        });

        
        cout << "Start inference (" << numIterations << " asynchronous executions)" << endl;
        inferRequest_regular.StartAsync();

        
        std::mutex mutex;
        std::unique_lock lock(mutex);
        condVar.wait(lock, [&] { return curIteration == numIterations; });
#else
        
        cout << "Start inference " << endl;
        inferRequest_regular.Infer();
#endif

        // -----------------------------------------------------------------------------------------------------

        // --------------------------- 8. Process output -------------------------------------------------------
        cout << "Processing output blobs" << endl;
        OutputsDataMap outputInfo(network.getOutputsInfo());
        if (outputInfo.size() != 1) throw std::logic_error("Sample supports topologies with 1 output only");
        Blob::Ptr outputBlob_regular = inferRequest_regular.GetBlob(outputInfo.begin()->first);

        
        const size_t resultsCnt = outputBlob_regular->size() / batchSize;
        if (FLAGS_nt > resultsCnt || FLAGS_nt < 1) {
            cout << "-nt " << FLAGS_nt << " is not available for this network (-nt should be less than "
                << resultsCnt + 1 << " and more than 0)n            will be used maximal value : " << resultsCnt << endl;
            FLAGS_nt = resultsCnt;
        }

        
        //std::string labelFileName = fileNameNoExt(FLAGS_m) + ".labels";
        std::vector labels;

        std::ifstream inputFile;
        inputFile.open(labelFileName, std::ios::in);
        if (inputFile.is_open()) {
            std::string strLine;
            while (std::getline(inputFile, strLine)) {
                //trim(strLine);
                labels.push_back(strLine);
            }
        }

        std::vector validImageNames = { "car.png" };
        ClassificationResult classificationResult(outputBlob_regular, validImageNames,
            batchSize, FLAGS_nt,
            labels);
        classificationResult.print();


        // inference using remote blob
        auto inf_req_shared = executable_network.CreateInferRequest();

        // obtain the RemoteContext pointer from the executable network object
        auto cldnn_context = executable_network.GetContext();
        cl_context ctx = std::dynamic_pointer_cast(cldnn_context)->get();

        cl::Context _context;
        cl::Device _device;
        cl::CommandQueue _queue;
        // user-supplied context handle
        _context = cl::Context(ctx, true);
        _device = cl::Device(_context.getInfo()[0].get(), true);

        cl_command_queue_properties props = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
        _queue = cl::CommandQueue(_context, _device, props);

        auto dims = network.getInputsInfo().begin()->second->getTensorDesc().getDims();
        size_t imSize = dims[1] * dims[2] * dims[3];
        cout << "imSize = " << imSize << " dims[1]=" << dims[1] << " dims[2]=" << dims[2] << " dims[3]=" << dims[3] << endl << endl;

        size_t num_channels = dims[1];
        size_t image_size = dims[3] * dims[2];

        //prepare input image data
        
        unsigned char *ImageBuffer;
        ImageBuffer = (unsigned char *)malloc(imSize);
        unsigned char* pixels = (unsigned char*)(jpg.data);
        for (size_t pid = 0; pid < image_size; pid++) {
            
            for (size_t ch = 0; ch < num_channels; ++ch) {
                
                ImageBuffer[ch * image_size + pid] = pixels[pid*num_channels + ch];
                //set input data to 0
                //ImageBuffer[ch * image_size + pid] = 0;
            }
        }

        cl_int err;
        cl::Buffer shared_buffer(_context, CL_MEM_READ_WRITE, imSize, NULL, &err);
        {
            void *buffer = ImageBuffer;
            _queue.enqueueWriteBuffer(shared_buffer, true, 0, imSize, buffer);
        }

        Blob::Ptr shared_blob = gpu::make_shared_blob(network.getInputsInfo().begin()->second->getTensorDesc(), cldnn_context,
            shared_buffer);

        inf_req_shared.SetBlob(network.getInputsInfo().begin()->first, shared_blob);

        inf_req_shared.Infer();
        auto outputBlob_shared = inf_req_shared.GetBlob(network.getOutputsInfo().begin()->first);

        free(ImageBuffer);

        cout << "Processing output shared blobs" << endl;
        ClassificationResult classificationResult_shared(outputBlob_shared, validImageNames,
            batchSize, FLAGS_nt,
            labels);
        classificationResult_shared.print();

    }
    catch (const std::exception& error) {
        cout << error.what() << endl;
        return 1;
    }
    catch (...) {
        cout << "Unknown/internal exception happened." << endl;
        return 1;
    }

    cout << "Execution successful" << endl;
    cout << endl << "This sample is an API example, for any performance measurements "
        "please use the dedicated benchmark_app tool" << endl;

    // -----------------------------------------------------------------------------------------------------
    return 0;
}
 

转载请注明:文章转载自 www.mshxw.com
本文地址:https://www.mshxw.com/it/741476.html
我们一直用心在做
关于我们 文章归档 网站地图 联系我们

版权所有 (c)2021-2022 MSHXW.COM

ICP备案号:晋ICP备2021003244-6号