LCOV - Code coverage - app/test-mldev/test_inference

LCOV - code coverage report

Current view:	top level - app/test-mldev - test_inference_common.c (source / functions)		Hit	Total	Coverage
Test:	Code coverage	Lines:	0	548	0.0 %
Date:	2025-01-02 22:41:34	Functions:	0	20	0.0 %
Legend:	Lines: hit not hit \| Branches: + taken - not taken # not executed	Branches:	0	0	-

           Branch data     Line data    Source code

       1                 :            : /* SPDX-License-Identifier: BSD-3-Clause
       2                 :            :  * Copyright (c) 2022 Marvell.
       3                 :            :  */
       4                 :            : 
       5                 :            : #include <errno.h>
       6                 :            : #include <math.h>
       7                 :            : #include <stdio.h>
       8                 :            : #include <unistd.h>
       9                 :            : 
      10                 :            : #include <rte_common.h>
      11                 :            : #include <rte_cycles.h>
      12                 :            : #include <rte_hash_crc.h>
      13                 :            : #include <rte_launch.h>
      14                 :            : #include <rte_lcore.h>
      15                 :            : #include <rte_malloc.h>
      16                 :            : #include <rte_memzone.h>
      17                 :            : #include <rte_mldev.h>
      18                 :            : 
      19                 :            : #include "ml_common.h"
      20                 :            : #include "test_inference_common.h"
      21                 :            : 
      22                 :            : #define ML_OPEN_WRITE_GET_ERR(name, buffer, size, err) \
      23                 :            :         do { \
      24                 :            :                 FILE *fp = fopen(name, "w+"); \
      25                 :            :                 if (fp == NULL) { \
      26                 :            :                         ml_err("Unable to create file: %s, error: %s", name, strerror(errno)); \
      27                 :            :                         err = true; \
      28                 :            :                 } else { \
      29                 :            :                         if (fwrite(buffer, 1, size, fp) != size) { \
      30                 :            :                                 ml_err("Error writing output, file: %s, error: %s", name, \
      31                 :            :                                        strerror(errno)); \
      32                 :            :                                 err = true; \
      33                 :            :                         } \
      34                 :            :                         fclose(fp); \
      35                 :            :                 } \
      36                 :            :         } while (0)
      37                 :            : 
      38                 :            : /* Enqueue inference requests with burst size equal to 1 */
      39                 :            : static int
      40                 :          0 : ml_enqueue_single(void *arg)
      41                 :            : {
      42                 :            :         struct test_inference *t = ml_test_priv((struct ml_test *)arg);
      43                 :          0 :         struct ml_request *req = NULL;
      44                 :          0 :         struct rte_ml_op *op = NULL;
      45                 :            :         struct ml_core_args *args;
      46                 :            :         uint64_t model_enq = 0;
      47                 :            :         uint64_t start_cycle;
      48                 :            :         uint32_t burst_enq;
      49                 :            :         uint32_t lcore_id;
      50                 :            :         uint64_t offset;
      51                 :            :         uint64_t bufsz;
      52                 :            :         uint16_t fid;
      53                 :            :         uint32_t i;
      54                 :            :         int ret;
      55                 :            : 
      56                 :            :         lcore_id = rte_lcore_id();
      57                 :            :         args = &t->args[lcore_id];
      58                 :          0 :         args->start_cycles = 0;
      59                 :            :         model_enq = 0;
      60                 :            : 
      61                 :          0 :         if (args->nb_reqs == 0)
      62                 :            :                 return 0;
      63                 :            : 
      64                 :          0 : next_rep:
      65                 :          0 :         fid = args->start_fid;
      66                 :            : 
      67                 :            : next_model:
      68                 :          0 :         ret = rte_mempool_get(t->op_pool, (void **)&op);
      69                 :          0 :         if (ret != 0)
      70                 :          0 :                 goto next_model;
      71                 :            : 
      72                 :          0 : retry_req:
      73                 :          0 :         ret = rte_mempool_get(t->model[fid].io_pool, (void **)&req);
      74                 :          0 :         if (ret != 0)
      75                 :          0 :                 goto retry_req;
      76                 :            : 
      77                 :          0 : retry_inp_segs:
      78                 :          0 :         ret = rte_mempool_get_bulk(t->buf_seg_pool, (void **)req->inp_buf_segs,
      79                 :            :                                    t->model[fid].info.nb_inputs);
      80                 :          0 :         if (ret != 0)
      81                 :          0 :                 goto retry_inp_segs;
      82                 :            : 
      83                 :          0 : retry_out_segs:
      84                 :          0 :         ret = rte_mempool_get_bulk(t->buf_seg_pool, (void **)req->out_buf_segs,
      85                 :            :                                    t->model[fid].info.nb_outputs);
      86                 :          0 :         if (ret != 0)
      87                 :          0 :                 goto retry_out_segs;
      88                 :            : 
      89                 :          0 :         op->model_id = t->model[fid].id;
      90                 :          0 :         op->nb_batches = t->model[fid].info.min_batches;
      91                 :          0 :         op->mempool = t->op_pool;
      92                 :          0 :         op->input = req->inp_buf_segs;
      93                 :          0 :         op->output = req->out_buf_segs;
      94                 :          0 :         op->user_ptr = req;
      95                 :            : 
      96                 :          0 :         if (t->model[fid].info.io_layout == RTE_ML_IO_LAYOUT_PACKED) {
      97                 :          0 :                 op->input[0]->addr = req->input;
      98                 :          0 :                 op->input[0]->iova_addr = rte_mem_virt2iova(req->input);
      99                 :          0 :                 op->input[0]->length = t->model[fid].inp_qsize;
     100                 :          0 :                 op->input[0]->next = NULL;
     101                 :            : 
     102                 :          0 :                 op->output[0]->addr = req->output;
     103                 :          0 :                 op->output[0]->iova_addr = rte_mem_virt2iova(req->output);
     104                 :          0 :                 op->output[0]->length = t->model[fid].out_qsize;
     105                 :          0 :                 op->output[0]->next = NULL;
     106                 :            :         } else {
     107                 :            :                 offset = 0;
     108                 :          0 :                 for (i = 0; i < t->model[fid].info.nb_inputs; i++) {
     109                 :          0 :                         bufsz = RTE_ALIGN_CEIL(t->model[fid].info.input_info[i].size,
     110                 :            :                                                t->cmn.dev_info.align_size);
     111                 :          0 :                         op->input[i]->addr = req->input + offset;
     112                 :          0 :                         op->input[i]->iova_addr = rte_mem_virt2iova(req->input + offset);
     113                 :          0 :                         op->input[i]->length = bufsz;
     114                 :          0 :                         op->input[i]->next = NULL;
     115                 :          0 :                         offset += bufsz;
     116                 :            :                 }
     117                 :            : 
     118                 :            :                 offset = 0;
     119                 :          0 :                 for (i = 0; i < t->model[fid].info.nb_outputs; i++) {
     120                 :          0 :                         bufsz = RTE_ALIGN_CEIL(t->model[fid].info.output_info[i].size,
     121                 :            :                                                t->cmn.dev_info.align_size);
     122                 :          0 :                         op->output[i]->addr = req->output + offset;
     123                 :          0 :                         op->output[i]->iova_addr = rte_mem_virt2iova(req->output + offset);
     124                 :          0 :                         op->output[i]->length = bufsz;
     125                 :          0 :                         op->output[i]->next = NULL;
     126                 :          0 :                         offset += bufsz;
     127                 :            :                 }
     128                 :            :         }
     129                 :            : 
     130                 :          0 :         req->niters++;
     131                 :          0 :         req->fid = fid;
     132                 :            : 
     133                 :          0 : enqueue_req:
     134                 :            :         start_cycle = rte_get_tsc_cycles();
     135                 :          0 :         burst_enq = rte_ml_enqueue_burst(t->cmn.opt->dev_id, args->qp_id, &op, 1);
     136                 :          0 :         if (burst_enq == 0)
     137                 :          0 :                 goto enqueue_req;
     138                 :            : 
     139                 :          0 :         args->start_cycles += start_cycle;
     140                 :          0 :         fid++;
     141                 :          0 :         if (likely(fid <= args->end_fid))
     142                 :          0 :                 goto next_model;
     143                 :            : 
     144                 :          0 :         model_enq++;
     145                 :          0 :         if (likely(model_enq < args->nb_reqs))
     146                 :          0 :                 goto next_rep;
     147                 :            : 
     148                 :            :         return 0;
     149                 :            : }
     150                 :            : 
     151                 :            : /* Dequeue inference requests with burst size equal to 1 */
     152                 :            : static int
     153                 :          0 : ml_dequeue_single(void *arg)
     154                 :            : {
     155                 :            :         struct test_inference *t = ml_test_priv((struct ml_test *)arg);
     156                 :            :         struct rte_ml_op_error error;
     157                 :          0 :         struct rte_ml_op *op = NULL;
     158                 :            :         struct ml_core_args *args;
     159                 :            :         struct ml_request *req;
     160                 :            :         uint64_t total_deq = 0;
     161                 :            :         uint8_t nb_filelist;
     162                 :            :         uint32_t burst_deq;
     163                 :            :         uint64_t end_cycle;
     164                 :            :         uint32_t lcore_id;
     165                 :            : 
     166                 :            :         lcore_id = rte_lcore_id();
     167                 :            :         args = &t->args[lcore_id];
     168                 :          0 :         args->end_cycles = 0;
     169                 :          0 :         nb_filelist = args->end_fid - args->start_fid + 1;
     170                 :            : 
     171                 :          0 :         if (args->nb_reqs == 0)
     172                 :            :                 return 0;
     173                 :            : 
     174                 :          0 : dequeue_req:
     175                 :          0 :         burst_deq = rte_ml_dequeue_burst(t->cmn.opt->dev_id, args->qp_id, &op, 1);
     176                 :            :         end_cycle = rte_get_tsc_cycles();
     177                 :            : 
     178                 :          0 :         if (likely(burst_deq == 1)) {
     179                 :          0 :                 total_deq += burst_deq;
     180                 :          0 :                 args->end_cycles += end_cycle;
     181                 :          0 :                 if (unlikely(op->status == RTE_ML_OP_STATUS_ERROR)) {
     182                 :          0 :                         rte_ml_op_error_get(t->cmn.opt->dev_id, op, &error);
     183                 :          0 :                         ml_err("error_code = 0x%" PRIx64 ", error_message = %s\n", error.errcode,
     184                 :            :                                error.message);
     185                 :          0 :                         t->error_count[lcore_id]++;
     186                 :            :                 }
     187                 :          0 :                 req = (struct ml_request *)op->user_ptr;
     188                 :          0 :                 rte_mempool_put(t->model[req->fid].io_pool, req);
     189                 :          0 :                 rte_mempool_put_bulk(t->buf_seg_pool, (void **)op->input,
     190                 :          0 :                                      t->model[req->fid].info.nb_inputs);
     191                 :          0 :                 rte_mempool_put_bulk(t->buf_seg_pool, (void **)op->output,
     192                 :          0 :                                      t->model[req->fid].info.nb_outputs);
     193                 :          0 :                 rte_mempool_put(t->op_pool, op);
     194                 :            :         }
     195                 :            : 
     196                 :          0 :         if (likely(total_deq < args->nb_reqs * nb_filelist))
     197                 :          0 :                 goto dequeue_req;
     198                 :            : 
     199                 :            :         return 0;
     200                 :            : }
     201                 :            : 
     202                 :            : /* Enqueue inference requests with burst size greater than 1 */
     203                 :            : static int
     204                 :          0 : ml_enqueue_burst(void *arg)
     205                 :            : {
     206                 :            :         struct test_inference *t = ml_test_priv((struct ml_test *)arg);
     207                 :            :         struct ml_core_args *args;
     208                 :            :         uint64_t start_cycle;
     209                 :            :         uint16_t ops_count;
     210                 :            :         uint64_t model_enq;
     211                 :            :         uint16_t burst_enq;
     212                 :            :         uint32_t lcore_id;
     213                 :            :         uint16_t pending;
     214                 :            :         uint64_t offset;
     215                 :            :         uint64_t bufsz;
     216                 :            :         uint16_t idx;
     217                 :            :         uint16_t fid;
     218                 :            :         uint16_t i;
     219                 :            :         uint16_t j;
     220                 :            :         int ret;
     221                 :            : 
     222                 :            :         lcore_id = rte_lcore_id();
     223                 :            :         args = &t->args[lcore_id];
     224                 :          0 :         args->start_cycles = 0;
     225                 :            :         model_enq = 0;
     226                 :            : 
     227                 :          0 :         if (args->nb_reqs == 0)
     228                 :            :                 return 0;
     229                 :            : 
     230                 :          0 : next_rep:
     231                 :          0 :         fid = args->start_fid;
     232                 :            : 
     233                 :            : next_model:
     234                 :          0 :         ops_count = RTE_MIN(t->cmn.opt->burst_size, args->nb_reqs - model_enq);
     235                 :          0 :         ret = rte_mempool_get_bulk(t->op_pool, (void **)args->enq_ops, ops_count);
     236                 :          0 :         if (ret != 0)
     237                 :          0 :                 goto next_model;
     238                 :            : 
     239                 :          0 : retry_reqs:
     240                 :          0 :         ret = rte_mempool_get_bulk(t->model[fid].io_pool, (void **)args->reqs, ops_count);
     241                 :          0 :         if (ret != 0)
     242                 :          0 :                 goto retry_reqs;
     243                 :            : 
     244                 :          0 :         for (i = 0; i < ops_count; i++) {
     245                 :          0 : retry_inp_segs:
     246                 :          0 :                 ret = rte_mempool_get_bulk(t->buf_seg_pool, (void **)args->reqs[i]->inp_buf_segs,
     247                 :            :                                            t->model[fid].info.nb_inputs);
     248                 :          0 :                 if (ret != 0)
     249                 :          0 :                         goto retry_inp_segs;
     250                 :            : 
     251                 :          0 : retry_out_segs:
     252                 :          0 :                 ret = rte_mempool_get_bulk(t->buf_seg_pool, (void **)args->reqs[i]->out_buf_segs,
     253                 :            :                                            t->model[fid].info.nb_outputs);
     254                 :          0 :                 if (ret != 0)
     255                 :          0 :                         goto retry_out_segs;
     256                 :            : 
     257                 :          0 :                 args->enq_ops[i]->model_id = t->model[fid].id;
     258                 :          0 :                 args->enq_ops[i]->nb_batches = t->model[fid].info.min_batches;
     259                 :          0 :                 args->enq_ops[i]->mempool = t->op_pool;
     260                 :          0 :                 args->enq_ops[i]->input = args->reqs[i]->inp_buf_segs;
     261                 :          0 :                 args->enq_ops[i]->output = args->reqs[i]->out_buf_segs;
     262                 :          0 :                 args->enq_ops[i]->user_ptr = args->reqs[i];
     263                 :            : 
     264                 :          0 :                 if (t->model[fid].info.io_layout == RTE_ML_IO_LAYOUT_PACKED) {
     265                 :          0 :                         args->enq_ops[i]->input[0]->addr = args->reqs[i]->input;
     266                 :          0 :                         args->enq_ops[i]->input[0]->iova_addr =
     267                 :          0 :                                 rte_mem_virt2iova(args->reqs[i]->input);
     268                 :          0 :                         args->enq_ops[i]->input[0]->length = t->model[fid].inp_qsize;
     269                 :          0 :                         args->enq_ops[i]->input[0]->next = NULL;
     270                 :            : 
     271                 :          0 :                         args->enq_ops[i]->output[0]->addr = args->reqs[i]->output;
     272                 :          0 :                         args->enq_ops[i]->output[0]->iova_addr =
     273                 :          0 :                                 rte_mem_virt2iova(args->reqs[i]->output);
     274                 :          0 :                         args->enq_ops[i]->output[0]->length = t->model[fid].out_qsize;
     275                 :          0 :                         args->enq_ops[i]->output[0]->next = NULL;
     276                 :            :                 } else {
     277                 :            :                         offset = 0;
     278                 :          0 :                         for (j = 0; j < t->model[fid].info.nb_inputs; j++) {
     279                 :          0 :                                 bufsz = RTE_ALIGN_CEIL(t->model[fid].info.input_info[i].size,
     280                 :            :                                                        t->cmn.dev_info.align_size);
     281                 :            : 
     282                 :          0 :                                 args->enq_ops[i]->input[j]->addr = args->reqs[i]->input + offset;
     283                 :          0 :                                 args->enq_ops[i]->input[j]->iova_addr =
     284                 :          0 :                                         rte_mem_virt2iova(args->reqs[i]->input + offset);
     285                 :          0 :                                 args->enq_ops[i]->input[j]->length = t->model[fid].inp_qsize;
     286                 :          0 :                                 args->enq_ops[i]->input[j]->next = NULL;
     287                 :          0 :                                 offset += bufsz;
     288                 :            :                         }
     289                 :            : 
     290                 :            :                         offset = 0;
     291                 :          0 :                         for (j = 0; j < t->model[fid].info.nb_outputs; j++) {
     292                 :          0 :                                 bufsz = RTE_ALIGN_CEIL(t->model[fid].info.output_info[i].size,
     293                 :            :                                                        t->cmn.dev_info.align_size);
     294                 :          0 :                                 args->enq_ops[i]->output[j]->addr = args->reqs[i]->output + offset;
     295                 :          0 :                                 args->enq_ops[i]->output[j]->iova_addr =
     296                 :          0 :                                         rte_mem_virt2iova(args->reqs[i]->output + offset);
     297                 :          0 :                                 args->enq_ops[i]->output[j]->length = t->model[fid].out_qsize;
     298                 :          0 :                                 args->enq_ops[i]->output[j]->next = NULL;
     299                 :          0 :                                 offset += bufsz;
     300                 :            :                         }
     301                 :            :                 }
     302                 :            : 
     303                 :          0 :                 args->reqs[i]->niters++;
     304                 :          0 :                 args->reqs[i]->fid = fid;
     305                 :            :         }
     306                 :            : 
     307                 :            :         idx = 0;
     308                 :            :         pending = ops_count;
     309                 :            : 
     310                 :          0 : enqueue_reqs:
     311                 :            :         start_cycle = rte_get_tsc_cycles();
     312                 :            :         burst_enq =
     313                 :          0 :                 rte_ml_enqueue_burst(t->cmn.opt->dev_id, args->qp_id, &args->enq_ops[idx], pending);
     314                 :          0 :         args->start_cycles += burst_enq * start_cycle;
     315                 :          0 :         pending = pending - burst_enq;
     316                 :            : 
     317                 :          0 :         if (pending > 0) {
     318                 :          0 :                 idx = idx + burst_enq;
     319                 :          0 :                 goto enqueue_reqs;
     320                 :            :         }
     321                 :            : 
     322                 :          0 :         fid++;
     323                 :          0 :         if (fid <= args->end_fid)
     324                 :          0 :                 goto next_model;
     325                 :            : 
     326                 :          0 :         model_enq = model_enq + ops_count;
     327                 :          0 :         if (model_enq < args->nb_reqs)
     328                 :          0 :                 goto next_rep;
     329                 :            : 
     330                 :            :         return 0;
     331                 :            : }
     332                 :            : 
     333                 :            : /* Dequeue inference requests with burst size greater than 1 */
     334                 :            : static int
     335                 :          0 : ml_dequeue_burst(void *arg)
     336                 :            : {
     337                 :            :         struct test_inference *t = ml_test_priv((struct ml_test *)arg);
     338                 :            :         struct rte_ml_op_error error;
     339                 :            :         struct ml_core_args *args;
     340                 :            :         struct ml_request *req;
     341                 :            :         uint64_t total_deq = 0;
     342                 :            :         uint16_t burst_deq = 0;
     343                 :            :         uint8_t nb_filelist;
     344                 :            :         uint64_t end_cycle;
     345                 :            :         uint32_t lcore_id;
     346                 :            :         uint32_t i;
     347                 :            : 
     348                 :            :         lcore_id = rte_lcore_id();
     349                 :            :         args = &t->args[lcore_id];
     350                 :          0 :         args->end_cycles = 0;
     351                 :          0 :         nb_filelist = args->end_fid - args->start_fid + 1;
     352                 :            : 
     353                 :          0 :         if (args->nb_reqs == 0)
     354                 :            :                 return 0;
     355                 :            : 
     356                 :          0 : dequeue_burst:
     357                 :          0 :         burst_deq = rte_ml_dequeue_burst(t->cmn.opt->dev_id, args->qp_id, args->deq_ops,
     358                 :          0 :                                          t->cmn.opt->burst_size);
     359                 :            :         end_cycle = rte_get_tsc_cycles();
     360                 :            : 
     361                 :          0 :         if (likely(burst_deq > 0)) {
     362                 :          0 :                 total_deq += burst_deq;
     363                 :          0 :                 args->end_cycles += burst_deq * end_cycle;
     364                 :            : 
     365                 :          0 :                 for (i = 0; i < burst_deq; i++) {
     366                 :          0 :                         if (unlikely(args->deq_ops[i]->status == RTE_ML_OP_STATUS_ERROR)) {
     367                 :          0 :                                 rte_ml_op_error_get(t->cmn.opt->dev_id, args->deq_ops[i], &error);
     368                 :          0 :                                 ml_err("error_code = 0x%" PRIx64 ", error_message = %s\n",
     369                 :            :                                        error.errcode, error.message);
     370                 :          0 :                                 t->error_count[lcore_id]++;
     371                 :            :                         }
     372                 :          0 :                         req = (struct ml_request *)args->deq_ops[i]->user_ptr;
     373                 :          0 :                         if (req != NULL) {
     374                 :          0 :                                 rte_mempool_put(t->model[req->fid].io_pool, req);
     375                 :          0 :                                 rte_mempool_put_bulk(t->buf_seg_pool,
     376                 :          0 :                                                      (void **)args->deq_ops[i]->input,
     377                 :          0 :                                                      t->model[req->fid].info.nb_inputs);
     378                 :          0 :                                 rte_mempool_put_bulk(t->buf_seg_pool,
     379                 :          0 :                                                      (void **)args->deq_ops[i]->output,
     380                 :          0 :                                                      t->model[req->fid].info.nb_outputs);
     381                 :            :                         }
     382                 :            :                 }
     383                 :          0 :                 rte_mempool_put_bulk(t->op_pool, (void *)args->deq_ops, burst_deq);
     384                 :            :         }
     385                 :            : 
     386                 :          0 :         if (total_deq < args->nb_reqs * nb_filelist)
     387                 :          0 :                 goto dequeue_burst;
     388                 :            : 
     389                 :            :         return 0;
     390                 :            : }
     391                 :            : 
     392                 :            : bool
     393                 :          0 : test_inference_cap_check(struct ml_options *opt)
     394                 :            : {
     395                 :            :         struct rte_ml_dev_info dev_info;
     396                 :            : 
     397                 :          0 :         if (!ml_test_cap_check(opt))
     398                 :            :                 return false;
     399                 :            : 
     400                 :          0 :         rte_ml_dev_info_get(opt->dev_id, &dev_info);
     401                 :            : 
     402                 :          0 :         if (opt->queue_pairs > dev_info.max_queue_pairs) {
     403                 :          0 :                 ml_err("Insufficient capabilities: queue_pairs = %u > (max_queue_pairs = %u)",
     404                 :            :                        opt->queue_pairs, dev_info.max_queue_pairs);
     405                 :          0 :                 return false;
     406                 :            :         }
     407                 :            : 
     408                 :          0 :         if (opt->queue_size > dev_info.max_desc) {
     409                 :          0 :                 ml_err("Insufficient capabilities: queue_size = %u > (max_desc = %u)",
     410                 :            :                        opt->queue_size, dev_info.max_desc);
     411                 :          0 :                 return false;
     412                 :            :         }
     413                 :            : 
     414                 :          0 :         if (opt->nb_filelist > dev_info.max_models) {
     415                 :          0 :                 ml_err("Insufficient capabilities:  Filelist count exceeded device limit, count = %u > (max limit = %u)",
     416                 :            :                        opt->nb_filelist, dev_info.max_models);
     417                 :          0 :                 return false;
     418                 :            :         }
     419                 :            : 
     420                 :          0 :         if (dev_info.max_io < ML_TEST_MAX_IO_SIZE) {
     421                 :          0 :                 ml_err("Insufficient capabilities:  Max I/O, count = %u > (max limit = %u)",
     422                 :            :                        ML_TEST_MAX_IO_SIZE, dev_info.max_io);
     423                 :          0 :                 return false;
     424                 :            :         }
     425                 :            : 
     426                 :            :         return true;
     427                 :            : }
     428                 :            : 
     429                 :            : int
     430                 :          0 : test_inference_opt_check(struct ml_options *opt)
     431                 :            : {
     432                 :            :         uint32_t i;
     433                 :            :         int ret;
     434                 :            : 
     435                 :            :         /* check common opts */
     436                 :          0 :         ret = ml_test_opt_check(opt);
     437                 :          0 :         if (ret != 0)
     438                 :            :                 return ret;
     439                 :            : 
     440                 :            :         /* check for at least one filelist */
     441                 :          0 :         if (opt->nb_filelist == 0) {
     442                 :          0 :                 ml_err("Filelist empty, need at least one filelist to run the test\n");
     443                 :          0 :                 return -EINVAL;
     444                 :            :         }
     445                 :            : 
     446                 :            :         /* check file availability */
     447                 :          0 :         for (i = 0; i < opt->nb_filelist; i++) {
     448                 :          0 :                 if (access(opt->filelist[i].model, F_OK) == -1) {
     449                 :          0 :                         ml_err("Model file not accessible: id = %u, file = %s", i,
     450                 :            :                                opt->filelist[i].model);
     451                 :          0 :                         return -ENOENT;
     452                 :            :                 }
     453                 :            : 
     454                 :          0 :                 if (access(opt->filelist[i].input, F_OK) == -1) {
     455                 :          0 :                         ml_err("Input file not accessible: id = %u, file = %s", i,
     456                 :            :                                opt->filelist[i].input);
     457                 :          0 :                         return -ENOENT;
     458                 :            :                 }
     459                 :            :         }
     460                 :            : 
     461                 :          0 :         if (opt->repetitions == 0) {
     462                 :          0 :                 ml_err("Invalid option, repetitions = %" PRIu64 "\n", opt->repetitions);
     463                 :          0 :                 return -EINVAL;
     464                 :            :         }
     465                 :            : 
     466                 :          0 :         if (opt->burst_size == 0) {
     467                 :          0 :                 ml_err("Invalid option, burst_size = %u\n", opt->burst_size);
     468                 :          0 :                 return -EINVAL;
     469                 :            :         }
     470                 :            : 
     471                 :          0 :         if (opt->burst_size > ML_TEST_MAX_POOL_SIZE) {
     472                 :          0 :                 ml_err("Invalid option, burst_size = %u (> max supported = %d)\n", opt->burst_size,
     473                 :            :                        ML_TEST_MAX_POOL_SIZE);
     474                 :          0 :                 return -EINVAL;
     475                 :            :         }
     476                 :            : 
     477                 :          0 :         if (opt->queue_pairs == 0) {
     478                 :          0 :                 ml_err("Invalid option, queue_pairs = %u\n", opt->queue_pairs);
     479                 :          0 :                 return -EINVAL;
     480                 :            :         }
     481                 :            : 
     482                 :          0 :         if (opt->queue_size == 0) {
     483                 :          0 :                 ml_err("Invalid option, queue_size = %u\n", opt->queue_size);
     484                 :          0 :                 return -EINVAL;
     485                 :            :         }
     486                 :            : 
     487                 :            :         /* check number of available lcores. */
     488                 :          0 :         if (rte_lcore_count() < (uint32_t)(opt->queue_pairs * 2 + 1)) {
     489                 :          0 :                 ml_err("Insufficient lcores = %u\n", rte_lcore_count());
     490                 :          0 :                 ml_err("Minimum lcores required to create %u queue-pairs = %u\n", opt->queue_pairs,
     491                 :            :                        (opt->queue_pairs * 2 + 1));
     492                 :          0 :                 return -EINVAL;
     493                 :            :         }
     494                 :            : 
     495                 :            :         return 0;
     496                 :            : }
     497                 :            : 
     498                 :            : void
     499                 :          0 : test_inference_opt_dump(struct ml_options *opt)
     500                 :            : {
     501                 :            :         uint32_t i;
     502                 :            : 
     503                 :            :         /* dump common opts */
     504                 :          0 :         ml_test_opt_dump(opt);
     505                 :            : 
     506                 :            :         /* dump test opts */
     507                 :          0 :         ml_dump("repetitions", "%" PRIu64, opt->repetitions);
     508                 :          0 :         ml_dump("burst_size", "%u", opt->burst_size);
     509                 :          0 :         ml_dump("queue_pairs", "%u", opt->queue_pairs);
     510                 :          0 :         ml_dump("queue_size", "%u", opt->queue_size);
     511                 :          0 :         ml_dump("tolerance", "%-7.3f", opt->tolerance);
     512                 :          0 :         ml_dump("stats", "%s", (opt->stats ? "true" : "false"));
     513                 :            : 
     514                 :            :         ml_dump_begin("filelist");
     515                 :          0 :         for (i = 0; i < opt->nb_filelist; i++) {
     516                 :          0 :                 ml_dump_list("model", i, opt->filelist[i].model);
     517                 :          0 :                 ml_dump_list("input", i, opt->filelist[i].input);
     518                 :          0 :                 ml_dump_list("output", i, opt->filelist[i].output);
     519                 :          0 :                 if (strcmp(opt->filelist[i].reference, "\0") != 0)
     520                 :            :                         ml_dump_list("reference", i, opt->filelist[i].reference);
     521                 :            :         }
     522                 :            :         ml_dump_end;
     523                 :          0 : }
     524                 :            : 
     525                 :            : int
     526                 :          0 : test_inference_setup(struct ml_test *test, struct ml_options *opt)
     527                 :            : {
     528                 :            :         struct test_inference *t;
     529                 :            :         void *test_inference;
     530                 :            :         uint32_t lcore_id;
     531                 :            :         int ret = 0;
     532                 :            :         uint32_t i;
     533                 :            : 
     534                 :          0 :         test_inference = rte_zmalloc_socket(test->name, sizeof(struct test_inference),
     535                 :            :                                             RTE_CACHE_LINE_SIZE, opt->socket_id);
     536                 :          0 :         if (test_inference == NULL) {
     537                 :          0 :                 ml_err("failed to allocate memory for test_model");
     538                 :            :                 ret = -ENOMEM;
     539                 :          0 :                 goto error;
     540                 :            :         }
     541                 :          0 :         test->test_priv = test_inference;
     542                 :            :         t = ml_test_priv(test);
     543                 :            : 
     544                 :          0 :         t->nb_used = 0;
     545                 :          0 :         t->nb_valid = 0;
     546                 :          0 :         t->cmn.result = ML_TEST_FAILED;
     547                 :          0 :         t->cmn.opt = opt;
     548                 :          0 :         memset(t->error_count, 0, RTE_MAX_LCORE * sizeof(uint64_t));
     549                 :            : 
     550                 :            :         /* get device info */
     551                 :          0 :         ret = rte_ml_dev_info_get(opt->dev_id, &t->cmn.dev_info);
     552                 :          0 :         if (ret < 0) {
     553                 :          0 :                 ml_err("failed to get device info");
     554                 :          0 :                 goto error;
     555                 :            :         }
     556                 :            : 
     557                 :          0 :         if (opt->burst_size == 1) {
     558                 :          0 :                 t->enqueue = ml_enqueue_single;
     559                 :          0 :                 t->dequeue = ml_dequeue_single;
     560                 :            :         } else {
     561                 :          0 :                 t->enqueue = ml_enqueue_burst;
     562                 :          0 :                 t->dequeue = ml_dequeue_burst;
     563                 :            :         }
     564                 :            : 
     565                 :            :         /* set model initial state */
     566                 :          0 :         for (i = 0; i < opt->nb_filelist; i++)
     567                 :          0 :                 t->model[i].state = MODEL_INITIAL;
     568                 :            : 
     569                 :          0 :         for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
     570                 :          0 :                 t->args[lcore_id].enq_ops = rte_zmalloc_socket(
     571                 :          0 :                         "ml_test_enq_ops", opt->burst_size * sizeof(struct rte_ml_op *),
     572                 :            :                         RTE_CACHE_LINE_SIZE, opt->socket_id);
     573                 :          0 :                 t->args[lcore_id].deq_ops = rte_zmalloc_socket(
     574                 :          0 :                         "ml_test_deq_ops", opt->burst_size * sizeof(struct rte_ml_op *),
     575                 :            :                         RTE_CACHE_LINE_SIZE, opt->socket_id);
     576                 :          0 :                 t->args[lcore_id].reqs = rte_zmalloc_socket(
     577                 :          0 :                         "ml_test_requests", opt->burst_size * sizeof(struct ml_request *),
     578                 :            :                         RTE_CACHE_LINE_SIZE, opt->socket_id);
     579                 :            :         }
     580                 :            : 
     581                 :          0 :         for (i = 0; i < RTE_MAX_LCORE; i++) {
     582                 :          0 :                 t->args[i].start_cycles = 0;
     583                 :          0 :                 t->args[i].end_cycles = 0;
     584                 :            :         }
     585                 :            : 
     586                 :            :         return 0;
     587                 :            : 
     588                 :          0 : error:
     589                 :          0 :         rte_free(test_inference);
     590                 :            : 
     591                 :          0 :         return ret;
     592                 :            : }
     593                 :            : 
     594                 :            : void
     595                 :          0 : test_inference_destroy(struct ml_test *test, struct ml_options *opt)
     596                 :            : {
     597                 :            :         struct test_inference *t;
     598                 :            :         uint32_t lcore_id;
     599                 :            : 
     600                 :            :         RTE_SET_USED(opt);
     601                 :            : 
     602                 :            :         t = ml_test_priv(test);
     603                 :            : 
     604                 :          0 :         for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
     605                 :          0 :                 rte_free(t->args[lcore_id].enq_ops);
     606                 :          0 :                 rte_free(t->args[lcore_id].deq_ops);
     607                 :          0 :                 rte_free(t->args[lcore_id].reqs);
     608                 :            :         }
     609                 :            : 
     610                 :          0 :         rte_free(t);
     611                 :          0 : }
     612                 :            : 
     613                 :            : int
     614                 :          0 : ml_inference_mldev_setup(struct ml_test *test, struct ml_options *opt)
     615                 :            : {
     616                 :            :         struct rte_ml_dev_qp_conf qp_conf;
     617                 :            :         struct test_inference *t;
     618                 :            :         uint16_t qp_id;
     619                 :            :         int ret;
     620                 :            : 
     621                 :            :         t = ml_test_priv(test);
     622                 :            : 
     623                 :            :         RTE_SET_USED(t);
     624                 :            : 
     625                 :          0 :         ret = ml_test_device_configure(test, opt);
     626                 :          0 :         if (ret != 0)
     627                 :            :                 return ret;
     628                 :            : 
     629                 :            :         /* setup queue pairs */
     630                 :          0 :         qp_conf.nb_desc = opt->queue_size;
     631                 :          0 :         qp_conf.cb = NULL;
     632                 :            : 
     633                 :          0 :         for (qp_id = 0; qp_id < opt->queue_pairs; qp_id++) {
     634                 :          0 :                 qp_conf.nb_desc = opt->queue_size;
     635                 :          0 :                 qp_conf.cb = NULL;
     636                 :            : 
     637                 :          0 :                 ret = rte_ml_dev_queue_pair_setup(opt->dev_id, qp_id, &qp_conf, opt->socket_id);
     638                 :          0 :                 if (ret != 0) {
     639                 :          0 :                         ml_err("Failed to setup ml device queue-pair, dev_id = %d, qp_id = %u\n",
     640                 :            :                                opt->dev_id, qp_id);
     641                 :          0 :                         return ret;
     642                 :            :                 }
     643                 :            :         }
     644                 :            : 
     645                 :          0 :         ret = ml_test_device_start(test, opt);
     646                 :          0 :         if (ret != 0)
     647                 :          0 :                 goto error;
     648                 :            : 
     649                 :            :         return 0;
     650                 :            : 
     651                 :            : error:
     652                 :          0 :         ml_test_device_close(test, opt);
     653                 :            : 
     654                 :          0 :         return ret;
     655                 :            : }
     656                 :            : 
     657                 :            : int
     658                 :          0 : ml_inference_mldev_destroy(struct ml_test *test, struct ml_options *opt)
     659                 :            : {
     660                 :            :         int ret;
     661                 :            : 
     662                 :          0 :         ret = ml_test_device_stop(test, opt);
     663                 :          0 :         if (ret != 0)
     664                 :          0 :                 goto error;
     665                 :            : 
     666                 :          0 :         ret = ml_test_device_close(test, opt);
     667                 :          0 :         if (ret != 0)
     668                 :          0 :                 return ret;
     669                 :            : 
     670                 :            :         return 0;
     671                 :            : 
     672                 :            : error:
     673                 :          0 :         ml_test_device_close(test, opt);
     674                 :            : 
     675                 :          0 :         return ret;
     676                 :            : }
     677                 :            : 
     678                 :            : /* Callback for IO pool create. This function would compute the fields of ml_request
     679                 :            :  * structure and prepare the quantized input data.
     680                 :            :  */
     681                 :            : static void
     682                 :          0 : ml_request_initialize(struct rte_mempool *mp, void *opaque, void *obj, unsigned int obj_idx)
     683                 :            : {
     684                 :            :         struct test_inference *t = ml_test_priv((struct ml_test *)opaque);
     685                 :            :         struct ml_request *req = (struct ml_request *)obj;
     686                 :            :         struct rte_ml_buff_seg dbuff_seg[ML_TEST_MAX_IO_SIZE];
     687                 :            :         struct rte_ml_buff_seg qbuff_seg[ML_TEST_MAX_IO_SIZE];
     688                 :            :         struct rte_ml_buff_seg *q_segs[ML_TEST_MAX_IO_SIZE];
     689                 :            :         struct rte_ml_buff_seg *d_segs[ML_TEST_MAX_IO_SIZE];
     690                 :            :         uint64_t offset;
     691                 :            :         uint64_t bufsz;
     692                 :            :         uint32_t i;
     693                 :            : 
     694                 :            :         RTE_SET_USED(mp);
     695                 :            :         RTE_SET_USED(obj_idx);
     696                 :            : 
     697                 :          0 :         req->input = (uint8_t *)obj +
     698                 :          0 :                      RTE_ALIGN_CEIL(sizeof(struct ml_request), t->cmn.dev_info.align_size);
     699                 :          0 :         req->output =
     700                 :          0 :                 req->input + RTE_ALIGN_CEIL(t->model[t->fid].inp_qsize, t->cmn.dev_info.align_size);
     701                 :          0 :         req->niters = 0;
     702                 :            : 
     703                 :          0 :         if (t->model[t->fid].info.io_layout == RTE_ML_IO_LAYOUT_PACKED) {
     704                 :          0 :                 dbuff_seg[0].addr = t->model[t->fid].input;
     705                 :          0 :                 dbuff_seg[0].iova_addr = rte_mem_virt2iova(t->model[t->fid].input);
     706                 :          0 :                 dbuff_seg[0].length = t->model[t->fid].inp_dsize;
     707                 :          0 :                 dbuff_seg[0].next = NULL;
     708                 :          0 :                 d_segs[0] = &dbuff_seg[0];
     709                 :            : 
     710                 :          0 :                 qbuff_seg[0].addr = req->input;
     711                 :          0 :                 qbuff_seg[0].iova_addr = rte_mem_virt2iova(req->input);
     712                 :          0 :                 qbuff_seg[0].length = t->model[t->fid].inp_qsize;
     713                 :          0 :                 qbuff_seg[0].next = NULL;
     714                 :          0 :                 q_segs[0] = &qbuff_seg[0];
     715                 :            :         } else {
     716                 :            :                 offset = 0;
     717                 :          0 :                 for (i = 0; i < t->model[t->fid].info.nb_inputs; i++) {
     718                 :          0 :                         bufsz = t->model[t->fid].info.input_info[i].nb_elements * sizeof(float);
     719                 :          0 :                         dbuff_seg[i].addr = t->model[t->fid].input + offset;
     720                 :          0 :                         dbuff_seg[i].iova_addr = rte_mem_virt2iova(t->model[t->fid].input + offset);
     721                 :          0 :                         dbuff_seg[i].length = bufsz;
     722                 :          0 :                         dbuff_seg[i].next = NULL;
     723                 :          0 :                         d_segs[i] = &dbuff_seg[i];
     724                 :          0 :                         offset += bufsz;
     725                 :            :                 }
     726                 :            : 
     727                 :            :                 offset = 0;
     728                 :          0 :                 for (i = 0; i < t->model[t->fid].info.nb_inputs; i++) {
     729                 :          0 :                         bufsz = RTE_ALIGN_CEIL(t->model[t->fid].info.input_info[i].size,
     730                 :            :                                                t->cmn.dev_info.align_size);
     731                 :          0 :                         qbuff_seg[i].addr = req->input + offset;
     732                 :          0 :                         qbuff_seg[i].iova_addr = rte_mem_virt2iova(req->input + offset);
     733                 :          0 :                         qbuff_seg[i].length = bufsz;
     734                 :          0 :                         qbuff_seg[i].next = NULL;
     735                 :          0 :                         q_segs[i] = &qbuff_seg[i];
     736                 :          0 :                         offset += bufsz;
     737                 :            :                 }
     738                 :            :         }
     739                 :            : 
     740                 :            :         /* quantize data */
     741                 :          0 :         rte_ml_io_quantize(t->cmn.opt->dev_id, t->model[t->fid].id, d_segs, q_segs);
     742                 :          0 : }
     743                 :            : 
     744                 :            : int
     745                 :          0 : ml_inference_iomem_setup(struct ml_test *test, struct ml_options *opt, uint16_t fid)
     746                 :            : {
     747                 :            :         struct test_inference *t = ml_test_priv(test);
     748                 :            :         char mz_name[RTE_MEMZONE_NAMESIZE];
     749                 :            :         char mp_name[RTE_MEMPOOL_NAMESIZE];
     750                 :            :         const struct rte_memzone *mz;
     751                 :            :         uint64_t nb_buffers;
     752                 :          0 :         char *buffer = NULL;
     753                 :            :         uint32_t buff_size;
     754                 :            :         uint32_t mz_size;
     755                 :            :         size_t fsize;
     756                 :            :         uint32_t i;
     757                 :            :         int ret;
     758                 :            : 
     759                 :            :         /* get input buffer size */
     760                 :          0 :         t->model[fid].inp_qsize = 0;
     761                 :          0 :         for (i = 0; i < t->model[fid].info.nb_inputs; i++) {
     762                 :          0 :                 if (t->model[fid].info.io_layout == RTE_ML_IO_LAYOUT_PACKED)
     763                 :          0 :                         t->model[fid].inp_qsize += t->model[fid].info.input_info[i].size;
     764                 :            :                 else
     765                 :          0 :                         t->model[fid].inp_qsize += RTE_ALIGN_CEIL(
     766                 :            :                                 t->model[fid].info.input_info[i].size, t->cmn.dev_info.align_size);
     767                 :            :         }
     768                 :            : 
     769                 :            :         /* get output buffer size */
     770                 :          0 :         t->model[fid].out_qsize = 0;
     771                 :          0 :         for (i = 0; i < t->model[fid].info.nb_outputs; i++) {
     772                 :          0 :                 if (t->model[fid].info.io_layout == RTE_ML_IO_LAYOUT_PACKED)
     773                 :          0 :                         t->model[fid].out_qsize += t->model[fid].info.output_info[i].size;
     774                 :            :                 else
     775                 :          0 :                         t->model[fid].out_qsize += RTE_ALIGN_CEIL(
     776                 :            :                                 t->model[fid].info.output_info[i].size, t->cmn.dev_info.align_size);
     777                 :            :         }
     778                 :            : 
     779                 :          0 :         t->model[fid].inp_dsize = 0;
     780                 :          0 :         for (i = 0; i < t->model[fid].info.nb_inputs; i++) {
     781                 :          0 :                 if (opt->quantized_io)
     782                 :          0 :                         t->model[fid].inp_dsize += t->model[fid].info.input_info[i].size;
     783                 :            :                 else
     784                 :          0 :                         t->model[fid].inp_dsize +=
     785                 :          0 :                                 t->model[fid].info.input_info[i].nb_elements * sizeof(float);
     786                 :            :         }
     787                 :            : 
     788                 :          0 :         t->model[fid].out_dsize = 0;
     789                 :          0 :         for (i = 0; i < t->model[fid].info.nb_outputs; i++) {
     790                 :          0 :                 if (opt->quantized_io)
     791                 :          0 :                         t->model[fid].out_dsize += t->model[fid].info.output_info[i].size;
     792                 :            :                 else
     793                 :          0 :                         t->model[fid].out_dsize +=
     794                 :          0 :                                 t->model[fid].info.output_info[i].nb_elements * sizeof(float);
     795                 :            :         }
     796                 :            : 
     797                 :            :         /* allocate buffer for user data */
     798                 :          0 :         mz_size = t->model[fid].inp_dsize + t->model[fid].out_dsize;
     799                 :          0 :         if (strcmp(opt->filelist[fid].reference, "\0") != 0)
     800                 :          0 :                 mz_size += t->model[fid].out_dsize;
     801                 :            : 
     802                 :            :         sprintf(mz_name, "ml_user_data_%d", fid);
     803                 :          0 :         mz = rte_memzone_reserve(mz_name, mz_size, opt->socket_id, 0);
     804                 :          0 :         if (mz == NULL) {
     805                 :          0 :                 ml_err("Memzone allocation failed for ml_user_data\n");
     806                 :            :                 ret = -ENOMEM;
     807                 :          0 :                 goto error;
     808                 :            :         }
     809                 :            : 
     810                 :          0 :         t->model[fid].input = mz->addr;
     811                 :          0 :         t->model[fid].output = t->model[fid].input + t->model[fid].inp_dsize;
     812                 :          0 :         if (strcmp(opt->filelist[fid].reference, "\0") != 0)
     813                 :          0 :                 t->model[fid].reference = t->model[fid].output + t->model[fid].out_dsize;
     814                 :            :         else
     815                 :          0 :                 t->model[fid].reference = NULL;
     816                 :            : 
     817                 :            :         /* load input file */
     818                 :          0 :         ret = ml_read_file(opt->filelist[fid].input, &fsize, &buffer);
     819                 :          0 :         if (ret != 0)
     820                 :          0 :                 goto error;
     821                 :            : 
     822                 :          0 :         if (fsize == t->model[fid].inp_dsize) {
     823                 :          0 :                 rte_memcpy(t->model[fid].input, buffer, fsize);
     824                 :          0 :                 free(buffer);
     825                 :            :         } else {
     826                 :          0 :                 ml_err("Invalid input file, size = %zu (expected size = %" PRIu64 ")\n", fsize,
     827                 :            :                        t->model[fid].inp_dsize);
     828                 :            :                 ret = -EINVAL;
     829                 :          0 :                 free(buffer);
     830                 :          0 :                 goto error;
     831                 :            :         }
     832                 :            : 
     833                 :            :         /* load reference file */
     834                 :          0 :         buffer = NULL;
     835                 :          0 :         if (t->model[fid].reference != NULL) {
     836                 :          0 :                 ret = ml_read_file(opt->filelist[fid].reference, &fsize, &buffer);
     837                 :          0 :                 if (ret != 0)
     838                 :          0 :                         goto error;
     839                 :            : 
     840                 :          0 :                 if (fsize == t->model[fid].out_dsize) {
     841                 :          0 :                         rte_memcpy(t->model[fid].reference, buffer, fsize);
     842                 :          0 :                         free(buffer);
     843                 :            :                 } else {
     844                 :          0 :                         ml_err("Invalid reference file, size = %zu (expected size = %" PRIu64 ")\n",
     845                 :            :                                fsize, t->model[fid].out_dsize);
     846                 :            :                         ret = -EINVAL;
     847                 :          0 :                         free(buffer);
     848                 :          0 :                         goto error;
     849                 :            :                 }
     850                 :            :         }
     851                 :            : 
     852                 :            :         /* create mempool for quantized input and output buffers. ml_request_initialize is
     853                 :            :          * used as a callback for object creation.
     854                 :            :          */
     855                 :          0 :         buff_size = RTE_ALIGN_CEIL(sizeof(struct ml_request), t->cmn.dev_info.align_size) +
     856                 :          0 :                     RTE_ALIGN_CEIL(t->model[fid].inp_qsize, t->cmn.dev_info.align_size) +
     857                 :          0 :                     RTE_ALIGN_CEIL(t->model[fid].out_qsize, t->cmn.dev_info.align_size);
     858                 :          0 :         nb_buffers = RTE_MIN((uint64_t)ML_TEST_MAX_POOL_SIZE, opt->repetitions);
     859                 :            : 
     860                 :          0 :         t->fid = fid;
     861                 :            :         sprintf(mp_name, "ml_io_pool_%d", fid);
     862                 :          0 :         t->model[fid].io_pool = rte_mempool_create(mp_name, nb_buffers, buff_size, 0, 0, NULL, NULL,
     863                 :            :                                                    ml_request_initialize, test, opt->socket_id, 0);
     864                 :          0 :         if (t->model[fid].io_pool == NULL) {
     865                 :          0 :                 ml_err("Failed to create io pool : %s\n", "ml_io_pool");
     866                 :            :                 ret = -ENOMEM;
     867                 :          0 :                 goto error;
     868                 :            :         }
     869                 :            : 
     870                 :            :         return 0;
     871                 :            : 
     872                 :          0 : error:
     873                 :          0 :         rte_memzone_free(mz);
     874                 :            : 
     875                 :          0 :         if (t->model[fid].io_pool != NULL) {
     876                 :          0 :                 rte_mempool_free(t->model[fid].io_pool);
     877                 :          0 :                 t->model[fid].io_pool = NULL;
     878                 :            :         }
     879                 :            : 
     880                 :            :         return ret;
     881                 :            : }
     882                 :            : 
                                        /* Release the per-model I/O memory that belongs to file id `fid`.
                                         *
                                         * The memzone named "ml_user_data_<fid>" and the mempool named "ml_io_pool_<fid>" are
                                         * looked up by name and freed. `test` and `opt` are accepted but not used.
                                         *
                                         * NOTE(review): neither lookup result is checked for NULL before it is handed to the
                                         * corresponding free function - confirm both calls tolerate a NULL argument.
                                         * NOTE(review): the names are built with unbounded sprintf(); snprintf() with
                                         * sizeof(mz_name) / sizeof(mp_name) would make the bound explicit.
                                         */
     883                 :            : void
     884                 :          0 : ml_inference_iomem_destroy(struct ml_test *test, struct ml_options *opt, uint16_t fid)
     885                 :            : {
     886                 :            :         char mz_name[RTE_MEMZONE_NAMESIZE];
     887                 :            :         char mp_name[RTE_MEMPOOL_NAMESIZE];
     888                 :            :         const struct rte_memzone *mz;
     889                 :            :         struct rte_mempool *mp;
     890                 :            : 
     891                 :            :         RTE_SET_USED(test);
     892                 :            :         RTE_SET_USED(opt);
     893                 :            : 
     894                 :            :         /* release user data memzone: rebuild its name from fid, look it up, free it */
     895                 :          0 :         sprintf(mz_name, "ml_user_data_%d", fid);
     896                 :          0 :         mz = rte_memzone_lookup(mz_name);
     897                 :          0 :         rte_memzone_free(mz);
     898                 :            : 
     899                 :            :         /* destroy io pool: same name-based lookup, then free */
     900                 :            :         sprintf(mp_name, "ml_io_pool_%d", fid);
     901                 :          0 :         mp = rte_mempool_lookup(mp_name);
     902                 :          0 :         rte_mempool_free(mp);
     903                 :          0 : }
     904                 :            : 
                                        /* Create the two pools stored in the test's private data: the ML op pool and the
                                         * buffer-segment pool.
                                         *
                                         * Both pools are created on opt->socket_id. The op pool holds ML_TEST_MAX_POOL_SIZE
                                         * entries; the buffer-segment pool holds twice as many.
                                         *
                                         * Returns 0 on success and -ENOMEM when either pool cannot be created. If the second
                                         * creation fails, the op pool created first is freed before returning.
                                         */
     905                 :            : int
     906                 :          0 : ml_inference_mem_setup(struct ml_test *test, struct ml_options *opt)
     907                 :            : {
     908                 :            :         struct test_inference *t = ml_test_priv(test);
     909                 :            : 
     910                 :            :         /* create op pool */
     911                 :          0 :         t->op_pool = rte_ml_op_pool_create("ml_test_op_pool", ML_TEST_MAX_POOL_SIZE, 0, 0,
     912                 :            :                                            opt->socket_id);
     913                 :          0 :         if (t->op_pool == NULL) {
                                                        /* NOTE(review): the name logged here ("ml_op_pool") differs from the name
                                                         * passed to the create call above ("ml_test_op_pool").
                                                         */
     914                 :          0 :                 ml_err("Failed to create op pool : %s\n", "ml_op_pool");
     915                 :          0 :                 return -ENOMEM;
     916                 :            :         }
     917                 :            : 
     918                 :            :         /* create buf_segs pool whose elements are struct rte_ml_buff_seg. external buffers are
     919                 :            :          * attached to the buf_segs while queuing inference requests.
     920                 :            :          */
     921                 :          0 :         t->buf_seg_pool = rte_mempool_create("ml_test_mbuf_pool", ML_TEST_MAX_POOL_SIZE * 2,
     922                 :            :                                              sizeof(struct rte_ml_buff_seg), 0, 0, NULL, NULL, NULL,
     923                 :            :                                              NULL, opt->socket_id, 0);
     924                 :          0 :         if (t->buf_seg_pool == NULL) {
     925                 :          0 :                 ml_err("Failed to create buf_segs pool : %s\n", "ml_test_mbuf_pool");
                                                        /* undo the first allocation so the failure leaves no pool behind.
                                                         * NOTE(review): t->op_pool still holds the freed pointer after this call.
                                                         */
     926                 :          0 :                 rte_ml_op_pool_free(t->op_pool);
     927                 :          0 :                 return -ENOMEM;
     928                 :            :         }
     929                 :            : 
     930                 :            :         return 0;
     931                 :            : }
     932                 :            : 
                                        /* Free the op pool and the buffer-segment pool held in the test's private data.
                                         *
                                         * `opt` is accepted but not used. The pool pointers are not reset after being freed.
                                         *
                                         * NOTE(review): the op pool is released with rte_mempool_free() here, whereas the
                                         * failure path of ml_inference_mem_setup() uses rte_ml_op_pool_free() - confirm which
                                         * one is intended and make the two agree.
                                         */
     933                 :            : void
     934                 :          0 : ml_inference_mem_destroy(struct ml_test *test, struct ml_options *opt)
     935                 :            : {
     936                 :            :         struct test_inference *t = ml_test_priv(test);
     937                 :            : 
     938                 :            :         RTE_SET_USED(opt);
     939                 :            : 
     940                 :            :         /* release op pool */
     941                 :          0 :         rte_mempool_free(t->op_pool);
     942                 :            : 
     943                 :            :         /* release buf_segs pool */
     944                 :          0 :         rte_mempool_free(t->buf_seg_pool);
     945                 :          0 : }
     946                 :            : 
                                        /* Compare the output buffer of the request's model against its reference buffer.
                                         *
                                         * The model is selected with req->fid. Two comparison modes exist:
                                         *   - tolerance == 0.0: the CRCs of the two buffers, each computed over out_dsize bytes,
                                         *     must be equal;
                                         *   - otherwise: both buffers are read as arrays of float and compared element by
                                         *     element; the relative deviation in percent must not exceed the tolerance.
                                         *     The first element that exceeds it is logged and ends the comparison.
                                         *
                                         * Returns true when the buffers match under the selected mode, false otherwise.
                                         *
                                         * model->reference is dereferenced unconditionally; the visible caller
                                         * (ml_request_finish) only calls this function when it is not NULL.
                                         */
     947                 :            : static bool
     948                 :          0 : ml_inference_validation(struct ml_test *test, struct ml_request *req)
     949                 :            : {
     950                 :            :         struct test_inference *t = ml_test_priv((struct ml_test *)test);
     951                 :            :         struct ml_model *model;
     952                 :            :         float *reference;
     953                 :            :         float *output;
     954                 :            :         float deviation;
     955                 :            :         bool match;
     956                 :            :         uint32_t i;
     957                 :            :         uint32_t j;
     958                 :            : 
     959                 :          0 :         model = &t->model[req->fid];
     960                 :            : 
     961                 :            :         /* compare crc when tolerance is 0 */
     962                 :          0 :         if (t->cmn.opt->tolerance == 0.0) {
     963                 :          0 :                 match = (rte_hash_crc(model->output, model->out_dsize, 0) ==
     964                 :          0 :                          rte_hash_crc(model->reference, model->out_dsize, 0));
     965                 :            :         } else {
     966                 :          0 :                 output = (float *)model->output;
     967                 :          0 :                 reference = (float *)model->reference;
     968                 :            : 
                                                        /* The two labels below act as nested do-while loops: i counts outputs and
                                                         * j counts elements within output i, while output/reference advance
                                                         * linearly through both buffers. Because the conditions are tested at the
                                                         * bottom, one element is always inspected even if a count is zero.
                                                         */
     969                 :            :                 i = 0;
     970                 :          0 : next_output:
     971                 :            :                 j = 0;
     972                 :          0 : next_element:
     973                 :            :                 match = false;
                                                        /* both values zero: treat as exact match and skip the division.
                                                         * NOTE(review): a zero reference with a non-zero output still divides by
                                                         * zero below; presumably this yields +inf and is reported as a mismatch -
                                                         * confirm this is the intended handling.
                                                         */
     974                 :          0 :                 if ((*reference == 0) && (*output == 0))
     975                 :            :                         deviation = 0;
     976                 :            :                 else
     977                 :          0 :                         deviation = 100 * fabs(*output - *reference) / fabs(*reference);
     978                 :          0 :                 if (deviation <= t->cmn.opt->tolerance)
     979                 :            :                         match = true;
     980                 :            :                 else
     981                 :          0 :                         ml_err("id = %d, element = %d, output = %f, reference = %f, deviation = %f %%\n",
     982                 :            :                                i, j, *output, *reference, deviation);
     983                 :            : 
     984                 :          0 :                 output++;
     985                 :          0 :                 reference++;
     986                 :            : 
                                                        /* stop at the first mismatch; only that element is logged */
     987                 :          0 :                 if (!match)
     988                 :          0 :                         goto done;
     989                 :            : 
     990                 :          0 :                 j++;
     991                 :          0 :                 if (j < model->info.output_info[i].nb_elements)
     992                 :          0 :                         goto next_element;
     993                 :            : 
     994                 :          0 :                 i++;
     995                 :          0 :                 if (i < model->info.nb_outputs)
     996                 :          0 :                         goto next_output;
     997                 :            :         }
     998                 :          0 : done:
     999                 :          0 :         return match;
    1000                 :            : }
    1001                 :            : 
    1002                 :            : /* Callback for mempool object iteration; ml_inference_result() passes it to
                                         * rte_mempool_obj_iter(). It dequantizes the output data of one request and validates it.
                                         *
                                         * mp      - unused.
                                         * opaque  - the struct ml_test pointer supplied to the iterator.
                                         * obj     - the pool object, interpreted as a struct ml_request.
                                         * obj_idx - index of the object; only object 0 has its output written on a pass, and
                                         *           the index is appended to the dump file names on a failure.
                                         *
                                         * Requests with niters == 0 are skipped. For every other request nb_used is
                                         * incremented, the quantized output in req->output is dequantized into model->output,
                                         * and, when the model has a reference buffer, the result is validated against it.
                                         * nb_valid is incremented only when the pass path runs to its end.
                                         *
                                         * model->output belongs to the model rather than to the request, so every invocation
                                         * for the same fid writes the dequantized data to the same buffer.
                                         *
                                         * NOTE(review): the segment arrays hold ML_TEST_MAX_IO_SIZE entries but the loops are
                                         * bounded by info.nb_outputs only - confirm the bound is enforced elsewhere.
                                         */
    1003                 :            : static void
    1004                 :          0 : ml_request_finish(struct rte_mempool *mp, void *opaque, void *obj, unsigned int obj_idx)
    1005                 :            : {
    1006                 :            :         struct test_inference *t = ml_test_priv((struct ml_test *)opaque);
    1007                 :            :         struct ml_request *req = (struct ml_request *)obj;
    1008                 :          0 :         struct ml_model *model = &t->model[req->fid];
    1009                 :            :         bool error = false;
    1010                 :            :         char *dump_path;
    1011                 :            : 
    1012                 :            :         struct rte_ml_buff_seg qbuff_seg[ML_TEST_MAX_IO_SIZE];
    1013                 :            :         struct rte_ml_buff_seg dbuff_seg[ML_TEST_MAX_IO_SIZE];
    1014                 :            :         struct rte_ml_buff_seg *q_segs[ML_TEST_MAX_IO_SIZE];
    1015                 :            :         struct rte_ml_buff_seg *d_segs[ML_TEST_MAX_IO_SIZE];
    1016                 :            :         uint64_t offset;
    1017                 :            :         uint64_t bufsz;
    1018                 :            :         uint32_t i;
    1019                 :            : 
    1020                 :            :         RTE_SET_USED(mp);
    1021                 :            : 
                                                /* a request with no iterations is skipped and not counted as used */
    1022                 :          0 :         if (req->niters == 0)
    1023                 :          0 :                 return;
    1024                 :            : 
    1025                 :          0 :         t->nb_used++;
    1026                 :            : 
                                                /* packed layout: one segment covers the whole quantized buffer and one the
                                                 * whole dequantized buffer
                                                 */
    1027                 :          0 :         if (t->model[req->fid].info.io_layout == RTE_ML_IO_LAYOUT_PACKED) {
    1028                 :          0 :                 qbuff_seg[0].addr = req->output;
    1029                 :          0 :                 qbuff_seg[0].iova_addr = rte_mem_virt2iova(req->output);
    1030                 :          0 :                 qbuff_seg[0].length = t->model[req->fid].out_qsize;
    1031                 :          0 :                 qbuff_seg[0].next = NULL;
    1032                 :          0 :                 q_segs[0] = &qbuff_seg[0];
    1033                 :            : 
    1034                 :          0 :                 dbuff_seg[0].addr = model->output;
    1035                 :          0 :                 dbuff_seg[0].iova_addr = rte_mem_virt2iova(model->output);
    1036                 :          0 :                 dbuff_seg[0].length = t->model[req->fid].out_dsize;
    1037                 :          0 :                 dbuff_seg[0].next = NULL;
    1038                 :          0 :                 d_segs[0] = &dbuff_seg[0];
    1039                 :            :         } else {
                                                        /* other layouts: one segment per output. quantized segments are laid out
                                                         * back to back, each size rounded up to the device alignment.
                                                         */
    1040                 :            :                 offset = 0;
    1041                 :          0 :                 for (i = 0; i < t->model[req->fid].info.nb_outputs; i++) {
    1042                 :          0 :                         bufsz = RTE_ALIGN_CEIL(t->model[req->fid].info.output_info[i].size,
    1043                 :            :                                                t->cmn.dev_info.align_size);
    1044                 :          0 :                         qbuff_seg[i].addr = req->output + offset;
    1045                 :          0 :                         qbuff_seg[i].iova_addr = rte_mem_virt2iova(req->output + offset);
    1046                 :          0 :                         qbuff_seg[i].length = bufsz;
    1047                 :          0 :                         qbuff_seg[i].next = NULL;
    1048                 :          0 :                         q_segs[i] = &qbuff_seg[i];
    1049                 :          0 :                         offset += bufsz;
    1050                 :            :                 }
    1051                 :            : 
                                                        /* dequantized segments are unaligned: nb_elements floats per output */
    1052                 :            :                 offset = 0;
    1053                 :          0 :                 for (i = 0; i < t->model[req->fid].info.nb_outputs; i++) {
    1054                 :          0 :                         bufsz = t->model[req->fid].info.output_info[i].nb_elements * sizeof(float);
    1055                 :          0 :                         dbuff_seg[i].addr = model->output + offset;
    1056                 :          0 :                         dbuff_seg[i].iova_addr = rte_mem_virt2iova(model->output + offset);
    1057                 :          0 :                         dbuff_seg[i].length = bufsz;
    1058                 :          0 :                         dbuff_seg[i].next = NULL;
    1059                 :          0 :                         d_segs[i] = &dbuff_seg[i];
    1060                 :          0 :                         offset += bufsz;
    1061                 :            :                 }
    1062                 :            :         }
    1063                 :            : 
                                                /* NOTE(review): the return value of rte_ml_io_dequantize() is ignored */
    1064                 :          0 :         rte_ml_io_dequantize(t->cmn.opt->dev_id, model->id, q_segs, d_segs);
    1065                 :            : 
                                                /* without a reference buffer there is nothing to validate: treat as a pass */
    1066                 :          0 :         if (model->reference == NULL)
    1067                 :          0 :                 goto dump_output_pass;
    1068                 :            : 
    1069                 :          0 :         if (!ml_inference_validation(opaque, req))
    1070                 :          0 :                 goto dump_output_fail;
    1071                 :            :         else
    1072                 :          0 :                 goto dump_output_pass;
    1073                 :            : 
                                                /* pass path: only object 0 writes the output files. any early return below
                                                 * happens before nb_valid is incremented, so a failed dump leaves the request
                                                 * counted as used but not valid.
                                                 */
    1074                 :          0 : dump_output_pass:
    1075                 :          0 :         if (obj_idx == 0) {
    1076                 :            :                 /* write quantized output */
    1077                 :          0 :                 if (asprintf(&dump_path, "%s.q", t->cmn.opt->filelist[req->fid].output) == -1)
    1078                 :            :                         return;
    1079                 :          0 :                 ML_OPEN_WRITE_GET_ERR(dump_path, req->output, model->out_qsize, error);
    1080                 :          0 :                 free(dump_path);
    1081                 :          0 :                 if (error)
    1082                 :            :                         return;
    1083                 :            : 
    1084                 :            :                 /* write dequantized output */
    1085                 :          0 :                 if (asprintf(&dump_path, "%s", t->cmn.opt->filelist[req->fid].output) == -1)
    1086                 :            :                         return;
    1087                 :          0 :                 ML_OPEN_WRITE_GET_ERR(dump_path, model->output, model->out_dsize, error);
    1088                 :          0 :                 free(dump_path);
    1089                 :          0 :                 if (error)
    1090                 :            :                         return;
    1091                 :            :         }
    1092                 :          0 :         t->nb_valid++;
    1093                 :            : 
    1094                 :          0 :         return;
    1095                 :            : 
                                                /* fail path: nb_valid is not incremented. the buffers are dumped only when the
                                                 * debug option is set, with obj_idx appended so each failing request gets its
                                                 * own pair of files.
                                                 */
    1096                 :            : dump_output_fail:
    1097                 :          0 :         if (t->cmn.opt->debug) {
    1098                 :            :                 /* dump quantized output buffer */
    1099                 :          0 :                 if (asprintf(&dump_path, "%s.q.%u", t->cmn.opt->filelist[req->fid].output,
    1100                 :            :                              obj_idx) == -1)
    1101                 :            :                         return;
    1102                 :          0 :                 ML_OPEN_WRITE_GET_ERR(dump_path, req->output, model->out_qsize, error);
    1103                 :          0 :                 free(dump_path);
    1104                 :          0 :                 if (error)
    1105                 :            :                         return;
    1106                 :            : 
    1107                 :            :                 /* dump dequantized output buffer */
    1108                 :          0 :                 if (asprintf(&dump_path, "%s.%u", t->cmn.opt->filelist[req->fid].output, obj_idx) ==
    1109                 :            :                     -1)
    1110                 :            :                         return;
    1111                 :          0 :                 ML_OPEN_WRITE_GET_ERR(dump_path, model->output, model->out_dsize, error);
    1112                 :          0 :                 free(dump_path);
    1113                 :            :                 if (error)
    1114                 :          0 :                         return;
    1115                 :            :         }
    1116                 :            : }
    1117                 :            : 
                                        /* Compute and store the test result for the model with file id `fid`.
                                         *
                                         * The per-lcore error counters are summed, ml_request_finish() is run on every object
                                         * of the model's io pool, and the result is ML_TEST_SUCCESS only when every used
                                         * request was counted as valid (nb_used == nb_valid) and no error was recorded.
                                         * Otherwise the result is ML_TEST_FAILED. `opt` is accepted but not used.
                                         *
                                         * Returns the value stored in t->cmn.result.
                                         *
                                         * NOTE(review): nb_used and nb_valid are not reset in this function, so they would
                                         * accumulate across calls unless they are cleared elsewhere - confirm.
                                         */
    1118                 :            : int
    1119                 :          0 : ml_inference_result(struct ml_test *test, struct ml_options *opt, uint16_t fid)
    1120                 :            : {
    1121                 :            :         struct test_inference *t = ml_test_priv(test);
    1122                 :            :         uint64_t error_count = 0;
    1123                 :            :         uint32_t i;
    1124                 :            : 
    1125                 :            :         RTE_SET_USED(opt);
    1126                 :            : 
    1127                 :            :         /* check for errors: total the counters of all RTE_MAX_LCORE slots */
    1128                 :          0 :         for (i = 0; i < RTE_MAX_LCORE; i++)
    1129                 :          0 :                 error_count += t->error_count[i];
    1130                 :            : 
                                                /* dequantize and validate every request object; this updates nb_used/nb_valid */
    1131                 :          0 :         rte_mempool_obj_iter(t->model[fid].io_pool, ml_request_finish, test);
    1132                 :            : 
    1133                 :          0 :         if ((t->nb_used == t->nb_valid) && (error_count == 0))
    1134                 :          0 :                 t->cmn.result = ML_TEST_SUCCESS;
    1135                 :            :         else
    1136                 :          0 :                 t->cmn.result = ML_TEST_FAILED;
    1137                 :            : 
    1138                 :          0 :         return t->cmn.result;
    1139                 :            : }
    1140                 :            : 
                                        /* Launch the enqueue and dequeue functions on worker lcores.
                                         *
                                         * At most 2 * opt->queue_pairs workers are used. Workers are taken in iteration order
                                         * and grouped in pairs: both members of a pair get the same qp_id (id / 2); the even
                                         * one runs t->enqueue and the odd one runs t->dequeue. Each worker's args are filled
                                         * with the queue pair id, its number of requests and the [start_fid, end_fid] range.
                                         *
                                         * opt->repetitions is divided evenly among the queue pairs; queue pair 0 additionally
                                         * receives the remainder of that division.
                                         *
                                         * Always returns 0; the return values of rte_eal_remote_launch() are ignored.
                                         *
                                         * NOTE(review): opt->queue_pairs == 0 would divide by zero below - presumably it is
                                         * rejected during option validation; confirm.
                                         */
    1141                 :            : int
    1142                 :          0 : ml_inference_launch_cores(struct ml_test *test, struct ml_options *opt, uint16_t start_fid,
    1143                 :            :                           uint16_t end_fid)
    1144                 :            : {
    1145                 :            :         struct test_inference *t = ml_test_priv(test);
    1146                 :            :         uint32_t lcore_id;
    1147                 :            :         uint32_t nb_reqs;
    1148                 :            :         uint32_t id = 0;
    1149                 :            :         uint32_t qp_id;
    1150                 :            : 
    1151                 :          0 :         nb_reqs = opt->repetitions / opt->queue_pairs;
    1152                 :            : 
    1153                 :          0 :         RTE_LCORE_FOREACH_WORKER(lcore_id)
    1154                 :            :         {
                                                        /* two workers per queue pair; stop once all pairs are staffed */
    1155                 :          0 :                 if (id >= opt->queue_pairs * 2)
    1156                 :            :                         break;
    1157                 :            : 
    1158                 :          0 :                 qp_id = id / 2;
    1159                 :          0 :                 t->args[lcore_id].qp_id = qp_id;
    1160                 :          0 :                 t->args[lcore_id].nb_reqs = nb_reqs;
                                                        /* queue pair 0 absorbs the remainder of the integer division */
    1161                 :          0 :                 if (qp_id == 0)
    1162                 :          0 :                         t->args[lcore_id].nb_reqs += opt->repetitions - nb_reqs * opt->queue_pairs;
    1163                 :            : 
                                                        /* a worker with nothing to do ends the launch loop; no further workers
                                                         * are started. the id++ has no effect because id is not read after the
                                                         * loop.
                                                         */
    1164                 :          0 :                 if (t->args[lcore_id].nb_reqs == 0) {
    1165                 :            :                         id++;
    1166                 :            :                         break;
    1167                 :            :                 }
    1168                 :            : 
    1169                 :          0 :                 t->args[lcore_id].start_fid = start_fid;
    1170                 :          0 :                 t->args[lcore_id].end_fid = end_fid;
    1171                 :            : 
                                                        /* even id -> enqueue side, odd id -> dequeue side of the same pair */
    1172                 :          0 :                 if (id % 2 == 0)
    1173                 :          0 :                         rte_eal_remote_launch(t->enqueue, test, lcore_id);
    1174                 :            :                 else
    1175                 :          0 :                         rte_eal_remote_launch(t->dequeue, test, lcore_id);
    1176                 :            : 
    1177                 :          0 :                 id++;
    1178                 :            :         }
    1179                 :            : 
    1180                 :          0 :         return 0;
    1181                 :            : }

Generated by: LCOV version 1.14