LCOV - code coverage report
Current view: top level - lib/distributor - rte_distributor_match_sse.c (source / functions) Hit Total Coverage
Test: Code coverage Lines: 7 7 100.0 %
Date: 2024-02-14 00:53:57 Functions: 1 1 100.0 %
Legend: Lines: hit not hit | Branches: + taken - not taken # not executed Branches: 2 2 100.0 %

           Branch data     Line data    Source code
       1                 :            : /* SPDX-License-Identifier: BSD-3-Clause
       2                 :            :  * Copyright(c) 2017 Intel Corporation
       3                 :            :  */
       4                 :            : 
       5                 :            : #include <rte_mbuf.h>
       6                 :            : #include "distributor_private.h"
       7                 :            : #include "smmintrin.h"
       8                 :            : 
       9                 :            : 
      10                 :            : void
      11                 :     131210 : find_match_vec(struct rte_distributor *d,
      12                 :            :                         uint16_t *data_ptr,
      13                 :            :                         uint16_t *output_ptr)
      14                 :            : {
      15                 :            :         /* Setup */
      16                 :            :         __m128i incoming_fids;
      17                 :            :         __m128i inflight_fids;
      18                 :            :         __m128i preflight_fids;
      19                 :            :         __m128i wkr;
      20                 :            :         __m128i mask1;
      21                 :            :         __m128i mask2;
      22                 :            :         __m128i output;
      23                 :            :         struct rte_distributor_backlog *bl;
      24                 :            :         uint16_t i;
      25                 :            : 
      26                 :            :         /*
      27                 :            :          * Function overview (the incoming flow_ids are loaded into an xmm reg first):
      28                 :            :          * 1. Loop through all worker IDs
      29                 :            :          *  1a. Load the current inflights for that worker into an xmm reg
      30                 :            :          *  1b. Load the current backlog for that worker into an xmm reg
      31                 :            :          *  1c. Use cmpestrm to intersect flow_ids with backlog and inflights
      32                 :            :          *  1d. Add any matches to the output
      33                 :            :          * 2. Write the output xmm (matching worker IDs).
      34                 :            :          */
      35                 :            : 
      36                 :            : 
      37                 :            :         output = _mm_set1_epi16(0);
      38                 :            :         incoming_fids = _mm_load_si128((__m128i *)data_ptr);
      39                 :            : 
      40         [ +  + ]:     262420 :         for (i = 0; i < d->num_workers; i++) {
      41                 :     131210 :                 bl = &d->backlog[i];
      42                 :            : 
      43                 :            :                 inflight_fids =
      44                 :     131210 :                         _mm_load_si128((__m128i *)&(d->in_flight_tags[i]));
      45                 :            :                 preflight_fids =
      46                 :     131210 :                         _mm_load_si128((__m128i *)(bl->tags));
      47                 :            : 
      48                 :            :                 /*
      49                 :            :                  * Any incoming_fid that exists anywhere in inflight_fids will
      50                 :            :                  * have 0xffff in the same position of the mask as the incoming fid.
      51                 :            :                  * Example (shortened to bytes for brevity):
      52                 :            :                  * incoming_fids   0x01 0x02 0x03 0x04 0x05 0x06 0x07 0x08
      53                 :            :                  * inflight_fids   0x03 0x05 0x07 0x00 0x00 0x00 0x00 0x00
      54                 :            :                  * mask            0x00 0x00 0xff 0x00 0xff 0x00 0xff 0x00
      55                 :            :                  */
      56                 :            : 
      57                 :            :                 mask1 = _mm_cmpestrm(inflight_fids, 8, incoming_fids, 8,
      58                 :            :                         _SIDD_UWORD_OPS |
      59                 :            :                         _SIDD_CMP_EQUAL_ANY |
      60                 :            :                         _SIDD_UNIT_MASK);
      61                 :            :                 mask2 = _mm_cmpestrm(preflight_fids, 8, incoming_fids, 8,
      62                 :            :                         _SIDD_UWORD_OPS |
      63                 :            :                         _SIDD_CMP_EQUAL_ANY |
      64                 :            :                         _SIDD_UNIT_MASK);
      65                 :            : 
      66                 :            :                 mask1 = _mm_or_si128(mask1, mask2);
      67                 :            :                 /*
      68                 :            :                  * Now mask contains 0xffff where there's a match.
      69                 :            :                  * Next we need to store the worker_id in the relevant position
      70                 :            :                  * in the output.
      71                 :            :                  */
      72                 :            : 
      73                 :     131210 :                 wkr = _mm_set1_epi16(i+1);
      74                 :            :                 mask1 = _mm_and_si128(mask1, wkr);
      75                 :            :                 output = _mm_or_si128(mask1, output);
      76                 :            :         }
      77                 :            : 
      78                 :            :         /*
      79                 :            :          * At this stage, the 128-bit output contains eight 16-bit values; each
      80                 :            :          * non-zero value is the worker ID (stored as worker index + 1) to which
      81                 :            :          * the corresponding flow is pinned.
      82                 :            :          */
      83                 :            :         _mm_store_si128((__m128i *)output_ptr, output);
      84                 :     131210 : }

Generated by: LCOV version 1.14