Branch data Line data Source code
1 : : /* SPDX-License-Identifier: BSD-3-Clause
2 : : * Copyright(C) 2023 Marvell.
3 : : */
4 : :
5 : : #include "cnxk_ep_rx.h"
6 : :
/*
 * Vector (AVX2) receive path: moves new_pkts buffers from droq->recv_buf_list
 * into rx_pkts, CNXK_EP_OQ_DESC_PER_LOOP_AVX buffers per loop iteration.
 *
 * rx_pkts:  output array; entries [0, new_pkts) are written.
 * droq:     queue state; read_idx, refill_count, pkts_pending and the
 *           pkts_received/bytes_received stats are updated.
 * new_pkts: number of buffers to process. The loop always consumes a full
 *           group, so this must be a multiple of CNXK_EP_OQ_DESC_PER_LOOP_AVX;
 *           both callers in this file pass an RTE_ALIGN_FLOOR()ed count.
 */
7 : : static __rte_always_inline void
8 : : cnxk_ep_process_pkts_vec_avx(struct rte_mbuf **rx_pkts, struct otx_ep_droq *droq, uint16_t new_pkts)
9 : : {
10 : 0 : struct rte_mbuf **recv_buf_list = droq->recv_buf_list;
11 : 0 : uint32_t bytes_rsvd = 0, read_idx = droq->read_idx;
12 : 0 : const uint64_t rearm_data = droq->rearm_data;
/* m: buffers of the current group; idx: their ring indexes; pidx: ring
 * indexes of the following group (used for prefetch only).
 */
13 : : struct rte_mbuf *m[CNXK_EP_OQ_DESC_PER_LOOP_AVX];
14 : : uint32_t pidx[CNXK_EP_OQ_DESC_PER_LOOP_AVX];
15 : : uint32_t idx[CNXK_EP_OQ_DESC_PER_LOOP_AVX];
16 : 0 : uint16_t nb_desc = droq->nb_desc;
17 : : uint16_t pkts = 0;
18 : : uint8_t i;
19 : :
/* idx[0] always holds the ring index of the next buffer to consume. */
20 : 0 : idx[0] = read_idx;
21 [ # # # # ]: 0 : while (pkts < new_pkts) {
22 : : __m256i data[CNXK_EP_OQ_DESC_PER_LOOP_AVX];
23 : : /* Shuffle mask applied to each data[i] (arguments run from byte 31 down to byte 0):
 *  - bytes 0-7 are passed through unchanged (the rearm_data word);
 *  - bytes 20-21 and bytes 24-25 each receive source bytes 21 and 20,
 *    i.e. the same 16-bit length value with its two bytes swapped;
 *  - every 0xFF selector zeroes the destination byte.
 * vpshufb indexes within each 128-bit lane, so selectors 20/21 address
 * bytes 4/5 of the upper lane.
 */
24 : : const __m256i mask =
25 : : _mm256_set_epi8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 20, 21, 0xFF, 0xFF, 20,
26 : : 21, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
27 : : 0xFF, 0xFF, 0xFF, 7, 6, 5, 4, 3, 2, 1, 0);
28 : :
29 : : /* Load indexes. */
/* idx[0] was set before the loop or at the end of the previous iteration. */
30 [ # # # # ]: 0 : for (i = 1; i < CNXK_EP_OQ_DESC_PER_LOOP_AVX; i++)
31 : 0 : idx[i] = otx_ep_incr_index(idx[i - 1], 1, nb_desc);
32 : :
33 : : /* Prefetch next indexes. */
/* Only done when more than 8 buffers remain, i.e. another full group follows.
 * NOTE(review): the literal 8 presumably equals CNXK_EP_OQ_DESC_PER_LOOP_AVX - confirm.
 * Here i == CNXK_EP_OQ_DESC_PER_LOOP_AVX, so idx[i - 1] is the last index of this group.
 */
34 [ # # # # ]: 0 : if (new_pkts - pkts > 8) {
35 : 0 : pidx[0] = otx_ep_incr_index(idx[i - 1], 1, nb_desc);
36 [ # # # # ]: 0 : for (i = 1; i < CNXK_EP_OQ_DESC_PER_LOOP_AVX; i++)
37 : 0 : pidx[i] = otx_ep_incr_index(pidx[i - 1], 1, nb_desc);
38 : :
/* Prefetch both the buffer header and the address returned by rte_pktmbuf_mtod(). */
39 [ # # # # ]: 0 : for (i = 0; i < CNXK_EP_OQ_DESC_PER_LOOP_AVX; i++) {
40 : 0 : rte_prefetch0(recv_buf_list[pidx[i]]);
41 : 0 : rte_prefetch0(rte_pktmbuf_mtod(recv_buf_list[pidx[i]], void *));
42 : : }
43 : : }
44 : :
45 : : /* Load mbuf array. */
46 [ # # # # ]: 0 : for (i = 0; i < CNXK_EP_OQ_DESC_PER_LOOP_AVX; i++)
47 : 0 : m[i] = recv_buf_list[idx[i]];
48 : :
49 : : /* Load rearm data and packet length for shuffle. */
/* 64-bit element 0 = rearm_data, element 2 = info->length >> 16, others zero.
 * The mask then picks bytes 4-5 of element 2, i.e. bits 48-63 of the original
 * length field. Presumably the length is stored big-endian by the device,
 * which would explain the shift plus byte swap - TODO confirm.
 */
50 [ # # # # ]: 0 : for (i = 0; i < CNXK_EP_OQ_DESC_PER_LOOP_AVX; i++)
51 : 0 : data[i] = _mm256_set_epi64x(0,
52 : 0 : cnxk_pktmbuf_mtod(m[i], struct otx_ep_droq_info *)->length >> 16,
53 : : 0, rearm_data);
54 : :
55 : : /* Shuffle data to its place and sum the packet length. */
/* 16-bit element 10 is bytes 20-21, where the mask placed the swapped length. */
56 [ # # # # ]: 0 : for (i = 0; i < CNXK_EP_OQ_DESC_PER_LOOP_AVX; i++) {
57 : 0 : data[i] = _mm256_shuffle_epi8(data[i], mask);
58 : 0 : bytes_rsvd += _mm256_extract_epi16(data[i], 10);
59 : : }
60 : :
61 : : /* Store the 256bit data to the mbuf. */
/* Unaligned store: overwrites the 32 bytes starting at m[i]->rearm_data.
 * NOTE(review): bytes 20-21 / 24-25 presumably land on the mbuf's packet and
 * data length fields - verify against the rte_mbuf layout.
 */
62 [ # # # # ]: 0 : for (i = 0; i < CNXK_EP_OQ_DESC_PER_LOOP_AVX; i++)
63 : 0 : _mm256_storeu_si256((__m256i *)&m[i]->rearm_data, data[i]);
64 : :
/* Hand the group to the caller. The for loop below has no braces, so the
 * idx[0] update after it runs once per while iteration, advancing to the
 * first index of the next group.
 */
65 [ # # # # ]: 0 : for (i = 0; i < CNXK_EP_OQ_DESC_PER_LOOP_AVX; i++)
66 : 0 : rx_pkts[pkts++] = m[i];
67 : 0 : idx[0] = otx_ep_incr_index(idx[i - 1], 1, nb_desc);
68 : : }
/* Publish the new read position and account for the consumed buffers. */
69 : 0 : droq->read_idx = idx[0];
70 : :
71 : 0 : droq->refill_count += new_pkts;
72 : 0 : droq->pkts_pending -= new_pkts;
73 : : /* Stats */
74 : 0 : droq->stats.pkts_received += new_pkts;
75 : 0 : droq->stats.bytes_received += bytes_rsvd;
76 : : }
77 : :
/*
 * AVX receive entry point.
 *
 * rx_queue: opaque queue pointer, used as a struct otx_ep_droq.
 * rx_pkts:  output array for received buffers.
 * nb_pkts:  requested count, passed to cnxk_ep_rx_pkts_to_process().
 *
 * Returns the count reported by cnxk_ep_rx_pkts_to_process().
 */
78 : : uint16_t __rte_noinline __rte_hot
79 : 0 : cnxk_ep_recv_pkts_avx(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
80 : : {
81 : : struct otx_ep_droq *droq = (struct otx_ep_droq *)rx_queue;
82 : : uint16_t new_pkts, vpkts;
83 : :
84 : : /* Refill RX buffers */
85 [ # # ]: 0 : if (droq->refill_count >= DROQ_REFILL_THRESHOLD)
86 : 0 : cnxk_ep_rx_refill(droq);
87 : :
/* The vector routine consumes whole groups only, so it gets the count rounded
 * down to a multiple of CNXK_EP_OQ_DESC_PER_LOOP_AVX; the remainder goes to
 * the scalar routine, which writes from rx_pkts[vpkts] onwards.
 */
88 : 0 : new_pkts = cnxk_ep_rx_pkts_to_process(droq, nb_pkts);
89 : 0 : vpkts = RTE_ALIGN_FLOOR(new_pkts, CNXK_EP_OQ_DESC_PER_LOOP_AVX);
90 : 0 : cnxk_ep_process_pkts_vec_avx(rx_pkts, droq, vpkts);
91 : 0 : cnxk_ep_process_pkts_scalar(&rx_pkts[vpkts], droq, new_pkts - vpkts);
92 : :
93 : 0 : return new_pkts;
94 : : }
95 : :
/*
 * CN9K variant of the AVX receive entry point. Same flow as
 * cnxk_ep_recv_pkts_avx(), except that when no refill is due it writes 0 to
 * the queue's credit register (see the workaround note below).
 *
 * rx_queue: opaque queue pointer, used as a struct otx_ep_droq.
 * rx_pkts:  output array for received buffers.
 * nb_pkts:  requested count, passed to cnxk_ep_rx_pkts_to_process().
 *
 * Returns the count reported by cnxk_ep_rx_pkts_to_process().
 */
96 : : uint16_t __rte_noinline __rte_hot
97 : 0 : cn9k_ep_recv_pkts_avx(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
98 : : {
99 : : struct otx_ep_droq *droq = (struct otx_ep_droq *)rx_queue;
100 : : uint16_t new_pkts, vpkts;
101 : :
102 : : /* Refill RX buffers */
103 [ # # ]: 0 : if (droq->refill_count >= DROQ_REFILL_THRESHOLD) {
104 : 0 : cnxk_ep_rx_refill(droq);
105 : : } else {
106 : : /* SDP output goes into DROP state when output doorbell count
107 : : * goes below drop count. When door bell count is written with
108 : : * a value greater than drop count SDP output should come out
109 : : * of DROP state. Due to a race condition this is not happening.
110 : : * Writing doorbell register with 0 again may make SDP output
111 : : * come out of this state.
112 : : */
113 : :
114 : 0 : rte_write32(0, droq->pkts_credit_reg);
115 : : }
116 : :
/* Whole groups of CNXK_EP_OQ_DESC_PER_LOOP_AVX go through the vector routine;
 * the remaining new_pkts - vpkts buffers go through the scalar routine, which
 * writes from rx_pkts[vpkts] onwards.
 */
117 : 0 : new_pkts = cnxk_ep_rx_pkts_to_process(droq, nb_pkts);
118 : 0 : vpkts = RTE_ALIGN_FLOOR(new_pkts, CNXK_EP_OQ_DESC_PER_LOOP_AVX);
119 : 0 : cnxk_ep_process_pkts_vec_avx(rx_pkts, droq, vpkts);
120 : 0 : cnxk_ep_process_pkts_scalar(&rx_pkts[vpkts], droq, new_pkts - vpkts);
121 : :
122 : 0 : return new_pkts;
123 : : }
|