diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig index 288fa8ce53af0a..b513baf3cbb296 100644 --- a/drivers/net/ethernet/intel/Kconfig +++ b/drivers/net/ethernet/intel/Kconfig @@ -203,6 +203,7 @@ config IXGBE_IPSEC config IXGBEVF tristate "Intel(R) 10GbE PCI Express Virtual Function Ethernet support" depends on PCI_MSI + select LIBETH_XDP help This driver supports Intel(R) PCI Express virtual functions for the Intel(R) ixgbe driver. For more information on how to identify your diff --git a/drivers/net/ethernet/intel/idpf/xdp.c b/drivers/net/ethernet/intel/idpf/xdp.c index 958d16f874248d..7d91f21174de1c 100644 --- a/drivers/net/ethernet/intel/idpf/xdp.c +++ b/drivers/net/ethernet/intel/idpf/xdp.c @@ -46,11 +46,15 @@ static int __idpf_xdp_rxq_info_init(struct idpf_rx_queue *rxq, void *arg) { const struct idpf_vport *vport = rxq->q_vector->vport; bool split = idpf_is_queue_model_split(vport->rxq_model); + u32 frag_size = 0; int err; + if (idpf_queue_has(XSK, rxq)) + frag_size = rxq->bufq_sets[0].bufq.truesize; + err = __xdp_rxq_info_reg(&rxq->xdp_rxq, vport->netdev, rxq->idx, rxq->q_vector->napi.napi_id, - rxq->rx_buf_size); + frag_size); if (err) return err; diff --git a/drivers/net/ethernet/intel/idpf/xsk.c b/drivers/net/ethernet/intel/idpf/xsk.c index fd2cc43ab43cba..95a665cb2f33b4 100644 --- a/drivers/net/ethernet/intel/idpf/xsk.c +++ b/drivers/net/ethernet/intel/idpf/xsk.c @@ -401,6 +401,7 @@ int idpf_xskfq_init(struct idpf_buf_queue *bufq) bufq->pending = fq.pending; bufq->thresh = fq.thresh; bufq->rx_buf_size = fq.buf_len; + bufq->truesize = fq.truesize; if (!idpf_xskfq_refill(bufq)) netdev_err(bufq->pool->netdev, diff --git a/drivers/net/ethernet/intel/ixgbevf/Makefile b/drivers/net/ethernet/intel/ixgbevf/Makefile index 01d3e892f3fa7b..cdae62f25fd926 100644 --- a/drivers/net/ethernet/intel/ixgbevf/Makefile +++ b/drivers/net/ethernet/intel/ixgbevf/Makefile @@ -6,5 +6,5 @@ obj-$(CONFIG_IXGBEVF) += ixgbevf.o -ixgbevf-y := vf.o 
mbx.o ethtool.o ixgbevf_main.o +ixgbevf-y := vf.o mbx.o ethtool.o ixgbevf_main.o ixgbevf_xsk.o ixgbevf-$(CONFIG_IXGBEVF_IPSEC) += ipsec.o diff --git a/drivers/net/ethernet/intel/ixgbevf/defines.h b/drivers/net/ethernet/intel/ixgbevf/defines.h index e177d1d58696aa..afc927dd14381b 100644 --- a/drivers/net/ethernet/intel/ixgbevf/defines.h +++ b/drivers/net/ethernet/intel/ixgbevf/defines.h @@ -71,7 +71,7 @@ typedef u32 ixgbe_link_speed; #define IXGBE_PSRTYPE_L2HDR 0x00001000 /* SRRCTL bit definitions */ -#define IXGBE_SRRCTL_BSIZEPKT_SHIFT 10 /* so many KBs */ +#define IXGBE_SRRCTL_BSIZEPKT_STEP 1024 #define IXGBE_SRRCTL_RDMTS_SHIFT 22 #define IXGBE_SRRCTL_RDMTS_MASK 0x01C00000 #define IXGBE_SRRCTL_DROP_EN 0x10000000 diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c index 537a60d5276f0f..274eef39c58618 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c +++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c @@ -72,13 +72,6 @@ static const char ixgbe_gstrings_test[][ETH_GSTRING_LEN] = { #define IXGBEVF_TEST_LEN (sizeof(ixgbe_gstrings_test) / ETH_GSTRING_LEN) -static const char ixgbevf_priv_flags_strings[][ETH_GSTRING_LEN] = { -#define IXGBEVF_PRIV_FLAGS_LEGACY_RX BIT(0) - "legacy-rx", -}; - -#define IXGBEVF_PRIV_FLAGS_STR_LEN ARRAY_SIZE(ixgbevf_priv_flags_strings) - static int ixgbevf_get_link_ksettings(struct net_device *netdev, struct ethtool_link_ksettings *cmd) { @@ -216,8 +209,6 @@ static void ixgbevf_get_drvinfo(struct net_device *netdev, strscpy(drvinfo->driver, ixgbevf_driver_name, sizeof(drvinfo->driver)); strscpy(drvinfo->bus_info, pci_name(adapter->pdev), sizeof(drvinfo->bus_info)); - - drvinfo->n_priv_flags = IXGBEVF_PRIV_FLAGS_STR_LEN; } static void ixgbevf_get_ringparam(struct net_device *netdev, @@ -409,8 +400,6 @@ static int ixgbevf_get_sset_count(struct net_device *netdev, int stringset) return IXGBEVF_TEST_LEN; case ETH_SS_STATS: return IXGBEVF_STATS_LEN; - case ETH_SS_PRIV_FLAGS: - return 
IXGBEVF_PRIV_FLAGS_STR_LEN; default: return -EINVAL; } @@ -538,10 +527,6 @@ static void ixgbevf_get_strings(struct net_device *netdev, u32 stringset, p += ETH_GSTRING_LEN; } break; - case ETH_SS_PRIV_FLAGS: - memcpy(data, ixgbevf_priv_flags_strings, - IXGBEVF_PRIV_FLAGS_STR_LEN * ETH_GSTRING_LEN); - break; } } @@ -931,37 +916,6 @@ static int ixgbevf_get_rxfh(struct net_device *netdev, return err; } -static u32 ixgbevf_get_priv_flags(struct net_device *netdev) -{ - struct ixgbevf_adapter *adapter = netdev_priv(netdev); - u32 priv_flags = 0; - - if (adapter->flags & IXGBEVF_FLAGS_LEGACY_RX) - priv_flags |= IXGBEVF_PRIV_FLAGS_LEGACY_RX; - - return priv_flags; -} - -static int ixgbevf_set_priv_flags(struct net_device *netdev, u32 priv_flags) -{ - struct ixgbevf_adapter *adapter = netdev_priv(netdev); - unsigned int flags = adapter->flags; - - flags &= ~IXGBEVF_FLAGS_LEGACY_RX; - if (priv_flags & IXGBEVF_PRIV_FLAGS_LEGACY_RX) - flags |= IXGBEVF_FLAGS_LEGACY_RX; - - if (flags != adapter->flags) { - adapter->flags = flags; - - /* reset interface to repopulate queues */ - if (netif_running(netdev)) - ixgbevf_reinit_locked(adapter); - } - - return 0; -} - static const struct ethtool_ops ixgbevf_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS, .get_drvinfo = ixgbevf_get_drvinfo, @@ -984,8 +938,6 @@ static const struct ethtool_ops ixgbevf_ethtool_ops = { .get_rxfh_key_size = ixgbevf_get_rxfh_key_size, .get_rxfh = ixgbevf_get_rxfh, .get_link_ksettings = ixgbevf_get_link_ksettings, - .get_priv_flags = ixgbevf_get_priv_flags, - .set_priv_flags = ixgbevf_set_priv_flags, }; void ixgbevf_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index 516a6fdd23d076..d8f841515ca62a 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include "vf.h" @@ 
-42,17 +43,6 @@ struct ixgbevf_tx_buffer { u32 tx_flags; }; -struct ixgbevf_rx_buffer { - dma_addr_t dma; - struct page *page; -#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536) - __u32 page_offset; -#else - __u16 page_offset; -#endif - __u16 pagecnt_bias; -}; - struct ixgbevf_stats { u64 packets; u64 bytes; @@ -72,12 +62,11 @@ struct ixgbevf_rx_queue_stats { }; enum ixgbevf_ring_state_t { - __IXGBEVF_RX_3K_BUFFER, - __IXGBEVF_RX_BUILD_SKB_ENABLED, __IXGBEVF_TX_DETECT_HANG, __IXGBEVF_HANG_CHECK_ARMED, __IXGBEVF_TX_XDP_RING, __IXGBEVF_TX_XDP_RING_PRIMED, + __IXGBEVF_RXTX_XSK_RING, }; #define ring_is_xdp(ring) \ @@ -87,24 +76,39 @@ enum ixgbevf_ring_state_t { #define clear_ring_xdp(ring) \ clear_bit(__IXGBEVF_TX_XDP_RING, &(ring)->state) +#define ring_is_xsk(ring) \ + test_bit(__IXGBEVF_RXTX_XSK_RING, &(ring)->state) +#define set_ring_xsk(ring) \ + set_bit(__IXGBEVF_RXTX_XSK_RING, &(ring)->state) +#define clear_ring_xsk(ring) \ + clear_bit(__IXGBEVF_RXTX_XSK_RING, &(ring)->state) + struct ixgbevf_ring { struct ixgbevf_ring *next; struct ixgbevf_q_vector *q_vector; /* backpointer to q_vector */ struct net_device *netdev; - struct bpf_prog *xdp_prog; - struct device *dev; + struct bpf_prog __rcu *xdp_prog; + union { + struct page_pool *pp; /* Rx and XDP rings */ + struct device *dev; /* Tx ring */ + }; void *desc; /* descriptor ring memory */ - dma_addr_t dma; /* phys. 
address of descriptor ring */ - unsigned int size; /* length in bytes */ + u32 truesize; /* Rx buffer full size */ + u32 hdr_truesize; /* Rx header buffer full size */ u16 count; /* amount of descriptors */ - u16 next_to_use; u16 next_to_clean; - u16 next_to_alloc; + u32 next_to_use; + u32 pending; /* Sent-not-completed descriptors */ union { + struct libeth_fqe *rx_fqes; + struct libeth_xdp_buff **xsk_fqes; struct ixgbevf_tx_buffer *tx_buffer_info; - struct ixgbevf_rx_buffer *rx_buffer_info; + struct libeth_sqe *xdp_sqes; }; + struct libeth_xdpsq_lock xdpq_lock; + u32 cached_ntu; + u32 thresh; unsigned long state; struct ixgbevf_stats stats; struct u64_stats_sync syncp; @@ -112,16 +116,23 @@ struct ixgbevf_ring { struct ixgbevf_tx_queue_stats tx_stats; struct ixgbevf_rx_queue_stats rx_stats; }; + struct libeth_fqe *hdr_fqes; + struct page_pool *hdr_pp; struct xdp_rxq_info xdp_rxq; u64 hw_csum_rx_error; u8 __iomem *tail; - struct sk_buff *skb; /* holds the special value that gets the hardware register offset * associated with this ring, which is different for DCB and RSS modes */ u16 reg_idx; int queue_index; /* needed for multiqueue queue management */ + u32 rx_buf_len; + struct libeth_xdp_buff_stash xdp_stash; + struct libeth_xdp_buff *xsk_xdp_head; + unsigned int dma_size; /* length in bytes */ + dma_addr_t dma; /* phys. address of descriptor ring */ + struct xsk_buff_pool *xsk_pool; /* AF_XDP ZC rings */ } ____cacheline_internodealigned_in_smp; /* How many Rx Buffers do we bundle into one write to the hardware ? 
*/ @@ -144,21 +155,17 @@ struct ixgbevf_ring { #define IXGBEVF_MIN_RXD 64 /* Supported Rx Buffer Sizes */ -#define IXGBEVF_RXBUFFER_256 256 /* Used for packet split */ -#define IXGBEVF_RXBUFFER_2048 2048 +#define IXGBEVF_RXBUFFER_256 256 #define IXGBEVF_RXBUFFER_3072 3072 #define IXGBEVF_RX_HDR_SIZE IXGBEVF_RXBUFFER_256 #define MAXIMUM_ETHERNET_VLAN_SIZE (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN) -#define IXGBEVF_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN) -#if (PAGE_SIZE < 8192) -#define IXGBEVF_MAX_FRAME_BUILD_SKB \ - (SKB_WITH_OVERHEAD(IXGBEVF_RXBUFFER_2048) - IXGBEVF_SKB_PAD) -#else -#define IXGBEVF_MAX_FRAME_BUILD_SKB IXGBEVF_RXBUFFER_2048 -#endif +#define IXGBEVF_RX_PAGE_LEN(hr) (ALIGN_DOWN(LIBETH_RX_PAGE_LEN(hr), \ + IXGBE_SRRCTL_BSIZEPKT_STEP)) +#define IXGBEVF_RX_SRRCTL_BUF_SIZE(mtu) (ALIGN((mtu) + LIBETH_RX_LL_LEN, \ + IXGBE_SRRCTL_BSIZEPKT_STEP)) #define IXGBE_TX_FLAGS_CSUM BIT(0) #define IXGBE_TX_FLAGS_VLAN BIT(1) @@ -169,43 +176,6 @@ struct ixgbevf_ring { #define IXGBE_TX_FLAGS_VLAN_PRIO_MASK 0x0000e000 #define IXGBE_TX_FLAGS_VLAN_SHIFT 16 -#define ring_uses_large_buffer(ring) \ - test_bit(__IXGBEVF_RX_3K_BUFFER, &(ring)->state) -#define set_ring_uses_large_buffer(ring) \ - set_bit(__IXGBEVF_RX_3K_BUFFER, &(ring)->state) -#define clear_ring_uses_large_buffer(ring) \ - clear_bit(__IXGBEVF_RX_3K_BUFFER, &(ring)->state) - -#define ring_uses_build_skb(ring) \ - test_bit(__IXGBEVF_RX_BUILD_SKB_ENABLED, &(ring)->state) -#define set_ring_build_skb_enabled(ring) \ - set_bit(__IXGBEVF_RX_BUILD_SKB_ENABLED, &(ring)->state) -#define clear_ring_build_skb_enabled(ring) \ - clear_bit(__IXGBEVF_RX_BUILD_SKB_ENABLED, &(ring)->state) - -static inline unsigned int ixgbevf_rx_bufsz(struct ixgbevf_ring *ring) -{ -#if (PAGE_SIZE < 8192) - if (ring_uses_large_buffer(ring)) - return IXGBEVF_RXBUFFER_3072; - - if (ring_uses_build_skb(ring)) - return IXGBEVF_MAX_FRAME_BUILD_SKB; -#endif - return IXGBEVF_RXBUFFER_2048; -} - -static inline unsigned int ixgbevf_rx_pg_order(struct ixgbevf_ring 
*ring) -{ -#if (PAGE_SIZE < 8192) - if (ring_uses_large_buffer(ring)) - return 1; -#endif - return 0; -} - -#define ixgbevf_rx_pg_size(_ring) (PAGE_SIZE << ixgbevf_rx_pg_order(_ring)) - #define check_for_tx_hang(ring) \ test_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state) #define set_check_for_tx_hang(ring) \ @@ -377,8 +347,6 @@ struct ixgbevf_adapter { u32 flags; bool link_state; -#define IXGBEVF_FLAGS_LEGACY_RX BIT(1) - #ifdef CONFIG_XFRM struct ixgbevf_ipsec *ipsec; #endif /* CONFIG_XFRM */ @@ -396,6 +364,8 @@ enum ixbgevf_state_t { __IXGBEVF_QUEUE_RESET_REQUESTED, }; +#define IXGBEVF_FLAG_HSPLIT BIT(0) + enum ixgbevf_boards { board_82599_vf, board_82599_vf_hv, @@ -439,14 +409,28 @@ int ixgbevf_open(struct net_device *netdev); int ixgbevf_close(struct net_device *netdev); void ixgbevf_up(struct ixgbevf_adapter *adapter); void ixgbevf_down(struct ixgbevf_adapter *adapter); +void ixgbevf_flush_tx_queue(struct ixgbevf_ring *ring); +void ixgbevf_disable_rx_queue(struct ixgbevf_adapter *adapter, + struct ixgbevf_ring *ring); +void ixgbevf_rx_desc_queue_enable(struct ixgbevf_adapter *adapter, + struct ixgbevf_ring *ring); void ixgbevf_reinit_locked(struct ixgbevf_adapter *adapter); void ixgbevf_reset(struct ixgbevf_adapter *adapter); void ixgbevf_set_ethtool_ops(struct net_device *netdev); int ixgbevf_setup_rx_resources(struct ixgbevf_adapter *adapter, struct ixgbevf_ring *rx_ring); +void ixgbevf_irq_enable(struct ixgbevf_adapter *adapter); +void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter, + struct ixgbevf_ring *ring); int ixgbevf_setup_tx_resources(struct ixgbevf_ring *); +void ixgbevf_configure_tx_ring(struct ixgbevf_adapter *adapter, + struct ixgbevf_ring *ring); void ixgbevf_free_rx_resources(struct ixgbevf_ring *); +void ixgbevf_clean_rx_ring(struct ixgbevf_ring *rx_ring); +void ixgbevf_rx_destroy_pp(struct ixgbevf_ring *rx_ring); void ixgbevf_free_tx_resources(struct ixgbevf_ring *); +void ixgbevf_clean_tx_ring(struct ixgbevf_ring *tx_ring); +void 
ixgbevf_clean_xdp_ring(struct ixgbevf_ring *xdp_ring); void ixgbevf_update_stats(struct ixgbevf_adapter *adapter); int ethtool_ioctl(struct ifreq *ifr); diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index d5ce20f47def1f..dd6a9f32f309c1 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -30,9 +30,11 @@ #include #include #include +#include #include -#include "ixgbevf.h" +#include "ixgbevf_txrx_lib.h" +#include "ixgbevf_xsk.h" const char ixgbevf_driver_name[] = "ixgbevf"; static const char ixgbevf_driver_string[] = @@ -82,6 +84,8 @@ static const struct pci_device_id ixgbevf_pci_tbl[] = { MODULE_DEVICE_TABLE(pci, ixgbevf_pci_tbl); MODULE_DESCRIPTION("Intel(R) 10 Gigabit Virtual Function Network Driver"); +MODULE_IMPORT_NS("LIBETH"); +MODULE_IMPORT_NS("LIBETH_XDP"); MODULE_LICENSE("GPL v2"); #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK) @@ -112,9 +116,6 @@ static void ixgbevf_service_event_complete(struct ixgbevf_adapter *adapter) static void ixgbevf_queue_reset_subtask(struct ixgbevf_adapter *adapter); static void ixgbevf_set_itr(struct ixgbevf_q_vector *q_vector); static void ixgbevf_free_all_rx_resources(struct ixgbevf_adapter *adapter); -static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer); -static void ixgbevf_reuse_rx_page(struct ixgbevf_ring *rx_ring, - struct ixgbevf_rx_buffer *old_buff); static void ixgbevf_remove_adapter(struct ixgbe_hw *hw) { @@ -306,10 +307,7 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector, total_ipsec++; /* free the skb */ - if (ring_is_xdp(tx_ring)) - page_frag_free(tx_buffer->data); - else - napi_consume_skb(tx_buffer->skb, napi_budget); + napi_consume_skb(tx_buffer->skb, napi_budget); /* unmap skb header data */ dma_unmap_single(tx_ring->dev, @@ -392,9 +390,8 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector, 
eop_desc, (eop_desc ? eop_desc->wb.status : 0), tx_ring->tx_buffer_info[i].time_stamp, jiffies); - if (!ring_is_xdp(tx_ring)) - netif_stop_subqueue(tx_ring->netdev, - tx_ring->queue_index); + netif_stop_subqueue(tx_ring->netdev, + tx_ring->queue_index); /* schedule immediate reset if we believe we hung */ ixgbevf_tx_timeout_reset(adapter); @@ -402,9 +399,6 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector, return true; } - if (ring_is_xdp(tx_ring)) - return !!budget; - #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2) if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) && (ixgbevf_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD))) { @@ -425,227 +419,6 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector, return !!budget; } -/** - * ixgbevf_rx_skb - Helper function to determine proper Rx method - * @q_vector: structure containing interrupt and ring information - * @skb: packet to send up - **/ -static void ixgbevf_rx_skb(struct ixgbevf_q_vector *q_vector, - struct sk_buff *skb) -{ - napi_gro_receive(&q_vector->napi, skb); -} - -#define IXGBE_RSS_L4_TYPES_MASK \ - ((1ul << IXGBE_RXDADV_RSSTYPE_IPV4_TCP) | \ - (1ul << IXGBE_RXDADV_RSSTYPE_IPV4_UDP) | \ - (1ul << IXGBE_RXDADV_RSSTYPE_IPV6_TCP) | \ - (1ul << IXGBE_RXDADV_RSSTYPE_IPV6_UDP)) - -static inline void ixgbevf_rx_hash(struct ixgbevf_ring *ring, - union ixgbe_adv_rx_desc *rx_desc, - struct sk_buff *skb) -{ - u16 rss_type; - - if (!(ring->netdev->features & NETIF_F_RXHASH)) - return; - - rss_type = le16_to_cpu(rx_desc->wb.lower.lo_dword.hs_rss.pkt_info) & - IXGBE_RXDADV_RSSTYPE_MASK; - - if (!rss_type) - return; - - skb_set_hash(skb, le32_to_cpu(rx_desc->wb.lower.hi_dword.rss), - (IXGBE_RSS_L4_TYPES_MASK & (1ul << rss_type)) ? 
- PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3); -} - -/** - * ixgbevf_rx_checksum - indicate in skb if hw indicated a good cksum - * @ring: structure containig ring specific data - * @rx_desc: current Rx descriptor being processed - * @skb: skb currently being received and modified - **/ -static inline void ixgbevf_rx_checksum(struct ixgbevf_ring *ring, - union ixgbe_adv_rx_desc *rx_desc, - struct sk_buff *skb) -{ - skb_checksum_none_assert(skb); - - /* Rx csum disabled */ - if (!(ring->netdev->features & NETIF_F_RXCSUM)) - return; - - /* if IP and error */ - if (ixgbevf_test_staterr(rx_desc, IXGBE_RXD_STAT_IPCS) && - ixgbevf_test_staterr(rx_desc, IXGBE_RXDADV_ERR_IPE)) { - ring->rx_stats.csum_err++; - return; - } - - if (!ixgbevf_test_staterr(rx_desc, IXGBE_RXD_STAT_L4CS)) - return; - - if (ixgbevf_test_staterr(rx_desc, IXGBE_RXDADV_ERR_TCPE)) { - ring->rx_stats.csum_err++; - return; - } - - /* It must be a TCP or UDP packet with a valid checksum */ - skb->ip_summed = CHECKSUM_UNNECESSARY; -} - -/** - * ixgbevf_process_skb_fields - Populate skb header fields from Rx descriptor - * @rx_ring: rx descriptor ring packet is being transacted on - * @rx_desc: pointer to the EOP Rx descriptor - * @skb: pointer to current skb being populated - * - * This function checks the ring, descriptor, and packet information in - * order to populate the checksum, VLAN, protocol, and other fields within - * the skb. 
- **/ -static void ixgbevf_process_skb_fields(struct ixgbevf_ring *rx_ring, - union ixgbe_adv_rx_desc *rx_desc, - struct sk_buff *skb) -{ - ixgbevf_rx_hash(rx_ring, rx_desc, skb); - ixgbevf_rx_checksum(rx_ring, rx_desc, skb); - - if (ixgbevf_test_staterr(rx_desc, IXGBE_RXD_STAT_VP)) { - u16 vid = le16_to_cpu(rx_desc->wb.upper.vlan); - unsigned long *active_vlans = netdev_priv(rx_ring->netdev); - - if (test_bit(vid & VLAN_VID_MASK, active_vlans)) - __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); - } - - if (ixgbevf_test_staterr(rx_desc, IXGBE_RXDADV_STAT_SECP)) - ixgbevf_ipsec_rx(rx_ring, rx_desc, skb); - - skb->protocol = eth_type_trans(skb, rx_ring->netdev); -} - -static -struct ixgbevf_rx_buffer *ixgbevf_get_rx_buffer(struct ixgbevf_ring *rx_ring, - const unsigned int size) -{ - struct ixgbevf_rx_buffer *rx_buffer; - - rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; - prefetchw(rx_buffer->page); - - /* we are reusing so sync this buffer for CPU use */ - dma_sync_single_range_for_cpu(rx_ring->dev, - rx_buffer->dma, - rx_buffer->page_offset, - size, - DMA_FROM_DEVICE); - - rx_buffer->pagecnt_bias--; - - return rx_buffer; -} - -static void ixgbevf_put_rx_buffer(struct ixgbevf_ring *rx_ring, - struct ixgbevf_rx_buffer *rx_buffer, - struct sk_buff *skb) -{ - if (ixgbevf_can_reuse_rx_page(rx_buffer)) { - /* hand second half of page back to the ring */ - ixgbevf_reuse_rx_page(rx_ring, rx_buffer); - } else { - if (IS_ERR(skb)) - /* We are not reusing the buffer so unmap it and free - * any references we are holding to it - */ - dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, - ixgbevf_rx_pg_size(rx_ring), - DMA_FROM_DEVICE, - IXGBEVF_RX_DMA_ATTR); - __page_frag_cache_drain(rx_buffer->page, - rx_buffer->pagecnt_bias); - } - - /* clear contents of rx_buffer */ - rx_buffer->page = NULL; -} - -/** - * ixgbevf_is_non_eop - process handling of non-EOP buffers - * @rx_ring: Rx ring being processed - * @rx_desc: Rx descriptor for current buffer - * - * 
This function updates next to clean. If the buffer is an EOP buffer - * this function exits returning false, otherwise it will place the - * sk_buff in the next buffer to be chained and return true indicating - * that this is in fact a non-EOP buffer. - **/ -static bool ixgbevf_is_non_eop(struct ixgbevf_ring *rx_ring, - union ixgbe_adv_rx_desc *rx_desc) -{ - u32 ntc = rx_ring->next_to_clean + 1; - - /* fetch, update, and store next to clean */ - ntc = (ntc < rx_ring->count) ? ntc : 0; - rx_ring->next_to_clean = ntc; - - prefetch(IXGBEVF_RX_DESC(rx_ring, ntc)); - - if (likely(ixgbevf_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))) - return false; - - return true; -} - -static inline unsigned int ixgbevf_rx_offset(struct ixgbevf_ring *rx_ring) -{ - return ring_uses_build_skb(rx_ring) ? IXGBEVF_SKB_PAD : 0; -} - -static bool ixgbevf_alloc_mapped_page(struct ixgbevf_ring *rx_ring, - struct ixgbevf_rx_buffer *bi) -{ - struct page *page = bi->page; - dma_addr_t dma; - - /* since we are recycling buffers we should seldom need to alloc */ - if (likely(page)) - return true; - - /* alloc new page for storage */ - page = dev_alloc_pages(ixgbevf_rx_pg_order(rx_ring)); - if (unlikely(!page)) { - rx_ring->rx_stats.alloc_rx_page_failed++; - return false; - } - - /* map page for use */ - dma = dma_map_page_attrs(rx_ring->dev, page, 0, - ixgbevf_rx_pg_size(rx_ring), - DMA_FROM_DEVICE, IXGBEVF_RX_DMA_ATTR); - - /* if mapping failed free memory back to system since - * there isn't much point in holding memory we can't use - */ - if (dma_mapping_error(rx_ring->dev, dma)) { - __free_pages(page, ixgbevf_rx_pg_order(rx_ring)); - - rx_ring->rx_stats.alloc_rx_page_failed++; - return false; - } - - bi->dma = dma; - bi->page = page; - bi->page_offset = ixgbevf_rx_offset(rx_ring); - bi->pagecnt_bias = 1; - rx_ring->rx_stats.alloc_rx_page++; - - return true; -} - /** * ixgbevf_alloc_rx_buffers - Replace used receive buffers; packet split * @rx_ring: rx descriptor ring (for a specific queue) to 
setup buffers on @@ -655,39 +428,48 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_ring *rx_ring, u16 cleaned_count) { union ixgbe_adv_rx_desc *rx_desc; - struct ixgbevf_rx_buffer *bi; - unsigned int i = rx_ring->next_to_use; + const struct libeth_fq_fp fq = { + .pp = rx_ring->pp, + .fqes = rx_ring->rx_fqes, + .truesize = rx_ring->truesize, + .count = rx_ring->count, + }; + const struct libeth_fq_fp hdr_fq = { + .pp = rx_ring->hdr_pp, + .fqes = rx_ring->hdr_fqes, + .truesize = rx_ring->hdr_truesize, + .count = rx_ring->count, + }; + u16 ntu = rx_ring->next_to_use; - /* nothing to do or no valid netdev defined */ - if (!cleaned_count || !rx_ring->netdev) + /* nothing to do or page pool is not present */ + if (unlikely(!cleaned_count || !fq.pp)) return; - rx_desc = IXGBEVF_RX_DESC(rx_ring, i); - bi = &rx_ring->rx_buffer_info[i]; - i -= rx_ring->count; + rx_desc = IXGBEVF_RX_DESC(rx_ring, ntu); do { - if (!ixgbevf_alloc_mapped_page(rx_ring, bi)) + dma_addr_t addr; + + addr = libeth_rx_alloc(&fq, ntu); + if (addr == DMA_MAPPING_ERROR) break; - /* sync the buffer for use by the device */ - dma_sync_single_range_for_device(rx_ring->dev, bi->dma, - bi->page_offset, - ixgbevf_rx_bufsz(rx_ring), - DMA_FROM_DEVICE); + rx_desc->read.pkt_addr = cpu_to_le64(addr); - /* Refresh the desc even if pkt_addr didn't change - * because each write-back erases this info. 
- */ - rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset); + if (hdr_fq.pp) { + addr = libeth_rx_alloc(&hdr_fq, ntu); + if (addr == DMA_MAPPING_ERROR) { + libeth_rx_recycle_slow(fq.fqes[ntu].netmem); + break; + } + } rx_desc++; - bi++; - i++; - if (unlikely(!i)) { + ntu++; + if (unlikely(ntu == fq.count)) { rx_desc = IXGBEVF_RX_DESC(rx_ring, 0); - bi = rx_ring->rx_buffer_info; - i -= rx_ring->count; + ntu = 0; } /* clear the length for the next_to_use descriptor */ @@ -696,14 +478,9 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_ring *rx_ring, cleaned_count--; } while (cleaned_count); - i += rx_ring->count; - - if (rx_ring->next_to_use != i) { + if (likely(rx_ring->next_to_use != ntu)) { /* record the next descriptor to use */ - rx_ring->next_to_use = i; - - /* update next to alloc since we have filled the ring */ - rx_ring->next_to_alloc = i; + rx_ring->next_to_use = ntu; /* Force memory writes to complete before letting h/w * know there are new descriptors to fetch. (Only @@ -711,206 +488,8 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_ring *rx_ring, * such as IA-64). */ wmb(); - ixgbevf_write_tail(rx_ring, i); - } -} - -/** - * ixgbevf_cleanup_headers - Correct corrupted or empty headers - * @rx_ring: rx descriptor ring packet is being transacted on - * @rx_desc: pointer to the EOP Rx descriptor - * @skb: pointer to current skb being fixed - * - * Check for corrupted packet headers caused by senders on the local L2 - * embedded NIC switch not setting up their Tx Descriptors right. These - * should be very rare. - * - * Also address the case where we are pulling data in on pages only - * and as such no data is present in the skb header. - * - * In addition if skb is not at least 60 bytes we need to pad it so that - * it is large enough to qualify as a valid Ethernet frame. - * - * Returns true if an error was encountered and skb was freed. 
- **/ -static bool ixgbevf_cleanup_headers(struct ixgbevf_ring *rx_ring, - union ixgbe_adv_rx_desc *rx_desc, - struct sk_buff *skb) -{ - /* verify that the packet does not have any known errors */ - if (unlikely(ixgbevf_test_staterr(rx_desc, - IXGBE_RXDADV_ERR_FRAME_ERR_MASK))) { - struct net_device *netdev = rx_ring->netdev; - - if (!(netdev->features & NETIF_F_RXALL)) { - dev_kfree_skb_any(skb); - return true; - } - } - - /* if eth_skb_pad returns an error the skb was freed */ - if (eth_skb_pad(skb)) - return true; - - return false; -} - -/** - * ixgbevf_reuse_rx_page - page flip buffer and store it back on the ring - * @rx_ring: rx descriptor ring to store buffers on - * @old_buff: donor buffer to have page reused - * - * Synchronizes page for reuse by the adapter - **/ -static void ixgbevf_reuse_rx_page(struct ixgbevf_ring *rx_ring, - struct ixgbevf_rx_buffer *old_buff) -{ - struct ixgbevf_rx_buffer *new_buff; - u16 nta = rx_ring->next_to_alloc; - - new_buff = &rx_ring->rx_buffer_info[nta]; - - /* update, and store next to alloc */ - nta++; - rx_ring->next_to_alloc = (nta < rx_ring->count) ? 
nta : 0; - - /* transfer page from old buffer to new buffer */ - new_buff->page = old_buff->page; - new_buff->dma = old_buff->dma; - new_buff->page_offset = old_buff->page_offset; - new_buff->pagecnt_bias = old_buff->pagecnt_bias; -} - -static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer) -{ - unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; - struct page *page = rx_buffer->page; - - /* avoid re-using remote and pfmemalloc pages */ - if (!dev_page_is_reusable(page)) - return false; - -#if (PAGE_SIZE < 8192) - /* if we are only owner of page we can reuse it */ - if (unlikely((page_ref_count(page) - pagecnt_bias) > 1)) - return false; -#else -#define IXGBEVF_LAST_OFFSET \ - (SKB_WITH_OVERHEAD(PAGE_SIZE) - IXGBEVF_RXBUFFER_2048) - - if (rx_buffer->page_offset > IXGBEVF_LAST_OFFSET) - return false; - -#endif - - /* If we have drained the page fragment pool we need to update - * the pagecnt_bias and page count so that we fully restock the - * number of references the driver holds. - */ - if (unlikely(!pagecnt_bias)) { - page_ref_add(page, USHRT_MAX); - rx_buffer->pagecnt_bias = USHRT_MAX; - } - - return true; -} - -/** - * ixgbevf_add_rx_frag - Add contents of Rx buffer to sk_buff - * @rx_ring: rx descriptor ring to transact packets on - * @rx_buffer: buffer containing page to add - * @skb: sk_buff to place the data into - * @size: size of buffer to be added - * - * This function will add the data contained in rx_buffer->page to the skb. - **/ -static void ixgbevf_add_rx_frag(struct ixgbevf_ring *rx_ring, - struct ixgbevf_rx_buffer *rx_buffer, - struct sk_buff *skb, - unsigned int size) -{ -#if (PAGE_SIZE < 8192) - unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2; -#else - unsigned int truesize = ring_uses_build_skb(rx_ring) ? 
- SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size) : - SKB_DATA_ALIGN(size); -#endif - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page, - rx_buffer->page_offset, size, truesize); -#if (PAGE_SIZE < 8192) - rx_buffer->page_offset ^= truesize; -#else - rx_buffer->page_offset += truesize; -#endif -} - -static -struct sk_buff *ixgbevf_construct_skb(struct ixgbevf_ring *rx_ring, - struct ixgbevf_rx_buffer *rx_buffer, - struct xdp_buff *xdp, - union ixgbe_adv_rx_desc *rx_desc) -{ - unsigned int size = xdp->data_end - xdp->data; -#if (PAGE_SIZE < 8192) - unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2; -#else - unsigned int truesize = SKB_DATA_ALIGN(xdp->data_end - - xdp->data_hard_start); -#endif - unsigned int headlen; - struct sk_buff *skb; - - /* prefetch first cache line of first page */ - net_prefetch(xdp->data); - - /* Note, we get here by enabling legacy-rx via: - * - * ethtool --set-priv-flags legacy-rx on - * - * In this mode, we currently get 0 extra XDP headroom as - * opposed to having legacy-rx off, where we process XDP - * packets going to stack via ixgbevf_build_skb(). - * - * For ixgbevf_construct_skb() mode it means that the - * xdp->data_meta will always point to xdp->data, since - * the helper cannot expand the head. Should this ever - * changed in future for legacy-rx mode on, then lets also - * add xdp->data_meta handling here. 
- */ - /* allocate a skb to store the frags */ - skb = napi_alloc_skb(&rx_ring->q_vector->napi, IXGBEVF_RX_HDR_SIZE); - if (unlikely(!skb)) - return NULL; - - /* Determine available headroom for copy */ - headlen = size; - if (headlen > IXGBEVF_RX_HDR_SIZE) - headlen = eth_get_headlen(skb->dev, xdp->data, - IXGBEVF_RX_HDR_SIZE); - - /* align pull length to size of long to optimize memcpy performance */ - memcpy(__skb_put(skb, headlen), xdp->data, - ALIGN(headlen, sizeof(long))); - - /* update all of the pointers */ - size -= headlen; - if (size) { - skb_add_rx_frag(skb, 0, rx_buffer->page, - (xdp->data + headlen) - - page_address(rx_buffer->page), - size, truesize); -#if (PAGE_SIZE < 8192) - rx_buffer->page_offset ^= truesize; -#else - rx_buffer->page_offset += truesize; -#endif - } else { - rx_buffer->pagecnt_bias++; + ixgbevf_write_tail(rx_ring, ntu); } - - return skb; } static inline void ixgbevf_irq_enable_queues(struct ixgbevf_adapter *adapter, @@ -921,219 +500,152 @@ static inline void ixgbevf_irq_enable_queues(struct ixgbevf_adapter *adapter, IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, qmask); } -static struct sk_buff *ixgbevf_build_skb(struct ixgbevf_ring *rx_ring, - struct ixgbevf_rx_buffer *rx_buffer, - struct xdp_buff *xdp, - union ixgbe_adv_rx_desc *rx_desc) +void ixgbevf_clean_xdp_num(struct ixgbevf_ring *xdp_ring, bool in_napi, + u16 to_clean) { - unsigned int metasize = xdp->data - xdp->data_meta; -#if (PAGE_SIZE < 8192) - unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2; -#else - unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + - SKB_DATA_ALIGN(xdp->data_end - - xdp->data_hard_start); -#endif - struct sk_buff *skb; - - /* Prefetch first cache line of first page. If xdp->data_meta - * is unused, this points to xdp->data, otherwise, we likely - * have a consumer accessing first few bytes of meta data, - * and then actual data.
- */ - net_prefetch(xdp->data_meta); - - /* build an skb around the page buffer */ - skb = napi_build_skb(xdp->data_hard_start, truesize); - if (unlikely(!skb)) - return NULL; + struct libeth_xdpsq_napi_stats stats = { }; + bool xsk_ring = ring_is_xsk(xdp_ring); + u32 ntc = xdp_ring->next_to_clean; + struct xdp_frame_bulk cbulk; + struct libeth_cq_pp cp = { + .bq = &cbulk, + .dev = xdp_ring->dev, + .xss = &stats, + .napi = in_napi, + }; + u32 xsk_frames = 0; - /* update pointers within the skb to store the data */ - skb_reserve(skb, xdp->data - xdp->data_hard_start); - __skb_put(skb, xdp->data_end - xdp->data); - if (metasize) - skb_metadata_set(skb, metasize); + xdp_frame_bulk_init(&cbulk); + xdp_ring->pending -= to_clean; - /* update buffer offset */ -#if (PAGE_SIZE < 8192) - rx_buffer->page_offset ^= truesize; -#else - rx_buffer->page_offset += truesize; -#endif + while (likely(to_clean--)) { + xsk_frames += xsk_ring && + likely(!xdp_ring->xdp_sqes[ntc].type) ? 1 : 0; + libeth_xdp_complete_tx(&xdp_ring->xdp_sqes[ntc], &cp); + ntc++; + ntc = unlikely(ntc == xdp_ring->count) ?
0 : ntc; + } - return skb; + xdp_ring->next_to_clean = ntc; + xdp_flush_frame_bulk(&cbulk); + if (xsk_frames) + xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames); } -#define IXGBEVF_XDP_PASS 0 -#define IXGBEVF_XDP_CONSUMED 1 -#define IXGBEVF_XDP_TX 2 - -static int ixgbevf_xmit_xdp_ring(struct ixgbevf_ring *ring, - struct xdp_buff *xdp) +void ixgbevf_clean_xdp_ring(struct ixgbevf_ring *xdp_ring) { - struct ixgbevf_tx_buffer *tx_buffer; - union ixgbe_adv_tx_desc *tx_desc; - u32 len, cmd_type; - dma_addr_t dma; - u16 i; - - len = xdp->data_end - xdp->data; + ixgbevf_clean_xdp_num(xdp_ring, false, xdp_ring->pending); + libeth_xdpsq_put(&xdp_ring->xdpq_lock, xdp_ring->netdev); +} - if (unlikely(!ixgbevf_desc_unused(ring))) - return IXGBEVF_XDP_CONSUMED; +/** + * ixgbevf_xdp_xmit_desc - fill one Tx data descriptor for an XDP buffer + * @desc: libeth Tx descriptor carrying buffer address, length and flags + * @i: index of the descriptor (and of the matching SQE) to fill + * @sq: libeth XDP send queue providing the descriptor and SQE arrays + * @priv: not used here + * + * The first descriptor of a frame carries the total payload length (this + * buffer's length plus xdp_frags_size for multi-buffer frames); any other + * descriptor gets its olinfo_status zeroed. + */ +static void ixgbevf_xdp_xmit_desc(struct libeth_xdp_tx_desc desc, u32 i, + const struct libeth_xdpsq *sq, + u64 priv) +{ + union ixgbe_adv_tx_desc *tx_desc = + &((union ixgbe_adv_tx_desc *)sq->descs)[i]; - dma = dma_map_single(ring->dev, xdp->data, len, DMA_TO_DEVICE); - if (dma_mapping_error(ring->dev, dma)) - return IXGBEVF_XDP_CONSUMED; + u32 cmd_type = IXGBE_ADVTXD_DTYP_DATA | + IXGBE_ADVTXD_DCMD_DEXT | + IXGBE_ADVTXD_DCMD_IFCS | + desc.len; - /* record the location of the first descriptor for this packet */ - i = ring->next_to_use; - tx_buffer = &ring->tx_buffer_info[i]; - - dma_unmap_len_set(tx_buffer, len, len); - dma_unmap_addr_set(tx_buffer, dma, dma); - tx_buffer->data = xdp->data; - tx_buffer->bytecount = len; - tx_buffer->gso_segs = 1; - tx_buffer->protocol = 0; - - /* Populate minimal context descriptor that will provide for the - * fact that we are expected to process Ethernet frames.
- */ - if (!test_bit(__IXGBEVF_TX_XDP_RING_PRIMED, &ring->state)) { - struct ixgbe_adv_tx_context_desc *context_desc; + if (desc.flags & LIBETH_XDP_TX_LAST) + cmd_type |= IXGBE_TXD_CMD_EOP; - set_bit(__IXGBEVF_TX_XDP_RING_PRIMED, &ring->state); + if (desc.flags & LIBETH_XDP_TX_FIRST) { + struct libeth_sqe *sqe = &sq->sqes[i]; + struct skb_shared_info *sinfo; + u32 full_len = desc.len; - context_desc = IXGBEVF_TX_CTXTDESC(ring, 0); - context_desc->vlan_macip_lens = - cpu_to_le32(ETH_HLEN << IXGBE_ADVTXD_MACLEN_SHIFT); - context_desc->fceof_saidx = 0; - context_desc->type_tucmd_mlhl = - cpu_to_le32(IXGBE_TXD_CMD_DEXT | - IXGBE_ADVTXD_DTYP_CTXT); - context_desc->mss_l4len_idx = 0; + if (desc.flags & LIBETH_XDP_TX_MULTI) { + sinfo = sqe->type == LIBETH_SQE_XDP_TX ? + sqe->sinfo : + xdp_get_shared_info_from_frame(sqe->xdpf); + full_len += sinfo->xdp_frags_size; + } - i = 1; + tx_desc->read.olinfo_status = + cpu_to_le32((full_len << IXGBE_ADVTXD_PAYLEN_SHIFT) | + IXGBE_ADVTXD_CC); + } else { + /* Not the first descriptor of a frame: there is no length to + * report, but the field must not keep whatever the previous + * use of this ring slot left in it. + */ + tx_desc->read.olinfo_status = 0; } - /* put descriptor type bits */ - cmd_type = IXGBE_ADVTXD_DTYP_DATA | - IXGBE_ADVTXD_DCMD_DEXT | - IXGBE_ADVTXD_DCMD_IFCS; - cmd_type |= len | IXGBE_TXD_CMD; - - tx_desc = IXGBEVF_TX_DESC(ring, i); - tx_desc->read.buffer_addr = cpu_to_le64(dma); - + tx_desc->read.buffer_addr = cpu_to_le64(desc.addr); tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); - tx_desc->read.olinfo_status = - cpu_to_le32((len << IXGBE_ADVTXD_PAYLEN_SHIFT) | - IXGBE_ADVTXD_CC); - - /* Avoid any potential race with cleanup */ - smp_wmb(); - - /* set next_to_watch value indicating a packet is present */ - i++; - if (i == ring->count) - i = 0; - - tx_buffer->next_to_watch = tx_desc; - ring->next_to_use = i; - - return IXGBEVF_XDP_TX; } -static int ixgbevf_run_xdp(struct ixgbevf_adapter *adapter, - struct ixgbevf_ring *rx_ring, - struct xdp_buff *xdp) -{ - int result = IXGBEVF_XDP_PASS; - struct ixgbevf_ring *xdp_ring; - struct bpf_prog *xdp_prog; - u32 act; - - xdp_prog = READ_ONCE(rx_ring->xdp_prog);
+LIBETH_XDP_DEFINE_START(); +LIBETH_XDP_DEFINE_FLUSH_TX(static ixgbevf_xdp_flush_tx, ixgbevf_prep_xdp_sq, + ixgbevf_xdp_xmit_desc); +LIBETH_XDP_DEFINE_FLUSH_XMIT(static ixgbevf_xdp_flush_xmit, ixgbevf_prep_xdp_sq, + ixgbevf_xdp_xmit_desc); +LIBETH_XDP_DEFINE_RUN_PROG(static ixgbevf_xdp_run_prog, ixgbevf_xdp_flush_tx); +LIBETH_XDP_DEFINE_FINALIZE(static ixgbevf_xdp_finalize_xdp_napi, + ixgbevf_xdp_flush_tx, ixgbevf_xdp_rs_and_bump); +LIBETH_XDP_DEFINE_END(); - if (!xdp_prog) - goto xdp_out; +static u32 ixgbevf_rx_hsplit_wa(const struct libeth_fqe *hdr, + struct libeth_fqe *buf, u32 data_len) +{ + u32 copy = data_len <= L1_CACHE_BYTES ? data_len : ETH_HLEN; + struct page *hdr_page, *buf_page; + const void *src; + void *dst; - act = bpf_prog_run_xdp(xdp_prog, xdp); - switch (act) { - case XDP_PASS: - break; - case XDP_TX: - xdp_ring = adapter->xdp_ring[rx_ring->queue_index]; - result = ixgbevf_xmit_xdp_ring(xdp_ring, xdp); - if (result == IXGBEVF_XDP_CONSUMED) - goto out_failure; - break; - default: - bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, act); - fallthrough; - case XDP_ABORTED: -out_failure: - trace_xdp_exception(rx_ring->netdev, xdp_prog, act); - fallthrough; /* handle aborts by dropping packet */ - case XDP_DROP: - result = IXGBEVF_XDP_CONSUMED; - break; - } -xdp_out: - return result; -} + if (unlikely(netmem_is_net_iov(buf->netmem)) || + !libeth_rx_sync_for_cpu(buf, copy)) + return 0; -static unsigned int ixgbevf_rx_frame_truesize(struct ixgbevf_ring *rx_ring, - unsigned int size) -{ - unsigned int truesize; + hdr_page = __netmem_to_page(hdr->netmem); + buf_page = __netmem_to_page(buf->netmem); -#if (PAGE_SIZE < 8192) - truesize = ixgbevf_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */ -#else - truesize = ring_uses_build_skb(rx_ring) ? 
- SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size) + - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) : - SKB_DATA_ALIGN(size); -#endif - return truesize; -} + dst = page_address(hdr_page) + hdr->offset + + pp_page_to_nmdesc(hdr_page)->pp->p.offset; + src = page_address(buf_page) + buf->offset + + pp_page_to_nmdesc(buf_page)->pp->p.offset; -static void ixgbevf_rx_buffer_flip(struct ixgbevf_ring *rx_ring, - struct ixgbevf_rx_buffer *rx_buffer, - unsigned int size) -{ - unsigned int truesize = ixgbevf_rx_frame_truesize(rx_ring, size); + memcpy(dst, src, LARGEST_ALIGN(copy)); + buf->offset += copy; -#if (PAGE_SIZE < 8192) - rx_buffer->page_offset ^= truesize; -#else - rx_buffer->page_offset += truesize; -#endif + return copy; } static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, struct ixgbevf_ring *rx_ring, int budget) { - unsigned int total_rx_bytes = 0, total_rx_packets = 0, frame_sz = 0; + unsigned int total_rx_bytes = 0, total_rx_packets = 0; struct ixgbevf_adapter *adapter = q_vector->adapter; u16 cleaned_count = ixgbevf_desc_unused(rx_ring); - struct sk_buff *skb = rx_ring->skb; - bool xdp_xmit = false; - struct xdp_buff xdp; - int xdp_res = 0; - - /* Frame size depend on rx_ring setup when PAGE_SIZE=4K */ -#if (PAGE_SIZE < 8192) - frame_sz = ixgbevf_rx_frame_truesize(rx_ring, 0); -#endif - xdp_init_buff(&xdp, frame_sz, &rx_ring->xdp_rxq); + LIBETH_XDP_ONSTACK_BULK(xdp_tx_bulk); + LIBETH_XDP_ONSTACK_BUFF(xdp); + + libeth_xdp_init_buff(xdp, &rx_ring->xdp_stash, &rx_ring->xdp_rxq); + libeth_xdp_tx_init_bulk(&xdp_tx_bulk, rx_ring->xdp_prog, + adapter->netdev, adapter->xdp_ring, + adapter->num_xdp_queues); while (likely(total_rx_packets < budget)) { - struct ixgbevf_rx_buffer *rx_buffer; union ixgbe_adv_rx_desc *rx_desc; + struct libeth_fqe *rx_buffer; + struct sk_buff *skb; unsigned int size; /* return some buffers to hardware, one at a time is too slow */ @@ -1144,7 +639,7 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, rx_desc = 
IXGBEVF_RX_DESC(rx_ring, rx_ring->next_to_clean); size = le16_to_cpu(rx_desc->wb.upper.length); - if (!size) + if (unlikely(!size)) break; /* This memory barrier is needed to keep us from reading @@ -1153,73 +648,62 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, */ rmb(); - rx_buffer = ixgbevf_get_rx_buffer(rx_ring, size); + rx_buffer = &rx_ring->rx_fqes[rx_ring->next_to_clean]; - /* retrieve a buffer from the ring */ - if (!skb) { - unsigned int offset = ixgbevf_rx_offset(rx_ring); - unsigned char *hard_start; + if (unlikely(rx_ring->hdr_pp)) { + struct libeth_fqe *hdr_buff; + unsigned int hdr_size = 0; - hard_start = page_address(rx_buffer->page) + - rx_buffer->page_offset - offset; - xdp_prepare_buff(&xdp, hard_start, offset, size, true); -#if (PAGE_SIZE > 4096) - /* At larger PAGE_SIZE, frame_sz depend on len size */ - xdp.frame_sz = ixgbevf_rx_frame_truesize(rx_ring, size); -#endif - xdp_res = ixgbevf_run_xdp(adapter, rx_ring, &xdp); - } + hdr_buff = &rx_ring->hdr_fqes[rx_ring->next_to_clean]; - if (xdp_res) { - if (xdp_res == IXGBEVF_XDP_TX) { - xdp_xmit = true; - ixgbevf_rx_buffer_flip(rx_ring, rx_buffer, - size); - } else { - rx_buffer->pagecnt_bias++; + if (!xdp->data) { + hdr_size = ixgbevf_rx_hsplit_wa(hdr_buff, + rx_buffer, + size); + size -= hdr_size ? 
: size; } - total_rx_packets++; - total_rx_bytes += size; - } else if (skb) { - ixgbevf_add_rx_frag(rx_ring, rx_buffer, skb, size); - } else if (ring_uses_build_skb(rx_ring)) { - skb = ixgbevf_build_skb(rx_ring, rx_buffer, - &xdp, rx_desc); - } else { - skb = ixgbevf_construct_skb(rx_ring, rx_buffer, - &xdp, rx_desc); - } - /* exit if we failed to retrieve a buffer */ - if (!xdp_res && !skb) { - rx_ring->rx_stats.alloc_rx_buff_failed++; - rx_buffer->pagecnt_bias++; - break; + libeth_xdp_process_buff(xdp, hdr_buff, hdr_size); } - ixgbevf_put_rx_buffer(rx_ring, rx_buffer, skb); - cleaned_count++; + libeth_xdp_process_buff(xdp, rx_buffer, size); + cleaned_count++; /* fetch next buffer in frame if non-eop */ if (ixgbevf_is_non_eop(rx_ring, rx_desc)) continue; + total_rx_packets++; + total_rx_bytes += xdp_get_buff_len(&xdp->base); + + if (xdp_tx_bulk.prog && + !ixgbevf_xdp_run_prog(xdp, &xdp_tx_bulk)) + continue; + + skb = xdp_build_skb_from_buff(&xdp->base); + + /* exit if we failed to retrieve a buffer */ + if (unlikely(!skb)) { + libeth_xdp_return_buff_slow(xdp); + rx_ring->rx_stats.alloc_rx_buff_failed++; + break; + } + + xdp->data = NULL; + /* verify the packet layout is correct */ - if (xdp_res || ixgbevf_cleanup_headers(rx_ring, rx_desc, skb)) { + if (unlikely(ixgbevf_cleanup_headers(rx_ring, rx_desc, skb))) { skb = NULL; continue; } - /* probably a little skewed due to removing CRC */ - total_rx_bytes += skb->len; - /* Workaround hardware that can't do proper VEPA multicast * source pruning. 
*/ - if ((skb->pkt_type == PACKET_BROADCAST || - skb->pkt_type == PACKET_MULTICAST) && - ether_addr_equal(rx_ring->netdev->dev_addr, - eth_hdr(skb)->h_source)) { + if (unlikely((skb->pkt_type == PACKET_BROADCAST || + skb->pkt_type == PACKET_MULTICAST) && + ether_addr_equal(rx_ring->netdev->dev_addr, + eth_hdr(skb)->h_source))) { dev_kfree_skb_irq(skb); continue; } @@ -1227,28 +711,13 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, /* populate checksum, VLAN, and protocol */ ixgbevf_process_skb_fields(rx_ring, rx_desc, skb); - ixgbevf_rx_skb(q_vector, skb); - - /* reset skb pointer */ - skb = NULL; - - /* update budget accounting */ - total_rx_packets++; + napi_gro_receive(&q_vector->napi, skb); } /* place incomplete frames back on ring for completion */ - rx_ring->skb = skb; - - if (xdp_xmit) { - struct ixgbevf_ring *xdp_ring = - adapter->xdp_ring[rx_ring->queue_index]; + libeth_xdp_save_buff(&rx_ring->xdp_stash, xdp); - /* Force memory writes to complete before letting h/w - * know there are new descriptors to fetch. 
- */ - wmb(); - ixgbevf_write_tail(xdp_ring, xdp_ring->next_to_use); - } + ixgbevf_xdp_finalize_xdp_napi(&xdp_tx_bulk); u64_stats_update_begin(&rx_ring->syncp); rx_ring->stats.packets += total_rx_packets; @@ -1260,6 +729,23 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, return total_rx_packets; } +static int ixgbevf_xdp_xmit(struct net_device *dev, int n, + struct xdp_frame **frames, u32 flags) +{ + struct ixgbevf_adapter *adapter = netdev_priv(dev); + + if (unlikely(test_bit(__IXGBEVF_DOWN, &adapter->state))) + return -ENETDOWN; + + if (unlikely(!adapter->num_xdp_queues)) + return -ENXIO; + + return libeth_xdp_xmit_do_bulk(dev, n, frames, flags, adapter->xdp_ring, + adapter->num_xdp_queues, + ixgbevf_xdp_flush_xmit, + ixgbevf_xdp_rs_and_bump); +} + /** * ixgbevf_poll - NAPI polling calback * @napi: napi struct with our devices info in it @@ -1278,8 +764,13 @@ static int ixgbevf_poll(struct napi_struct *napi, int budget) bool clean_complete = true; ixgbevf_for_each_ring(ring, q_vector->tx) { - if (!ixgbevf_clean_tx_irq(q_vector, ring, budget)) - clean_complete = false; + if (ring_is_xsk(ring)) + clean_complete &= + ixgbevf_clean_xsk_tx_irq(q_vector, ring, + budget); + else if (!ring_is_xdp(ring)) + clean_complete &= + ixgbevf_clean_tx_irq(q_vector, ring, budget); } if (budget <= 0) @@ -1294,7 +785,10 @@ static int ixgbevf_poll(struct napi_struct *napi, int budget) per_ring_budget = budget; ixgbevf_for_each_ring(ring, q_vector->rx) { - int cleaned = ixgbevf_clean_rx_irq(q_vector, ring, + int cleaned = ring_is_xsk(ring) ? 
+ ixgbevf_clean_xsk_rx_irq(q_vector, ring, + per_ring_budget) : + ixgbevf_clean_rx_irq(q_vector, ring, per_ring_budget); work_done += cleaned; if (cleaned >= per_ring_budget) @@ -1666,7 +1160,7 @@ static inline void ixgbevf_irq_disable(struct ixgbevf_adapter *adapter) * ixgbevf_irq_enable - Enable default interrupt generation settings * @adapter: board private structure **/ -static inline void ixgbevf_irq_enable(struct ixgbevf_adapter *adapter) +void ixgbevf_irq_enable(struct ixgbevf_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; @@ -1675,6 +1169,24 @@ static inline void ixgbevf_irq_enable(struct ixgbevf_adapter *adapter) IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, adapter->eims_enable_mask); } +/** + * ixgbevf_xsk_pool_from_q - get ZC XSK buffer pool bound to a queue ID + * @ring: Rx or Tx ring + * + * Return: A pointer to xsk_buff_pool structure if there is a buffer pool + * attached, configured as zero-copy, and usable by this queue, NULL otherwise. + */ +static struct xsk_buff_pool *ixgbevf_xsk_pool_from_q(struct ixgbevf_ring *ring) +{ + struct xsk_buff_pool *pool = + xsk_get_pool_from_qid(ring->netdev, ring->queue_index); + + if (!READ_ONCE(ring->xdp_prog) && !ring_is_xdp(ring)) + return NULL; + + return (pool && pool->dev) ? pool : NULL; +} + /** * ixgbevf_configure_tx_ring - Configure 82599 VF Tx ring after Reset * @adapter: board private structure @@ -1682,8 +1194,8 @@ static inline void ixgbevf_irq_enable(struct ixgbevf_adapter *adapter) * * Configure the Tx descriptor ring after a reset. 
**/ -static void ixgbevf_configure_tx_ring(struct ixgbevf_adapter *adapter, - struct ixgbevf_ring *ring) +void ixgbevf_configure_tx_ring(struct ixgbevf_adapter *adapter, + struct ixgbevf_ring *ring) { struct ixgbe_hw *hw = &adapter->hw; u64 tdba = ring->dma; @@ -1717,6 +1229,8 @@ static void ixgbevf_configure_tx_ring(struct ixgbevf_adapter *adapter, /* reset ntu and ntc to place SW in sync with hardwdare */ ring->next_to_clean = 0; ring->next_to_use = 0; + ring->pending = 0; + ring->cached_ntu = 0; /* In order to avoid issues WTHRESH + PTHRESH should always be equal * to or less than the number of on chip descriptors, which is @@ -1729,8 +1243,24 @@ static void ixgbevf_configure_tx_ring(struct ixgbevf_adapter *adapter, 32; /* PTHRESH = 32 */ /* reinitialize tx_buffer_info */ - memset(ring->tx_buffer_info, 0, - sizeof(struct ixgbevf_tx_buffer) * ring->count); + if (!ring_is_xdp(ring)) { + memset(ring->tx_buffer_info, 0, + sizeof(struct ixgbevf_tx_buffer) * ring->count); + } else { + memset(ring->xdp_sqes, 0, + sizeof(struct libeth_sqe) * ring->count); + libeth_xdpsq_get(&ring->xdpq_lock, ring->netdev, + num_possible_cpus() > adapter->num_xdp_queues); + } + + ring->xsk_pool = ixgbevf_xsk_pool_from_q(ring); + if (ring_is_xdp(ring) && ring->xsk_pool) + set_ring_xsk(ring); + else + clear_ring_xsk(ring); + + ring->thresh = ring_is_xsk(ring) ? 
IXGBEVF_XSK_TX_CLEAN_THRESH(ring) : + XDP_BULK_QUEUE_SIZE; clear_bit(__IXGBEVF_HANG_CHECK_ARMED, &ring->state); clear_bit(__IXGBEVF_TX_XDP_RING_PRIMED, &ring->state); @@ -1766,7 +1296,8 @@ static void ixgbevf_configure_tx(struct ixgbevf_adapter *adapter) #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2 static void ixgbevf_configure_srrctl(struct ixgbevf_adapter *adapter, - struct ixgbevf_ring *ring, int index) + struct ixgbevf_ring *ring, int index, + bool rlpml_valid) { struct ixgbe_hw *hw = &adapter->hw; u32 srrctl; @@ -1774,10 +1305,11 @@ static void ixgbevf_configure_srrctl(struct ixgbevf_adapter *adapter, srrctl = IXGBE_SRRCTL_DROP_EN; srrctl |= IXGBEVF_RX_HDR_SIZE << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT; - if (ring_uses_large_buffer(ring)) - srrctl |= IXGBEVF_RXBUFFER_3072 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; + if (rlpml_valid) + srrctl |= DIV_ROUND_UP(ring->rx_buf_len, + IXGBE_SRRCTL_BSIZEPKT_STEP); else - srrctl |= IXGBEVF_RXBUFFER_2048 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; + srrctl |= ring->rx_buf_len / IXGBE_SRRCTL_BSIZEPKT_STEP; srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(index), srrctl); @@ -1799,8 +1331,8 @@ static void ixgbevf_setup_psrtype(struct ixgbevf_adapter *adapter) } #define IXGBEVF_MAX_RX_DESC_POLL 10 -static void ixgbevf_disable_rx_queue(struct ixgbevf_adapter *adapter, - struct ixgbevf_ring *ring) +void ixgbevf_disable_rx_queue(struct ixgbevf_adapter *adapter, + struct ixgbevf_ring *ring) { struct ixgbe_hw *hw = &adapter->hw; int wait_loop = IXGBEVF_MAX_RX_DESC_POLL; @@ -1824,10 +1356,15 @@ static void ixgbevf_disable_rx_queue(struct ixgbevf_adapter *adapter, if (!wait_loop) pr_err("RXDCTL.ENABLE queue %d not cleared while polling\n", reg_idx); + + /* Specification calls for 100 usec of delay after + * RXDCTL.ENABLE is cleared + */ + usleep_range(100, 200); } -static void ixgbevf_rx_desc_queue_enable(struct ixgbevf_adapter *adapter, - struct ixgbevf_ring *ring) +void ixgbevf_rx_desc_queue_enable(struct ixgbevf_adapter *adapter, 
+ struct ixgbevf_ring *ring) { struct ixgbe_hw *hw = &adapter->hw; int wait_loop = IXGBEVF_MAX_RX_DESC_POLL; @@ -1903,14 +1440,163 @@ static void ixgbevf_setup_vfmrqc(struct ixgbevf_adapter *adapter) IXGBE_WRITE_REG(hw, IXGBE_VFMRQC, vfmrqc); } -static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter, - struct ixgbevf_ring *ring) +void ixgbevf_rx_destroy_pp(struct ixgbevf_ring *rx_ring) +{ + struct libeth_fq fq = { + .pp = rx_ring->pp, + .fqes = rx_ring->rx_fqes, + }; + + if (!fq.pp && !rx_ring->xsk_fqes) + return; + + if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) { + xdp_rxq_info_detach_mem_model(&rx_ring->xdp_rxq); + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); + } + + if (test_and_clear_bit(__IXGBEVF_RXTX_XSK_RING, &rx_ring->state)) { + struct libeth_xskfq xskfq = { + .fqes = rx_ring->xsk_fqes, + }; + + libeth_xskfq_destroy(&xskfq); + rx_ring->xsk_fqes = NULL; + rx_ring->pending = xskfq.pending; + rx_ring->thresh = xskfq.thresh; + rx_ring->rx_buf_len = xskfq.buf_len; + rx_ring->xsk_pool = NULL; + + return; + } + + libeth_rx_fq_destroy(&fq); + rx_ring->rx_fqes = NULL; + rx_ring->pp = NULL; + + if (!rx_ring->hdr_pp) + return; + + fq = (struct libeth_fq) { + .pp = rx_ring->hdr_pp, + .fqes = rx_ring->hdr_fqes, + }; + + libeth_rx_fq_destroy(&fq); + rx_ring->hdr_fqes = NULL; + rx_ring->hdr_pp = NULL; +} + +static int ixgbevf_rx_create_pp(struct ixgbevf_ring *rx_ring) +{ + u32 adapter_flags = rx_ring->q_vector->adapter->flags; + struct libeth_fq fq = { + .count = rx_ring->count, + .nid = NUMA_NO_NODE, + .type = LIBETH_FQE_MTU, + .xdp = !!rx_ring->xdp_prog, + .buf_len = IXGBEVF_RX_PAGE_LEN(rx_ring->xdp_prog ? 
+ LIBETH_XDP_HEADROOM : + LIBETH_SKB_HEADROOM), + }; + struct xsk_buff_pool *pool; + u32 frame_size; + int ret; + + pool = ixgbevf_xsk_pool_from_q(rx_ring); + if (pool) { + u32 frag_sz = xsk_pool_get_rx_frag_step(pool); + struct libeth_xskfq xskfq = { + .nid = numa_node_id(), + .count = rx_ring->count, + .pool = pool, + }; + + ret = libeth_xskfq_create(&xskfq); + if (ret) + return ret; + + rx_ring->xsk_pool = xskfq.pool; + rx_ring->xsk_fqes = xskfq.fqes; + rx_ring->pending = xskfq.count - 1; + rx_ring->thresh = xskfq.thresh; + rx_ring->rx_buf_len = xskfq.buf_len; + set_ring_xsk(rx_ring); + + ret = __xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, + rx_ring->queue_index, 0, frag_sz); + if (ret) + goto err; + + ret = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq, + MEM_TYPE_XSK_BUFF_POOL, + rx_ring->xsk_pool); + if (ret) + goto err; + + return 0; + } + + /* Some HW requires DMA write sizes to be aligned to 1K, + * which warrants fake header split usage, but this is + * not an issue if the frame size is at its maximum of 3K + */ + frame_size = + IXGBEVF_RX_SRRCTL_BUF_SIZE(READ_ONCE(rx_ring->netdev->mtu)); + fq.hsplit = (adapter_flags & IXGBEVF_FLAG_HSPLIT) && + frame_size < fq.buf_len; + ret = libeth_rx_fq_create(&fq, &rx_ring->q_vector->napi); + if (ret) + return ret; + + rx_ring->pp = fq.pp; + rx_ring->rx_fqes = fq.fqes; + rx_ring->truesize = fq.truesize; + rx_ring->rx_buf_len = fq.buf_len; + + /* XDP RX-queue info */ + ret = __xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, + rx_ring->queue_index, 0, rx_ring->truesize); + if (ret) + goto err; + + xdp_rxq_info_attach_page_pool(&rx_ring->xdp_rxq, rx_ring->pp); + + if (!fq.hsplit) + return 0; + + fq = (struct libeth_fq) { + .count = rx_ring->count, + .nid = NUMA_NO_NODE, + .type = LIBETH_FQE_HDR, + .xdp = !!rx_ring->xdp_prog, + }; + + ret = libeth_rx_fq_create(&fq, &rx_ring->q_vector->napi); + if (ret) + goto err; + + rx_ring->hdr_pp = fq.pp; + rx_ring->hdr_fqes = fq.fqes; + rx_ring->hdr_truesize = 
fq.truesize; + + return 0; + +err: + ixgbevf_rx_destroy_pp(rx_ring); + return ret; +} + +void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter, + struct ixgbevf_ring *ring) { struct ixgbe_hw *hw = &adapter->hw; union ixgbe_adv_rx_desc *rx_desc; + u8 reg_idx = ring->reg_idx; + bool rlpml_valid = false; u64 rdba = ring->dma; u32 rxdctl; - u8 reg_idx = ring->reg_idx; + int err; /* disable queue to avoid issues while updating state */ rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(reg_idx)); @@ -1936,10 +1622,6 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter, IXGBE_WRITE_REG(hw, IXGBE_VFRDT(reg_idx), 0); ring->tail = adapter->io_addr + IXGBE_VFRDT(reg_idx); - /* initialize rx_buffer_info */ - memset(ring->rx_buffer_info, 0, - sizeof(struct ixgbevf_rx_buffer) * ring->count); - /* initialize Rx descriptor 0 */ rx_desc = IXGBEVF_RX_DESC(ring, 0); rx_desc->wb.upper.length = 0; @@ -1947,53 +1629,40 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter, /* reset ntu and ntc to place SW in sync with hardwdare */ ring->next_to_clean = 0; ring->next_to_use = 0; - ring->next_to_alloc = 0; + ring->pending = ixgbevf_desc_unused(ring); - ixgbevf_configure_srrctl(adapter, ring, reg_idx); + err = ixgbevf_rx_create_pp(ring); + if (err) { + netdev_err(ring->netdev, + "Failed to create Page Pool for buffer allocation: (%pe), RxQ %d is disabled, driver reload may be needed\n", + ERR_PTR(err), ring->queue_index); + return; + } /* RXDCTL.RLPML does not work on 82599 */ if (adapter->hw.mac.type != ixgbe_mac_82599_vf) { - rxdctl &= ~(IXGBE_RXDCTL_RLPMLMASK | - IXGBE_RXDCTL_RLPML_EN); - -#if (PAGE_SIZE < 8192) - /* Limit the maximum frame size so we don't overrun the skb */ - if (ring_uses_build_skb(ring) && - !ring_uses_large_buffer(ring)) - rxdctl |= IXGBEVF_MAX_FRAME_BUILD_SKB | - IXGBE_RXDCTL_RLPML_EN; -#endif + u32 pkt_len = + READ_ONCE(adapter->netdev->mtu) + LIBETH_RX_LL_LEN; + + rxdctl &= ~(IXGBE_RXDCTL_RLPMLMASK | 
IXGBE_RXDCTL_RLPML_EN); + if (pkt_len <= IXGBE_RXDCTL_RLPMLMASK) { + rxdctl |= pkt_len | IXGBE_RXDCTL_RLPML_EN; + if (pkt_len <= ring->rx_buf_len) + rlpml_valid = true; + } } + ixgbevf_configure_srrctl(adapter, ring, reg_idx, rlpml_valid); + rxdctl |= IXGBE_RXDCTL_ENABLE | IXGBE_RXDCTL_VME; IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(reg_idx), rxdctl); ixgbevf_rx_desc_queue_enable(adapter, ring); - ixgbevf_alloc_rx_buffers(ring, ixgbevf_desc_unused(ring)); -} - -static void ixgbevf_set_rx_buffer_len(struct ixgbevf_adapter *adapter, - struct ixgbevf_ring *rx_ring) -{ - struct net_device *netdev = adapter->netdev; - unsigned int max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN; - - /* set build_skb and buffer size flags */ - clear_ring_build_skb_enabled(rx_ring); - clear_ring_uses_large_buffer(rx_ring); - if (adapter->flags & IXGBEVF_FLAGS_LEGACY_RX) - return; - - if (PAGE_SIZE < 8192) - if (max_frame > IXGBEVF_MAX_FRAME_BUILD_SKB) - set_ring_uses_large_buffer(rx_ring); - - /* 82599 can't rely on RXDCTL.RLPML to restrict the size of the frame */ - if (adapter->hw.mac.type == ixgbe_mac_82599_vf && !ring_uses_large_buffer(rx_ring)) - return; - - set_ring_build_skb_enabled(rx_ring); + if (ring_is_xsk(ring)) + ixgbevf_xsk_alloc_rx_bufs(ring, ring->pending); + else + ixgbevf_alloc_rx_buffers(ring, ring->pending); } /** @@ -2026,7 +1695,6 @@ static void ixgbevf_configure_rx(struct ixgbevf_adapter *adapter) for (i = 0; i < adapter->num_rx_queues; i++) { struct ixgbevf_ring *rx_ring = adapter->rx_ring[i]; - ixgbevf_set_rx_buffer_len(adapter, rx_ring); ixgbevf_configure_rx_ring(adapter, rx_ring); } } @@ -2383,56 +2051,41 @@ void ixgbevf_up(struct ixgbevf_adapter *adapter) * ixgbevf_clean_rx_ring - Free Rx Buffers per Queue * @rx_ring: ring to free buffers from **/ -static void ixgbevf_clean_rx_ring(struct ixgbevf_ring *rx_ring) +void ixgbevf_clean_rx_ring(struct ixgbevf_ring *rx_ring) { - u16 i = rx_ring->next_to_clean; + if (ring_is_xsk(rx_ring)) { + 
ixgbevf_rx_xsk_ring_free_buffs(rx_ring); + goto reset; + } /* Free Rx ring sk_buff */ - if (rx_ring->skb) { - dev_kfree_skb(rx_ring->skb); - rx_ring->skb = NULL; - } + libeth_xdp_return_stash(&rx_ring->xdp_stash); /* Free all the Rx ring pages */ - while (i != rx_ring->next_to_alloc) { - struct ixgbevf_rx_buffer *rx_buffer; - - rx_buffer = &rx_ring->rx_buffer_info[i]; - - /* Invalidate cache lines that may have been written to by - * device so that we avoid corrupting memory. - */ - dma_sync_single_range_for_cpu(rx_ring->dev, - rx_buffer->dma, - rx_buffer->page_offset, - ixgbevf_rx_bufsz(rx_ring), - DMA_FROM_DEVICE); - - /* free resources associated with mapping */ - dma_unmap_page_attrs(rx_ring->dev, - rx_buffer->dma, - ixgbevf_rx_pg_size(rx_ring), - DMA_FROM_DEVICE, - IXGBEVF_RX_DMA_ATTR); - - __page_frag_cache_drain(rx_buffer->page, - rx_buffer->pagecnt_bias); - - i++; - if (i == rx_ring->count) + for (u32 i = rx_ring->next_to_clean; i != rx_ring->next_to_use; ) { + const struct libeth_fqe *rx_fqe = &rx_ring->rx_fqes[i]; + const struct libeth_fqe *hdr_fqe = rx_ring->hdr_fqes ? 
+ &rx_ring->hdr_fqes[i] : + NULL; + + libeth_rx_recycle_slow(rx_fqe->netmem); + if (hdr_fqe) + libeth_rx_recycle_slow(hdr_fqe->netmem); + if (unlikely(++i == rx_ring->count)) i = 0; } - rx_ring->next_to_alloc = 0; +reset: rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; + rx_ring->pending = 0; } /** * ixgbevf_clean_tx_ring - Free Tx Buffers * @tx_ring: ring to be cleaned **/ -static void ixgbevf_clean_tx_ring(struct ixgbevf_ring *tx_ring) +void ixgbevf_clean_tx_ring(struct ixgbevf_ring *tx_ring) { u16 i = tx_ring->next_to_clean; struct ixgbevf_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i]; @@ -2441,10 +2094,7 @@ static void ixgbevf_clean_tx_ring(struct ixgbevf_ring *tx_ring) union ixgbe_adv_tx_desc *eop_desc, *tx_desc; /* Free all the Tx ring sk_buffs */ - if (ring_is_xdp(tx_ring)) - page_frag_free(tx_buffer->data); - else - dev_kfree_skb_any(tx_buffer->skb); + dev_kfree_skb_any(tx_buffer->skb); /* unmap skb header data */ dma_unmap_single(tx_ring->dev, @@ -2498,8 +2148,10 @@ static void ixgbevf_clean_all_rx_rings(struct ixgbevf_adapter *adapter) { int i; - for (i = 0; i < adapter->num_rx_queues; i++) + for (i = 0; i < adapter->num_rx_queues; i++) { ixgbevf_clean_rx_ring(adapter->rx_ring[i]); + ixgbevf_rx_destroy_pp(adapter->rx_ring[i]); + } } /** @@ -2513,13 +2165,20 @@ static void ixgbevf_clean_all_tx_rings(struct ixgbevf_adapter *adapter) for (i = 0; i < adapter->num_tx_queues; i++) ixgbevf_clean_tx_ring(adapter->tx_ring[i]); for (i = 0; i < adapter->num_xdp_queues; i++) - ixgbevf_clean_tx_ring(adapter->xdp_ring[i]); + ixgbevf_clean_xdp_ring(adapter->xdp_ring[i]); +} + +void ixgbevf_flush_tx_queue(struct ixgbevf_ring *ring) +{ + u8 reg_idx = ring->reg_idx; + + IXGBE_WRITE_REG(&ring->q_vector->adapter->hw, IXGBE_VFTXDCTL(reg_idx), + IXGBE_TXDCTL_SWFLSH); } void ixgbevf_down(struct ixgbevf_adapter *adapter) { struct net_device *netdev = adapter->netdev; - struct ixgbe_hw *hw = &adapter->hw; int i; /* signal that we are down to the interrupt handler */ @@ 
-2545,19 +2204,11 @@ void ixgbevf_down(struct ixgbevf_adapter *adapter) timer_delete_sync(&adapter->service_timer); /* disable transmits in the hardware now that interrupts are off */ - for (i = 0; i < adapter->num_tx_queues; i++) { - u8 reg_idx = adapter->tx_ring[i]->reg_idx; - - IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(reg_idx), - IXGBE_TXDCTL_SWFLSH); - } - - for (i = 0; i < adapter->num_xdp_queues; i++) { - u8 reg_idx = adapter->xdp_ring[i]->reg_idx; + for (i = 0; i < adapter->num_tx_queues; i++) + ixgbevf_flush_tx_queue(adapter->tx_ring[i]); - IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(reg_idx), - IXGBE_TXDCTL_SWFLSH); - } + for (i = 0; i < adapter->num_xdp_queues; i++) + ixgbevf_flush_tx_queue(adapter->xdp_ring[i]); if (!pci_channel_offline(adapter->pdev)) ixgbevf_reset(adapter); @@ -3084,6 +2735,9 @@ static int ixgbevf_sw_init(struct ixgbevf_adapter *adapter) goto out; } + if (adapter->hw.mac.type == ixgbe_mac_82599_vf) + adapter->flags |= IXGBEVF_FLAG_HSPLIT; + /* assume legacy case in which PF would only give VF 2 queues */ hw->mac.max_tx_queues = 2; hw->mac.max_rx_queues = 2; @@ -3259,8 +2913,6 @@ static void ixgbevf_check_hang_subtask(struct ixgbevf_adapter *adapter) if (netif_carrier_ok(adapter->netdev)) { for (i = 0; i < adapter->num_tx_queues; i++) set_check_for_tx_hang(adapter->tx_ring[i]); - for (i = 0; i < adapter->num_xdp_queues; i++) - set_check_for_tx_hang(adapter->xdp_ring[i]); } /* get one bit for every active Tx/Rx interrupt vector */ @@ -3404,7 +3056,10 @@ static void ixgbevf_service_task(struct work_struct *work) **/ void ixgbevf_free_tx_resources(struct ixgbevf_ring *tx_ring) { - ixgbevf_clean_tx_ring(tx_ring); + if (!ring_is_xdp(tx_ring)) + ixgbevf_clean_tx_ring(tx_ring); + else + ixgbevf_clean_xdp_ring(tx_ring); vfree(tx_ring->tx_buffer_info); tx_ring->tx_buffer_info = NULL; @@ -3413,7 +3068,7 @@ void ixgbevf_free_tx_resources(struct ixgbevf_ring *tx_ring) if (!tx_ring->desc) return; - dma_free_coherent(tx_ring->dev, tx_ring->size, tx_ring->desc, + 
dma_free_coherent(tx_ring->dev, tx_ring->dma_size, tx_ring->desc, tx_ring->dma); tx_ring->desc = NULL; @@ -3448,7 +3103,9 @@ int ixgbevf_setup_tx_resources(struct ixgbevf_ring *tx_ring) struct ixgbevf_adapter *adapter = netdev_priv(tx_ring->netdev); int size; - size = sizeof(struct ixgbevf_tx_buffer) * tx_ring->count; + size = (!ring_is_xdp(tx_ring) ? sizeof(struct ixgbevf_tx_buffer) : + sizeof(struct libeth_sqe)) * tx_ring->count; + tx_ring->tx_buffer_info = vmalloc(size); if (!tx_ring->tx_buffer_info) goto err; @@ -3456,10 +3113,10 @@ int ixgbevf_setup_tx_resources(struct ixgbevf_ring *tx_ring) u64_stats_init(&tx_ring->syncp); /* round up to nearest 4K */ - tx_ring->size = tx_ring->count * sizeof(union ixgbe_adv_tx_desc); - tx_ring->size = ALIGN(tx_ring->size, 4096); + tx_ring->dma_size = tx_ring->count * sizeof(union ixgbe_adv_tx_desc); + tx_ring->dma_size = ALIGN(tx_ring->dma_size, 4096); - tx_ring->desc = dma_alloc_coherent(tx_ring->dev, tx_ring->size, + tx_ring->desc = dma_alloc_coherent(tx_ring->dev, tx_ring->dma_size, &tx_ring->dma, GFP_KERNEL); if (!tx_ring->desc) goto err; @@ -3514,48 +3171,40 @@ static int ixgbevf_setup_all_tx_resources(struct ixgbevf_adapter *adapter) return err; } +static struct device *ixgbevf_dma_dev_from_ring(struct ixgbevf_ring *ring) +{ + return &ring->q_vector->adapter->pdev->dev; +} + /** - * ixgbevf_setup_rx_resources - allocate Rx resources (Descriptors) + * ixgbevf_setup_rx_resources - allocate Rx resources * @adapter: board private structure * @rx_ring: Rx descriptor ring (for a specific queue) to setup * - * Returns 0 on success, negative on failure + * Returns: 0 on success, negative on failure. 
**/ int ixgbevf_setup_rx_resources(struct ixgbevf_adapter *adapter, struct ixgbevf_ring *rx_ring) { - int size; - - size = sizeof(struct ixgbevf_rx_buffer) * rx_ring->count; - rx_ring->rx_buffer_info = vmalloc(size); - if (!rx_ring->rx_buffer_info) - goto err; - u64_stats_init(&rx_ring->syncp); /* Round up to nearest 4K */ - rx_ring->size = rx_ring->count * sizeof(union ixgbe_adv_rx_desc); - rx_ring->size = ALIGN(rx_ring->size, 4096); + rx_ring->dma_size = rx_ring->count * sizeof(union ixgbe_adv_rx_desc); + rx_ring->dma_size = ALIGN(rx_ring->dma_size, 4096); - rx_ring->desc = dma_alloc_coherent(rx_ring->dev, rx_ring->size, + rx_ring->desc = dma_alloc_coherent(ixgbevf_dma_dev_from_ring(rx_ring), + rx_ring->dma_size, &rx_ring->dma, GFP_KERNEL); - if (!rx_ring->desc) - goto err; - - /* XDP RX-queue info */ - if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, adapter->netdev, - rx_ring->queue_index, 0) < 0) - goto err; + if (!rx_ring->desc) { + dev_err(rx_ring->dev, + "Unable to allocate memory for the Rx descriptor ring\n"); + return -ENOMEM; + } - rx_ring->xdp_prog = adapter->xdp_prog; + rcu_assign_pointer(rx_ring->xdp_prog, adapter->xdp_prog); return 0; -err: - vfree(rx_ring->rx_buffer_info); - rx_ring->rx_buffer_info = NULL; - dev_err(rx_ring->dev, "Unable to allocate memory for the Rx descriptor ring\n"); - return -ENOMEM; } /** @@ -3597,15 +3246,12 @@ static int ixgbevf_setup_all_rx_resources(struct ixgbevf_adapter *adapter) void ixgbevf_free_rx_resources(struct ixgbevf_ring *rx_ring) { ixgbevf_clean_rx_ring(rx_ring); + ixgbevf_rx_destroy_pp(rx_ring); + rcu_assign_pointer(rx_ring->xdp_prog, NULL); - rx_ring->xdp_prog = NULL; - xdp_rxq_info_unreg(&rx_ring->xdp_rxq); - vfree(rx_ring->rx_buffer_info); - rx_ring->rx_buffer_info = NULL; - - dma_free_coherent(rx_ring->dev, rx_ring->size, rx_ring->desc, + dma_free_coherent(ixgbevf_dma_dev_from_ring(rx_ring), + rx_ring->dma_size, rx_ring->desc, rx_ring->dma); - rx_ring->desc = NULL; } @@ -4292,6 +3938,18 @@ static int 
ixgbevf_set_mac(struct net_device *netdev, void *p) return 0; } +static bool ixgbevf_xdp_mtu_ok(const struct ixgbevf_adapter *adapter, + const struct bpf_prog *prog, unsigned int mtu) +{ + u32 frame_size = mtu + LIBETH_RX_LL_LEN; + bool requires_mbuf; + + requires_mbuf = frame_size > IXGBEVF_RX_PAGE_LEN(LIBETH_XDP_HEADROOM) || + adapter->flags & IXGBEVF_FLAG_HSPLIT; + + return prog->aux->xdp_has_frags || !requires_mbuf; +} + /** * ixgbevf_change_mtu - Change the Maximum Transfer Unit * @netdev: network interface device structure @@ -4307,8 +3965,10 @@ static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu) int ret; /* prevent MTU being changed to a size unsupported by XDP */ - if (adapter->xdp_prog) { - dev_warn(&adapter->pdev->dev, "MTU cannot be changed while XDP program is loaded\n"); + if (adapter->xdp_prog && + !ixgbevf_xdp_mtu_ok(adapter, adapter->xdp_prog, new_mtu)) { + netdev_warn(netdev, + "MTU value provided cannot be set while current XDP program is attached\n"); return -EPERM; } @@ -4468,24 +4128,24 @@ ixgbevf_features_check(struct sk_buff *skb, struct net_device *dev, return features; } -static int ixgbevf_xdp_setup(struct net_device *dev, struct bpf_prog *prog) +static int ixgbevf_xdp_setup(struct net_device *dev, struct bpf_prog *prog, + struct netlink_ext_ack *extack) { - int i, frame_size = dev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; struct ixgbevf_adapter *adapter = netdev_priv(dev); struct bpf_prog *old_prog; - /* verify ixgbevf ring attributes are sufficient for XDP */ - for (i = 0; i < adapter->num_rx_queues; i++) { - struct ixgbevf_ring *ring = adapter->rx_ring[i]; - - if (frame_size > ixgbevf_rx_bufsz(ring)) - return -EINVAL; + if (prog && !ixgbevf_xdp_mtu_ok(adapter, prog, READ_ONCE(dev->mtu))) { + NL_SET_ERR_MSG_MOD(extack, + "Configured MTU or HW limitations require non-linear frames and XDP prog does not support frags"); + return -EOPNOTSUPP; } old_prog = xchg(&adapter->xdp_prog, prog); /* If transitioning XDP modes 
reconfigure rings */ if (!!prog != !!old_prog) { + xdp_features_clear_redirect_target(dev); + /* Hardware has to reinitialize queues and interrupts to * match packet buffer alignment. Unfortunately, the * hardware is not flexible enough to do this dynamically. @@ -4499,10 +4159,15 @@ static int ixgbevf_xdp_setup(struct net_device *dev, struct bpf_prog *prog) if (netif_running(dev)) ixgbevf_open(dev); } else { - for (i = 0; i < adapter->num_rx_queues; i++) - xchg(&adapter->rx_ring[i]->xdp_prog, adapter->xdp_prog); + for (int i = 0; i < adapter->num_rx_queues; i++) + rcu_assign_pointer(adapter->rx_ring[i]->xdp_prog, + adapter->xdp_prog); + synchronize_net(); } + if (prog) + xdp_features_set_redirect_target(dev, true); + if (old_prog) bpf_prog_put(old_prog); @@ -4513,7 +4178,10 @@ static int ixgbevf_xdp(struct net_device *dev, struct netdev_bpf *xdp) { switch (xdp->command) { case XDP_SETUP_PROG: - return ixgbevf_xdp_setup(dev, xdp->prog); + return ixgbevf_xdp_setup(dev, xdp->prog, xdp->extack); + case XDP_SETUP_XSK_POOL: + return ixgbevf_setup_xsk_pool(netdev_priv(dev), xdp->xsk.pool, + xdp->xsk.queue_id); default: return -EINVAL; } @@ -4533,6 +4201,8 @@ static const struct net_device_ops ixgbevf_netdev_ops = { .ndo_vlan_rx_kill_vid = ixgbevf_vlan_rx_kill_vid, .ndo_features_check = ixgbevf_features_check, .ndo_bpf = ixgbevf_xdp, + .ndo_xdp_xmit = ixgbevf_xdp_xmit, + .ndo_xsk_wakeup = ixgbevf_xsk_wakeup, }; static void ixgbevf_assign_netdev_ops(struct net_device *dev) @@ -4665,7 +4335,7 @@ static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) NETIF_F_HW_VLAN_CTAG_TX; netdev->priv_flags |= IFF_UNICAST_FLT; - netdev->xdp_features = NETDEV_XDP_ACT_BASIC; + libeth_xdp_set_features_noredir(netdev, NULL, IXGBEVF_XSK_MAX_ZC_FRAGS); /* MTU range: 68 - 1504 or 9710 */ netdev->min_mtu = ETH_MIN_MTU; diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_txrx_lib.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_txrx_lib.h new file mode 100644 index 
00000000000000..5bad6990060715 --- /dev/null +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_txrx_lib.h @@ -0,0 +1,280 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2010-2026 Intel Corporation */ + +#ifndef _IXGBEVF_TXRX_LIB_H_ +#define _IXGBEVF_TXRX_LIB_H_ + +#include + +#include "ixgbevf.h" + +static inline void ixgbevf_xdp_rs_and_bump(void *xdpsq, bool sent, bool flush) +{ + struct ixgbevf_ring *xdp_ring = xdpsq; + union ixgbe_adv_tx_desc *desc; + u32 ltu; + + libeth_xdpsq_lock(&xdp_ring->xdpq_lock); + + if ((!flush && xdp_ring->pending < xdp_ring->count - 1) || + xdp_ring->cached_ntu == xdp_ring->next_to_use) + goto unlock; + + ltu = (xdp_ring->next_to_use ? : xdp_ring->count) - 1; + + /* We will not get DD on a context descriptor */ + if (unlikely(xdp_ring->xdp_sqes[ltu].type == LIBETH_SQE_CTX)) + goto unlock; + + desc = IXGBEVF_TX_DESC(xdp_ring, ltu); + desc->read.cmd_type_len |= cpu_to_le32(IXGBE_TXD_CMD); + + xdp_ring->xdp_sqes[xdp_ring->cached_ntu].rs_idx = ltu + 1; + xdp_ring->cached_ntu = xdp_ring->next_to_use; + + /* In case the packet was interrupted, discard it */ + xdp_ring->xdp_sqes[ltu].priv = 0; + + /* Finish descriptor writes before bumping tail */ + wmb(); + ixgbevf_write_tail(xdp_ring, xdp_ring->next_to_use); + +unlock: + libeth_xdpsq_unlock(&xdp_ring->xdpq_lock); +} + +/** + * ixgbevf_is_non_eop - process handling of non-EOP buffers + * @rx_ring: Rx ring being processed + * @rx_desc: Rx descriptor for current buffer + * + * This function updates next to clean. If the buffer is an EOP buffer + * this function exits returning false, otherwise it will place the + * sk_buff in the next buffer to be chained and return true indicating + * that this is in fact a non-EOP buffer. + **/ +static inline bool ixgbevf_is_non_eop(struct ixgbevf_ring *rx_ring, + union ixgbe_adv_rx_desc *rx_desc) +{ + u32 ntc = rx_ring->next_to_clean + 1; + + /* fetch, update, and store next to clean */ + ntc = (ntc < rx_ring->count) ? 
ntc : 0; + rx_ring->next_to_clean = ntc; + rx_ring->pending++; + + prefetch(IXGBEVF_RX_DESC(rx_ring, ntc)); + + if (likely(ixgbevf_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))) + return false; + + return true; +} + +/** + * ixgbevf_cleanup_headers - Correct corrupted or empty headers + * @rx_ring: rx descriptor ring packet is being transacted on + * @rx_desc: pointer to the EOP Rx descriptor + * @skb: pointer to current skb being fixed + * + * Check for corrupted packet headers caused by senders on the local L2 + * embedded NIC switch not setting up their Tx Descriptors right. These + * should be very rare. + * + * Also address the case where we are pulling data in on pages only + * and as such no data is present in the skb header. + * + * In addition if skb is not at least 60 bytes we need to pad it so that + * it is large enough to qualify as a valid Ethernet frame. + * + * Returns true if an error was encountered and skb was freed. + **/ +static inline bool ixgbevf_cleanup_headers(struct ixgbevf_ring *rx_ring, + union ixgbe_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + /* verify that the packet does not have any known errors */ + if (unlikely(ixgbevf_test_staterr(rx_desc, + IXGBE_RXDADV_ERR_FRAME_ERR_MASK))) { + struct net_device *netdev = rx_ring->netdev; + + if (!(netdev->features & NETIF_F_RXALL)) { + dev_kfree_skb_any(skb); + return true; + } + } + + return false; +} + +#define IXGBE_RSS_L4_TYPES_MASK \ + ((1ul << IXGBE_RXDADV_RSSTYPE_IPV4_TCP) | \ + (1ul << IXGBE_RXDADV_RSSTYPE_IPV4_UDP) | \ + (1ul << IXGBE_RXDADV_RSSTYPE_IPV6_TCP) | \ + (1ul << IXGBE_RXDADV_RSSTYPE_IPV6_UDP)) + +static inline void ixgbevf_rx_hash(struct ixgbevf_ring *ring, + union ixgbe_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + u16 rss_type; + + if (!(ring->netdev->features & NETIF_F_RXHASH)) + return; + + rss_type = le16_to_cpu(rx_desc->wb.lower.lo_dword.hs_rss.pkt_info) & + IXGBE_RXDADV_RSSTYPE_MASK; + + if (!rss_type) + return; + + skb_set_hash(skb, 
le32_to_cpu(rx_desc->wb.lower.hi_dword.rss), + (IXGBE_RSS_L4_TYPES_MASK & (1ul << rss_type)) ? + PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3); +} + +/** + * ixgbevf_rx_checksum - indicate in skb if hw indicated a good cksum + * @ring: structure containing ring specific data + * @rx_desc: current Rx descriptor being processed + * @skb: skb currently being received and modified + **/ +static inline void ixgbevf_rx_checksum(struct ixgbevf_ring *ring, + union ixgbe_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + skb_checksum_none_assert(skb); + + /* Rx csum disabled */ + if (!(ring->netdev->features & NETIF_F_RXCSUM)) + return; + + /* if IP and error */ + if (ixgbevf_test_staterr(rx_desc, IXGBE_RXD_STAT_IPCS) && + ixgbevf_test_staterr(rx_desc, IXGBE_RXDADV_ERR_IPE)) { + ring->rx_stats.csum_err++; + return; + } + + if (!ixgbevf_test_staterr(rx_desc, IXGBE_RXD_STAT_L4CS)) + return; + + if (ixgbevf_test_staterr(rx_desc, IXGBE_RXDADV_ERR_TCPE)) { + ring->rx_stats.csum_err++; + return; + } + + /* It must be a TCP or UDP packet with a valid checksum */ + skb->ip_summed = CHECKSUM_UNNECESSARY; +} + +/** + * ixgbevf_process_skb_fields - Populate skb header fields from Rx descriptor + * @rx_ring: rx descriptor ring packet is being transacted on + * @rx_desc: pointer to the EOP Rx descriptor + * @skb: pointer to current skb being populated + * + * This function checks the ring, descriptor, and packet information in + * order to populate the checksum, VLAN, protocol, and other fields within + * the skb. 
+ **/ +static inline void ixgbevf_process_skb_fields(struct ixgbevf_ring *rx_ring, + union ixgbe_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + ixgbevf_rx_hash(rx_ring, rx_desc, skb); + ixgbevf_rx_checksum(rx_ring, rx_desc, skb); + + if (ixgbevf_test_staterr(rx_desc, IXGBE_RXD_STAT_VP)) { + u16 vid = le16_to_cpu(rx_desc->wb.upper.vlan); + unsigned long *active_vlans = netdev_priv(rx_ring->netdev); + + if (test_bit(vid & VLAN_VID_MASK, active_vlans)) + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); + } + + if (ixgbevf_test_staterr(rx_desc, IXGBE_RXDADV_STAT_SECP)) + ixgbevf_ipsec_rx(rx_ring, rx_desc, skb); +} + +static inline u16 ixgbevf_tx_get_num_sent(struct ixgbevf_ring *xdp_ring) +{ + u16 ntc = xdp_ring->next_to_clean; + u16 to_clean = 0; + + while (likely(to_clean < xdp_ring->pending)) { + u32 idx = xdp_ring->xdp_sqes[ntc].rs_idx; + union ixgbe_adv_tx_desc *rs_desc; + + if (!idx--) + break; + + rs_desc = IXGBEVF_TX_DESC(xdp_ring, idx); + + if (!(rs_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD))) + break; + + xdp_ring->xdp_sqes[ntc].rs_idx = 0; + + to_clean += + (idx >= ntc ? idx : idx + xdp_ring->count) - ntc + 1; + + ntc = (idx + 1 == xdp_ring->count) ? 
0 : idx + 1; + } + + return to_clean; +} + +void ixgbevf_clean_xdp_num(struct ixgbevf_ring *xdp_ring, bool in_napi, + u16 to_clean); + +static inline u32 ixgbevf_prep_xdp_sq(void *xdpsq, struct libeth_xdpsq *sq) +{ + struct ixgbevf_ring *xdp_ring = xdpsq; + + libeth_xdpsq_lock(&xdp_ring->xdpq_lock); + if (unlikely(ixgbevf_desc_unused(xdp_ring) < xdp_ring->thresh)) { + u16 to_clean = ixgbevf_tx_get_num_sent(xdpsq); + + if (likely(to_clean)) + ixgbevf_clean_xdp_num(xdp_ring, true, to_clean); + } + + if (unlikely(!test_bit(__IXGBEVF_TX_XDP_RING_PRIMED, + &xdp_ring->state))) { + struct ixgbe_adv_tx_context_desc *context_desc; + + set_bit(__IXGBEVF_TX_XDP_RING_PRIMED, &xdp_ring->state); + + context_desc = IXGBEVF_TX_CTXTDESC(xdp_ring, 0); + context_desc->vlan_macip_lens = + cpu_to_le32(ETH_HLEN << IXGBE_ADVTXD_MACLEN_SHIFT); + context_desc->fceof_saidx = 0; + context_desc->type_tucmd_mlhl = + cpu_to_le32(IXGBE_TXD_CMD_DEXT | + IXGBE_ADVTXD_DTYP_CTXT); + context_desc->mss_l4len_idx = 0; + + xdp_ring->next_to_use = 1; + xdp_ring->pending = 1; + xdp_ring->xdp_sqes[0].type = LIBETH_SQE_CTX; + + /* Finish descriptor writes before bumping tail */ + wmb(); + ixgbevf_write_tail(xdp_ring, 1); + } + + *sq = (struct libeth_xdpsq) { + .count = xdp_ring->count, + .descs = xdp_ring->desc, + .lock = &xdp_ring->xdpq_lock, + .ntu = &xdp_ring->next_to_use, + .pending = &xdp_ring->pending, + .pool = xdp_ring->xsk_pool, + .sqes = xdp_ring->xdp_sqes, + }; + + return ixgbevf_desc_unused(xdp_ring); +} + +#endif /* _IXGBEVF_TXRX_LIB_H_ */ diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c new file mode 100644 index 00000000000000..1545fed4cdb51b --- /dev/null +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c @@ -0,0 +1,351 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2026 Intel Corporation */ + +#include + +#include "ixgbevf_txrx_lib.h" +#include "ixgbevf_xsk.h" + +/** + * ixgbevf_single_irq_disable - Mask off 
interrupt generation on a single vector + * @adapter: board private structure + * @vidx: vector id + **/ +static void ixgbevf_single_irq_disable(struct ixgbevf_adapter *adapter, + u16 vidx) +{ + struct ixgbe_hw *hw = &adapter->hw; + + IXGBE_WRITE_REG(hw, IXGBE_VTEIAM, + adapter->eims_enable_mask & ~BIT(vidx)); + IXGBE_WRITE_REG(hw, IXGBE_VTEIMC, BIT(vidx)); + IXGBE_WRITE_REG(hw, IXGBE_VTEIAC, + adapter->eims_enable_mask & ~BIT(vidx)); + + IXGBE_WRITE_FLUSH(hw); + + synchronize_irq(adapter->msix_entries[vidx].vector); +} + +static void ixgbevf_qp_dis(struct ixgbevf_adapter *adapter, u16 qid) +{ + struct ixgbevf_ring *tx_ring, *rx_ring = adapter->rx_ring[qid]; + struct ixgbevf_q_vector *q_vector = rx_ring->q_vector; + + netif_stop_subqueue(adapter->netdev, qid); + ixgbevf_single_irq_disable(adapter, q_vector->v_idx); + napi_disable(&q_vector->napi); + + ixgbevf_disable_rx_queue(adapter, adapter->rx_ring[qid]); + ixgbevf_clean_rx_ring(rx_ring); + ixgbevf_rx_destroy_pp(rx_ring); + + /* Clean both XDP and normal Tx queue */ + ixgbevf_for_each_ring(tx_ring, q_vector->tx) { + ixgbevf_flush_tx_queue(tx_ring); + if (ring_is_xdp(tx_ring)) + ixgbevf_clean_xdp_ring(tx_ring); + else + ixgbevf_clean_tx_ring(tx_ring); + } +} + +static void ixgbevf_qp_ena(struct ixgbevf_adapter *adapter, u16 qid) +{ + struct ixgbevf_ring *tx_ring, *rx_ring = adapter->rx_ring[qid]; + struct ixgbevf_q_vector *q_vector = rx_ring->q_vector; + + ixgbevf_configure_rx_ring(adapter, rx_ring); + ixgbevf_for_each_ring(tx_ring, q_vector->tx) + ixgbevf_configure_tx_ring(adapter, tx_ring); + + napi_enable(&q_vector->napi); + ixgbevf_irq_enable(adapter); + netif_start_subqueue(adapter->netdev, qid); + napi_schedule(&q_vector->napi); +} + +int ixgbevf_setup_xsk_pool(struct ixgbevf_adapter *adapter, + struct xsk_buff_pool *pool, u16 qid) +{ + bool running = !test_bit(__IXGBEVF_DOWN, &adapter->state) && + adapter->xdp_prog; + int err; + + if (running) + ixgbevf_qp_dis(adapter, qid); + + err = 
libeth_xsk_setup_pool(adapter->netdev, qid, !!pool); + + if (running) + ixgbevf_qp_ena(adapter, qid); + + return err; +} + +static void ixgbevf_fill_rx_xsk_desc(const struct libeth_xskfq_fp *fq, u32 i) +{ + union ixgbe_adv_rx_desc *rx_desc = + &((union ixgbe_adv_rx_desc *)fq->descs)[i]; + + rx_desc->read.pkt_addr = + cpu_to_le64(libeth_xsk_buff_xdp_get_dma(fq->fqes[i])); + rx_desc->wb.upper.length = 0; +} + +bool ixgbevf_xsk_alloc_rx_bufs(struct ixgbevf_ring *rx_ring, u32 num) +{ + struct libeth_xskfq_fp fq = { + .count = rx_ring->count, + .descs = rx_ring->desc, + .fqes = rx_ring->xsk_fqes, + .ntu = rx_ring->next_to_use, + .pool = rx_ring->xsk_pool, + }; + u32 done; + + done = libeth_xskfqe_alloc(&fq, num, ixgbevf_fill_rx_xsk_desc); + if (likely(done)) { + /* Finish descriptor writes before bumping tail */ + wmb(); + ixgbevf_write_tail(rx_ring, fq.ntu); + } + + rx_ring->next_to_use = fq.ntu; + rx_ring->pending -= done; + + return done == num; +} + +void ixgbevf_rx_xsk_ring_free_buffs(struct ixgbevf_ring *rx_ring) +{ + u32 ntc = rx_ring->next_to_clean; + + if (rx_ring->xsk_xdp_head) + xsk_buff_free(&rx_ring->xsk_xdp_head->base); + + rx_ring->xsk_xdp_head = NULL; + + while (ntc != rx_ring->next_to_use) { + xsk_buff_free(&rx_ring->xsk_fqes[ntc]->base); + ntc++; + ntc = ntc == rx_ring->count ? 0 : ntc; + } +} + +struct ixgbevf_zc_sqe_priv { + u16 first_desc; + u16 len; +}; + +static_assert(sizeof(struct ixgbevf_zc_sqe_priv) <= + sizeof_field(struct libeth_sqe, priv)); + +static void ixgbevf_xsk_xmit_desc(struct libeth_xdp_tx_desc desc, u32 i, + const struct libeth_xdpsq *sq, u64 priv) +{ + union ixgbe_adv_tx_desc *descs = sq->descs, *tx_desc = &descs[i]; + u32 ltu = (i ? 
: sq->count) - 1; + + u32 cmd_type = IXGBE_ADVTXD_DTYP_DATA | + IXGBE_ADVTXD_DCMD_DEXT | + IXGBE_ADVTXD_DCMD_IFCS | + desc.len; + + tx_desc->read.buffer_addr = cpu_to_le64(desc.addr); + + if (likely((desc.flags & LIBETH_XDP_TX_LAST) && !sq->sqes[ltu].priv)) { + tx_desc->read.olinfo_status = + cpu_to_le32((desc.len << IXGBE_ADVTXD_PAYLEN_SHIFT) | + IXGBE_ADVTXD_CC); + tx_desc->read.cmd_type_len = + cpu_to_le32(cmd_type | IXGBE_TXD_CMD_EOP); + return; + } + + /* No previous packet */ + if (!sq->sqes[ltu].priv) { + struct ixgbevf_zc_sqe_priv *sqe_priv = + (void *)&sq->sqes[i].priv; + + sqe_priv->first_desc = i; + sqe_priv->len = desc.len; + + tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); + + return; + } + + if (sq->sqes[ltu].priv) { + struct ixgbevf_zc_sqe_priv *sqe_priv = + (void *)&sq->sqes[i].priv; + + sq->sqes[i].priv = sq->sqes[ltu].priv; + sq->sqes[ltu].priv = 0; + sqe_priv->len += desc.len; + + if (desc.flags & LIBETH_XDP_TX_LAST) { + union ixgbe_adv_tx_desc *first_desc = + &descs[sqe_priv->first_desc]; + + first_desc->read.olinfo_status = + cpu_to_le32((sqe_priv->len << + IXGBE_ADVTXD_PAYLEN_SHIFT) | + IXGBE_ADVTXD_CC); + tx_desc->read.cmd_type_len = + cpu_to_le32(cmd_type | IXGBE_TXD_CMD_EOP); + cmd_type |= IXGBE_TXD_CMD_EOP; + sq->sqes[i].priv = 0; + } + + tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); + } +} + +LIBETH_XDP_DEFINE_START(); +LIBETH_XSK_DEFINE_FLUSH_TX(static ixgbevf_xsk_flush_tx, ixgbevf_prep_xdp_sq, + ixgbevf_xsk_xmit_desc); +LIBETH_XSK_DEFINE_RUN_PROG(static ixgbevf_xsk_run_prog, ixgbevf_xsk_flush_tx); +LIBETH_XSK_DEFINE_FINALIZE(static ixgbevf_xsk_finalize_xdp_napi, + ixgbevf_xsk_flush_tx, ixgbevf_xdp_rs_and_bump); +LIBETH_XDP_DEFINE_END(); + +u32 ixgbevf_clean_xsk_rx_irq(struct ixgbevf_q_vector *q_vector, + struct ixgbevf_ring *rx_ring, int budget) +{ + struct ixgbevf_adapter *adapter = q_vector->adapter; + u32 total_rx_bytes = 0, total_rx_packets = 0; + LIBETH_XDP_ONSTACK_BULK(xdp_tx_bulk); + struct libeth_xdp_buff *head_xdp; + 
bool failure = false, wake; + struct sk_buff *skb; + + wake = xsk_uses_need_wakeup(rx_ring->xsk_pool); + if (wake) + xsk_clear_rx_need_wakeup(rx_ring->xsk_pool); + + head_xdp = rx_ring->xsk_xdp_head; + libeth_xsk_tx_init_bulk(&xdp_tx_bulk, rx_ring->xdp_prog, + adapter->netdev, adapter->xdp_ring, + adapter->num_xdp_queues); + + while (likely(total_rx_packets < budget)) { + union ixgbe_adv_rx_desc *rx_desc; + struct libeth_xdp_buff *rx_buffer; + unsigned int size; + u32 xdp_result; + + rx_desc = IXGBEVF_RX_DESC(rx_ring, rx_ring->next_to_clean); + size = le16_to_cpu(rx_desc->wb.upper.length); + if (unlikely(!size)) + break; + + /* Avoid reading other descriptor fields before checking size */ + rmb(); + + rx_buffer = rx_ring->xsk_fqes[rx_ring->next_to_clean]; + head_xdp = libeth_xsk_process_buff(head_xdp, rx_buffer, size); + if (ixgbevf_is_non_eop(rx_ring, rx_desc) || unlikely(!head_xdp)) + continue; + + total_rx_packets++; + total_rx_bytes += xdp_get_buff_len(&head_xdp->base); + + xdp_result = ixgbevf_xsk_run_prog(head_xdp, &xdp_tx_bulk); + if (xdp_result) { + head_xdp = NULL; + if (likely(xdp_result != LIBETH_XDP_ABORTED)) + continue; + failure = true; + break; + } + + skb = xdp_build_skb_from_zc(&head_xdp->base); + + if (unlikely(!skb)) { + libeth_xdp_return_buff_slow(head_xdp); + head_xdp = NULL; + rx_ring->rx_stats.alloc_rx_buff_failed++; + break; + } + + head_xdp = NULL; + + if (unlikely(ixgbevf_cleanup_headers(rx_ring, rx_desc, skb))) { + skb = NULL; + continue; + } + + if (unlikely((skb->pkt_type == PACKET_BROADCAST || + skb->pkt_type == PACKET_MULTICAST) && + ether_addr_equal(rx_ring->netdev->dev_addr, + eth_hdr(skb)->h_source))) { + dev_kfree_skb_irq(skb); + continue; + } + + /* populate checksum, VLAN, and protocol */ + ixgbevf_process_skb_fields(rx_ring, rx_desc, skb); + + napi_gro_receive(&q_vector->napi, skb); + } + + if (rx_ring->pending >= rx_ring->thresh) + failure |= !ixgbevf_xsk_alloc_rx_bufs(rx_ring, + rx_ring->pending); + + /* place incomplete
frames back on ring for completion */ + rx_ring->xsk_xdp_head = head_xdp; + + ixgbevf_xsk_finalize_xdp_napi(&xdp_tx_bulk); + + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->stats.packets += total_rx_packets; + rx_ring->stats.bytes += total_rx_bytes; + u64_stats_update_end(&rx_ring->syncp); + q_vector->rx.total_packets += total_rx_packets; + q_vector->rx.total_bytes += total_rx_bytes; + + if (likely(!failure)) + return total_rx_packets; + + if (wake) + xsk_set_rx_need_wakeup(rx_ring->xsk_pool); + + return budget; +} + +bool ixgbevf_clean_xsk_tx_irq(struct ixgbevf_q_vector *q_vector, + struct ixgbevf_ring *tx_ring, int napi_budget) +{ + u32 budget = min_t(u32, napi_budget, tx_ring->thresh); + + return libeth_xsk_xmit_do_bulk(tx_ring->xsk_pool, tx_ring, budget, + NULL, ixgbevf_prep_xdp_sq, + ixgbevf_xsk_xmit_desc, + ixgbevf_xdp_rs_and_bump); +} + +int ixgbevf_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) +{ + struct ixgbevf_adapter *adapter = netdev_priv(dev); + struct ixgbevf_q_vector *q_vector; + struct ixgbevf_ring *rx_ring; + + if (unlikely(test_bit(__IXGBEVF_DOWN, &adapter->state))) + return -ENETDOWN; + + if (unlikely(queue_id >= adapter->num_xdp_queues)) + return -EINVAL; + + rx_ring = adapter->rx_ring[queue_id]; + if (unlikely(!ring_is_xsk(rx_ring))) + return -EINVAL; + + q_vector = rx_ring->q_vector; + if (!napi_if_scheduled_mark_missed(&q_vector->napi)) + IXGBE_WRITE_REG(&adapter->hw, IXGBE_VTEICS, + BIT(q_vector->v_idx)); + + return 0; +} diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.h new file mode 100644 index 00000000000000..2bb39735b10efd --- /dev/null +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2026 Intel Corporation */ + +#ifndef _IXGBEVF_XSK_H_ +#define _IXGBEVF_XSK_H_ + +/* Process completions as soon as possible */ +#define IXGBEVF_XSK_TX_CLEAN_THRESH(r) ((r)->count - 1) 
+#define IXGBEVF_XSK_MAX_ZC_FRAGS min(18, MAX_SKB_FRAGS) + +int ixgbevf_setup_xsk_pool(struct ixgbevf_adapter *adapter, + struct xsk_buff_pool *pool, u16 qid); +bool ixgbevf_xsk_alloc_rx_bufs(struct ixgbevf_ring *rx_ring, u32 num); +void ixgbevf_rx_xsk_ring_free_buffs(struct ixgbevf_ring *rx_ring); +u32 ixgbevf_clean_xsk_rx_irq(struct ixgbevf_q_vector *q_vector, + struct ixgbevf_ring *rx_ring, int budget); +bool ixgbevf_clean_xsk_tx_irq(struct ixgbevf_q_vector *q_vector, + struct ixgbevf_ring *tx_ring, int napi_budget); +int ixgbevf_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags); + +#endif /* _IXGBEVF_XSK_H_ */ diff --git a/drivers/net/ethernet/intel/libeth/xsk.c b/drivers/net/ethernet/intel/libeth/xsk.c index 846e902e31b600..4882951d5c9c43 100644 --- a/drivers/net/ethernet/intel/libeth/xsk.c +++ b/drivers/net/ethernet/intel/libeth/xsk.c @@ -167,6 +167,7 @@ int libeth_xskfq_create(struct libeth_xskfq *fq) fq->pending = fq->count; fq->thresh = libeth_xdp_queue_threshold(fq->count); fq->buf_len = xsk_pool_get_rx_frame_size(fq->pool); + fq->truesize = xsk_pool_get_rx_frag_step(fq->pool); return 0; } diff --git a/include/net/libeth/xdp.h b/include/net/libeth/xdp.h index 898723ab62e812..2e2154ccecae61 100644 --- a/include/net/libeth/xdp.h +++ b/include/net/libeth/xdp.h @@ -1094,7 +1094,7 @@ __libeth_xdp_xmit_do_bulk(struct libeth_xdp_tx_bulk *bq, * @xqs: array of XDPSQs driver structs * @nqs: number of active XDPSQs, the above array length * @fl: driver callback to flush an XDP xmit bulk - * @fin: driver cabback to finalize the queue + * @fin: driver callback to finalize the queue * * If the driver has active XDPSQs, perform common checks and send the frames. * Finalize the queue, if requested. 
diff --git a/include/net/libeth/xsk.h b/include/net/libeth/xsk.h index 481a7b28e6f248..82b5d21aae8784 100644 --- a/include/net/libeth/xsk.h +++ b/include/net/libeth/xsk.h @@ -597,6 +597,7 @@ __libeth_xsk_run_pass(struct libeth_xdp_buff *xdp, * @pending: current number of XSkFQEs to refill * @thresh: threshold below which the queue is refilled * @buf_len: HW-writeable length per each buffer + * @truesize: step between consecutive buffers, 0 if none exists * @nid: ID of the closest NUMA node with memory */ struct libeth_xskfq { @@ -614,6 +615,8 @@ struct libeth_xskfq { u32 thresh; u32 buf_len; + u32 truesize; + int nid; }; diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h index 242e34f771cca6..09d972f4bd6089 100644 --- a/include/net/xdp_sock_drv.h +++ b/include/net/xdp_sock_drv.h @@ -51,6 +51,11 @@ static inline u32 xsk_pool_get_rx_frame_size(struct xsk_buff_pool *pool) return xsk_pool_get_chunk_size(pool) - xsk_pool_get_headroom(pool); } +static inline u32 xsk_pool_get_rx_frag_step(struct xsk_buff_pool *pool) +{ + return pool->unaligned ? 
0 : xsk_pool_get_chunk_size(pool); +} + static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq) { @@ -337,6 +342,11 @@ static inline u32 xsk_pool_get_rx_frame_size(struct xsk_buff_pool *pool) return 0; } +static inline u32 xsk_pool_get_rx_frag_step(struct xsk_buff_pool *pool) +{ + return 0; +} + static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq) { diff --git a/net/core/filter.c b/net/core/filter.c index 029e560e32ce3e..40ed01579c1bbb 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4151,12 +4151,14 @@ static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset) struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags - 1]; struct xdp_rxq_info *rxq = xdp->rxq; - unsigned int tailroom; + int tailroom; if (!rxq->frag_size || rxq->frag_size > xdp->frame_sz) return -EOPNOTSUPP; - tailroom = rxq->frag_size - skb_frag_size(frag) - skb_frag_off(frag); + tailroom = rxq->frag_size - skb_frag_size(frag) - + skb_frag_off(frag) % rxq->frag_size; + WARN_ON_ONCE(tailroom < 0); if (unlikely(offset > tailroom)) return -EINVAL;