cndm: Reorganize driver

Signed-off-by: Alex Forencich <alex@alexforencich.com>
This commit is contained in:
Alex Forencich
2026-03-04 01:19:37 -08:00
parent 9f56b9febd
commit f8f73ea570
9 changed files with 952 additions and 623 deletions

View File

@@ -12,8 +12,9 @@ cndm-y += cndm_dev.o
cndm-y += cndm_netdev.o cndm-y += cndm_netdev.o
cndm-y += cndm_ethtool.o cndm-y += cndm_ethtool.o
cndm-y += cndm_ptp.o cndm-y += cndm_ptp.o
cndm-y += cndm_tx.o cndm-y += cndm_cq.o
cndm-y += cndm_rx.o cndm-y += cndm_sq.o
cndm-y += cndm_rq.o
ifneq ($(DEBUG),) ifneq ($(DEBUG),)
ccflags-y += -DDEBUG ccflags-y += -DDEBUG

View File

@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL */ /* SPDX-License-Identifier: GPL */
/* /*
Copyright (c) 2025 FPGA Ninja, LLC Copyright (c) 2025-2026 FPGA Ninja, LLC
Authors: Authors:
- Alex Forencich - Alex Forencich
@@ -73,6 +73,71 @@ struct cndm_rx_info {
u32 len; u32 len;
}; };
struct cndm_ring {
// written on enqueue
u32 prod_ptr;
u64 bytes;
u64 packet;
u64 dropped_packets;
struct netdev_queue *tx_queue;
// written from completion
u32 cons_ptr ____cacheline_aligned_in_smp;
u64 ts_s;
u8 ts_valid;
// mostly constant
u32 size;
u32 full_size;
u32 size_mask;
u32 stride;
u32 mtu;
size_t buf_size;
u8 *buf;
dma_addr_t buf_dma_addr;
union {
struct cndm_tx_info *tx_info;
struct cndm_rx_info *rx_info;
};
struct device *dev;
struct cndm_dev *cdev;
struct cndm_priv *priv;
int index;
int enabled;
struct cndm_cq *cq;
u32 db_offset;
u8 __iomem *db_addr;
} ____cacheline_aligned_in_smp;
struct cndm_cq {
u32 cons_ptr;
u32 size;
u32 size_mask;
u32 stride;
size_t buf_size;
u8 *buf;
dma_addr_t buf_dma_addr;
struct device *dev;
struct cndm_dev *cdev;
struct cndm_priv *priv;
struct napi_struct napi;
int cqn;
int enabled;
struct cndm_ring *src_ring;
void (*handler)(struct cndm_cq *cq);
};
struct cndm_priv { struct cndm_priv {
struct device *dev; struct device *dev;
struct net_device *ndev; struct net_device *ndev;
@@ -83,66 +148,18 @@ struct cndm_priv {
void __iomem *hw_addr; void __iomem *hw_addr;
size_t txq_region_len;
void *txq_region;
dma_addr_t txq_region_addr;
struct cndm_irq *irq; struct cndm_irq *irq;
struct notifier_block irq_nb; struct notifier_block irq_nb;
struct hwtstamp_config hwts_config; struct hwtstamp_config hwts_config;
u64 ts_s;
u8 ts_valid;
struct cndm_tx_info *tx_info; int rxq_count;
struct cndm_rx_info *rx_info; int txq_count;
struct netdev_queue *tx_queue; struct cndm_ring *txq;
struct cndm_cq *txcq;
struct napi_struct tx_napi; struct cndm_ring *rxq;
struct napi_struct rx_napi; struct cndm_cq *rxcq;
u32 txq_log_size;
u32 txq_size;
u32 txq_mask;
u32 txq_prod;
u32 txq_cons;
u32 txq_db_offs;
u32 tx_sqn;
size_t rxq_region_len;
void *rxq_region;
dma_addr_t rxq_region_addr;
u32 rxq_log_size;
u32 rxq_size;
u32 rxq_mask;
u32 rxq_prod;
u32 rxq_cons;
u32 rxq_db_offs;
u32 rx_rqn;
size_t txcq_region_len;
void *txcq_region;
dma_addr_t txcq_region_addr;
u32 txcq_log_size;
u32 txcq_size;
u32 txcq_mask;
u32 txcq_prod;
u32 txcq_cons;
u32 tx_cqn;
size_t rxcq_region_len;
void *rxcq_region;
dma_addr_t rxcq_region_addr;
u32 rxcq_log_size;
u32 rxcq_size;
u32 rxcq_mask;
u32 rxcq_prod;
u32 rxcq_cons;
u32 rx_cqn;
}; };
// cndm_cmd.c // cndm_cmd.c
@@ -169,18 +186,32 @@ extern const struct file_operations cndm_fops;
extern const struct ethtool_ops cndm_ethtool_ops; extern const struct ethtool_ops cndm_ethtool_ops;
// cndm_ptp.c // cndm_ptp.c
ktime_t cndm_read_cpl_ts(struct cndm_priv *priv, const struct cndm_cpl *cpl); ktime_t cndm_read_cpl_ts(struct cndm_ring *ring, const struct cndm_cpl *cpl);
void cndm_register_phc(struct cndm_dev *cdev); void cndm_register_phc(struct cndm_dev *cdev);
void cndm_unregister_phc(struct cndm_dev *cdev); void cndm_unregister_phc(struct cndm_dev *cdev);
// cndm_tx.c // cndm_cq.c
int cndm_free_tx_buf(struct cndm_priv *priv); struct cndm_cq *cndm_create_cq(struct cndm_priv *priv);
void cndm_destroy_cq(struct cndm_cq *cq);
int cndm_open_cq(struct cndm_cq *cq, int irqn, int size);
void cndm_close_cq(struct cndm_cq *cq);
// cndm_sq.c
struct cndm_ring *cndm_create_sq(struct cndm_priv *priv);
void cndm_destroy_sq(struct cndm_ring *sq);
int cndm_open_sq(struct cndm_ring *sq, struct cndm_priv *priv, struct cndm_cq *cq, int size);
void cndm_close_sq(struct cndm_ring *sq);
int cndm_free_tx_buf(struct cndm_ring *sq);
int cndm_poll_tx_cq(struct napi_struct *napi, int budget); int cndm_poll_tx_cq(struct napi_struct *napi, int budget);
int cndm_start_xmit(struct sk_buff *skb, struct net_device *ndev); int cndm_start_xmit(struct sk_buff *skb, struct net_device *ndev);
// cndm_rx.c // cndm_rq.c
int cndm_free_rx_buf(struct cndm_priv *priv); struct cndm_ring *cndm_create_rq(struct cndm_priv *priv);
int cndm_refill_rx_buffers(struct cndm_priv *priv); void cndm_destroy_rq(struct cndm_ring *rq);
int cndm_open_rq(struct cndm_ring *rq, struct cndm_priv *priv, struct cndm_cq *cq, int size);
void cndm_close_rq(struct cndm_ring *rq);
int cndm_free_rx_buf(struct cndm_ring *rq);
int cndm_refill_rx_buffers(struct cndm_ring *rq);
int cndm_poll_rx_cq(struct napi_struct *napi, int budget); int cndm_poll_rx_cq(struct napi_struct *napi, int budget);
#endif #endif

View File

@@ -0,0 +1,112 @@
// SPDX-License-Identifier: GPL
/*
Copyright (c) 2026 FPGA Ninja, LLC
Authors:
- Alex Forencich
*/
#include "cndm.h"
// Allocate and minimally initialize a completion queue object.
// The CQ is not usable until cndm_open_cq() is called on it.
// Returns the new CQ or ERR_PTR(-ENOMEM) on allocation failure.
struct cndm_cq *cndm_create_cq(struct cndm_priv *priv)
{
	struct cndm_cq *cq = kzalloc(sizeof(*cq), GFP_KERNEL);

	if (!cq)
		return ERR_PTR(-ENOMEM);

	// kzalloc() has already zeroed enabled, cons_ptr, buf, etc.
	cq->cdev = priv->cdev;
	cq->dev = priv->dev;
	cq->priv = priv;
	cq->cqn = -1; // no hardware CQ assigned yet

	return cq;
}
// Tear down and free a completion queue object.
// Closes the CQ first (safe if it was never opened), then frees it.
// Tolerates NULL so teardown paths can call this unconditionally.
void cndm_destroy_cq(struct cndm_cq *cq)
{
	if (!cq)
		return;
	cndm_close_cq(cq);
	kfree(cq);
}
// Allocate the CQ ring buffer and create the hardware CQ.
// irqn: interrupt to associate with the CQ; size: requested entry count
// (rounded up to a power of two). Returns 0 on success, negative errno
// on failure. Fix: the original declared an unreachable `fail:` label
// (no goto referenced it) and an unused `ret`, which is dead code and a
// compiler warning; both are removed.
int cndm_open_cq(struct cndm_cq *cq, int irqn, int size)
{
	struct cndm_cmd_queue cmd;
	struct cndm_cmd_queue rsp;

	// refuse to open a CQ that is already active or still owns a buffer
	if (cq->enabled || cq->buf)
		return -EINVAL;

	cq->size = roundup_pow_of_two(size);
	cq->size_mask = cq->size - 1;
	cq->stride = 16; // completion entry size in bytes
	cq->buf_size = cq->size * cq->stride;

	cq->buf = dma_alloc_coherent(cq->dev, cq->buf_size, &cq->buf_dma_addr, GFP_KERNEL);
	if (!cq->buf)
		return -ENOMEM;

	cq->cons_ptr = 0;

	// clear all phase tag bits
	memset(cq->buf, 0, cq->buf_size);

	cmd.opcode = CNDM_CMD_OP_CREATE_CQ;
	cmd.flags = 0x00000000;
	cmd.port = cq->priv->ndev->dev_port;
	cmd.qn = 0;
	cmd.qn2 = irqn; // TODO
	cmd.pd = 0;
	cmd.size = ilog2(cq->size);
	cmd.dboffs = 0;
	cmd.ptr1 = cq->buf_dma_addr;
	cmd.ptr2 = 0;
	// NOTE(review): cndm_exec_cmd() result is not checked — confirm whether
	// it can report failure and whether rsp.qn is valid in that case
	cndm_exec_cmd(cq->cdev, &cmd, &rsp);

	cq->cqn = rsp.qn;
	cq->enabled = 1;

	return 0;
}
void cndm_close_cq(struct cndm_cq *cq)
{
struct cndm_dev *cdev = cq->cdev;
struct cndm_cmd_queue cmd;
struct cndm_cmd_queue rsp;
cq->enabled = 0;
if (cq->cqn != -1) {
cmd.opcode = CNDM_CMD_OP_DESTROY_CQ;
cmd.flags = 0x00000000;
cmd.port = cq->priv->ndev->dev_port;
cmd.qn = cq->cqn;
cndm_exec_cmd(cdev, &cmd, &rsp);
cq->cqn = -1;
}
if (cq->buf) {
dma_free_coherent(cq->dev, cq->buf_size, cq->buf, cq->buf_dma_addr);
cq->buf = NULL;
cq->buf_dma_addr = 0;
}
}

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL // SPDX-License-Identifier: GPL
/* /*
Copyright (c) 2025 FPGA Ninja, LLC Copyright (c) 2025-2026 FPGA Ninja, LLC
Authors: Authors:
- Alex Forencich - Alex Forencich
@@ -17,14 +17,14 @@ static int cndm_open(struct net_device *ndev)
{ {
struct cndm_priv *priv = netdev_priv(ndev); struct cndm_priv *priv = netdev_priv(ndev);
cndm_refill_rx_buffers(priv); cndm_refill_rx_buffers(priv->rxq);
priv->tx_queue = netdev_get_tx_queue(ndev, 0); priv->txq->tx_queue = netdev_get_tx_queue(ndev, 0);
netif_napi_add_tx(ndev, &priv->tx_napi, cndm_poll_tx_cq); netif_napi_add_tx(ndev, &priv->txcq->napi, cndm_poll_tx_cq);
napi_enable(&priv->tx_napi); napi_enable(&priv->txcq->napi);
netif_napi_add(ndev, &priv->rx_napi, cndm_poll_rx_cq); netif_napi_add(ndev, &priv->rxcq->napi, cndm_poll_rx_cq);
napi_enable(&priv->rx_napi); napi_enable(&priv->rxcq->napi);
netif_tx_start_all_queues(ndev); netif_tx_start_all_queues(ndev);
netif_carrier_on(ndev); netif_carrier_on(ndev);
@@ -39,12 +39,19 @@ static int cndm_close(struct net_device *ndev)
{ {
struct cndm_priv *priv = netdev_priv(ndev); struct cndm_priv *priv = netdev_priv(ndev);
if (!priv->port_up)
return 0;
priv->port_up = 0; priv->port_up = 0;
napi_disable(&priv->tx_napi); if (priv->txcq) {
netif_napi_del(&priv->tx_napi); napi_disable(&priv->txcq->napi);
napi_disable(&priv->rx_napi); netif_napi_del(&priv->txcq->napi);
netif_napi_del(&priv->rx_napi); }
if (priv->rxcq) {
napi_disable(&priv->rxcq->napi);
netif_napi_del(&priv->rxcq->napi);
}
netif_tx_stop_all_queues(ndev); netif_tx_stop_all_queues(ndev);
netif_carrier_off(ndev); netif_carrier_off(ndev);
@@ -164,8 +171,8 @@ static int cndm_netdev_irq(struct notifier_block *nb, unsigned long action, void
netdev_dbg(priv->ndev, "Interrupt"); netdev_dbg(priv->ndev, "Interrupt");
if (priv->port_up) { if (priv->port_up) {
napi_schedule_irqoff(&priv->tx_napi); napi_schedule_irqoff(&priv->txcq->napi);
napi_schedule_irqoff(&priv->rx_napi); napi_schedule_irqoff(&priv->rxcq->napi);
} }
return NOTIFY_DONE; return NOTIFY_DONE;
@@ -178,9 +185,6 @@ struct net_device *cndm_create_netdev(struct cndm_dev *cdev, int port)
struct cndm_priv *priv; struct cndm_priv *priv;
int ret = 0; int ret = 0;
struct cndm_cmd_queue cmd;
struct cndm_cmd_queue rsp;
ndev = alloc_etherdev_mqs(sizeof(*priv), 1, 1); ndev = alloc_etherdev_mqs(sizeof(*priv), 1, 1);
if (!ndev) { if (!ndev) {
dev_err(dev, "Failed to allocate net_device"); dev_err(dev, "Failed to allocate net_device");
@@ -199,6 +203,9 @@ struct net_device *cndm_create_netdev(struct cndm_dev *cdev, int port)
priv->hw_addr = cdev->hw_addr; priv->hw_addr = cdev->hw_addr;
priv->rxq_count = 1;
priv->txq_count = 1;
netif_set_real_num_tx_queues(ndev, 1); netif_set_real_num_tx_queues(ndev, 1);
netif_set_real_num_rx_queues(ndev, 1); netif_set_real_num_rx_queues(ndev, 1);
@@ -219,134 +226,54 @@ struct net_device *cndm_create_netdev(struct cndm_dev *cdev, int port)
ndev->min_mtu = ETH_MIN_MTU; ndev->min_mtu = ETH_MIN_MTU;
ndev->max_mtu = 1500; ndev->max_mtu = 1500;
priv->rxq_log_size = ilog2(256); priv->rxcq = cndm_create_cq(priv);
priv->rxq_size = 1 << priv->rxq_log_size; if (IS_ERR_OR_NULL(priv->rxcq)) {
priv->rxq_mask = priv->rxq_size-1; ret = PTR_ERR(priv->rxcq);
priv->rxq_prod = 0; goto fail;
priv->rxq_cons = 0; }
ret = cndm_open_cq(priv->rxcq, 0, 256);
priv->txq_log_size = ilog2(256); if (ret) {
priv->txq_size = 1 << priv->txq_log_size; cndm_destroy_cq(priv->rxcq);
priv->txq_mask = priv->txq_size-1; priv->rxcq = NULL;
priv->txq_prod = 0;
priv->txq_cons = 0;
priv->rxcq_log_size = ilog2(256);
priv->rxcq_size = 1 << priv->rxcq_log_size;
priv->rxcq_mask = priv->rxcq_size-1;
priv->rxcq_prod = 0;
priv->rxcq_cons = 0;
priv->txcq_log_size = ilog2(256);
priv->txcq_size = 1 << priv->txcq_log_size;
priv->txcq_mask = priv->txcq_size-1;
priv->txcq_prod = 0;
priv->txcq_cons = 0;
// allocate DMA buffers
priv->txq_region_len = priv->txq_size*16;
priv->txq_region = dma_alloc_coherent(dev, priv->txq_region_len, &priv->txq_region_addr, GFP_KERNEL | __GFP_ZERO);
if (!priv->txq_region) {
ret = -ENOMEM;
goto fail; goto fail;
} }
priv->rxq_region_len = priv->rxq_size*16; priv->rxq = cndm_create_rq(priv);
priv->rxq_region = dma_alloc_coherent(dev, priv->rxq_region_len, &priv->rxq_region_addr, GFP_KERNEL | __GFP_ZERO); if (IS_ERR_OR_NULL(priv->rxq)) {
if (!priv->rxq_region) { ret = PTR_ERR(priv->rxq);
ret = -ENOMEM; goto fail;
}
ret = cndm_open_rq(priv->rxq, priv, priv->rxcq, 256);
if (ret) {
cndm_destroy_rq(priv->rxq);
priv->rxq = NULL;
goto fail; goto fail;
} }
priv->txcq_region_len = priv->txcq_size*16; priv->txcq = cndm_create_cq(priv);
priv->txcq_region = dma_alloc_coherent(dev, priv->txcq_region_len, &priv->txcq_region_addr, GFP_KERNEL | __GFP_ZERO); if (IS_ERR_OR_NULL(priv->txcq)) {
if (!priv->txcq_region) { ret = PTR_ERR(priv->txcq);
ret = -ENOMEM; goto fail;
}
ret = cndm_open_cq(priv->txcq, 0, 256);
if (ret) {
cndm_destroy_cq(priv->txcq);
priv->txcq = NULL;
goto fail; goto fail;
} }
priv->rxcq_region_len = priv->rxcq_size*16; priv->txq = cndm_create_sq(priv);
priv->rxcq_region = dma_alloc_coherent(dev, priv->rxcq_region_len, &priv->rxcq_region_addr, GFP_KERNEL | __GFP_ZERO); if (IS_ERR_OR_NULL(priv->txq)) {
if (!priv->rxcq_region) { ret = PTR_ERR(priv->txq);
ret = -ENOMEM;
goto fail; goto fail;
} }
ret = cndm_open_sq(priv->txq, priv, priv->txcq, 256);
// allocate info rings if (ret) {
priv->tx_info = kvzalloc(sizeof(*priv->tx_info) * priv->txq_size, GFP_KERNEL); cndm_destroy_sq(priv->txq);
if (!priv->tx_info) { priv->txq = NULL;
ret = -ENOMEM;
goto fail; goto fail;
} }
priv->rx_info = kvzalloc(sizeof(*priv->rx_info) * priv->rxq_size, GFP_KERNEL);
if (!priv->tx_info) {
ret = -ENOMEM;
goto fail;
}
cmd.opcode = CNDM_CMD_OP_CREATE_CQ;
cmd.flags = 0x00000000;
cmd.port = port;
cmd.qn = 0;
cmd.qn2 = port;
cmd.pd = 0;
cmd.size = priv->rxcq_log_size;
cmd.dboffs = 0;
cmd.ptr1 = priv->rxcq_region_addr;
cmd.ptr2 = 0;
cndm_exec_cmd(cdev, &cmd, &rsp);
priv->rx_cqn = rsp.qn;
cmd.opcode = CNDM_CMD_OP_CREATE_RQ;
cmd.flags = 0x00000000;
cmd.port = port;
cmd.qn = 0;
cmd.qn2 = priv->rx_cqn;
cmd.pd = 0;
cmd.size = priv->rxq_log_size;
cmd.dboffs = 0;
cmd.ptr1 = priv->rxq_region_addr;
cmd.ptr2 = 0;
cndm_exec_cmd(cdev, &cmd, &rsp);
priv->rx_rqn = rsp.qn;
priv->rxq_db_offs = rsp.dboffs;
cmd.opcode = CNDM_CMD_OP_CREATE_CQ;
cmd.flags = 0x00000000;
cmd.port = port;
cmd.qn = 0;
cmd.qn2 = port;
cmd.pd = 0;
cmd.size = priv->txcq_log_size;
cmd.dboffs = 0;
cmd.ptr1 = priv->txcq_region_addr;
cmd.ptr2 = 0;
cndm_exec_cmd(cdev, &cmd, &rsp);
priv->tx_cqn = rsp.qn;
cmd.opcode = CNDM_CMD_OP_CREATE_SQ;
cmd.flags = 0x00000000;
cmd.port = port;
cmd.qn = 0;
cmd.qn2 = priv->tx_cqn;
cmd.pd = 0;
cmd.size = priv->txq_log_size;
cmd.dboffs = 0;
cmd.ptr1 = priv->txq_region_addr;
cmd.ptr2 = 0;
cndm_exec_cmd(cdev, &cmd, &rsp);
priv->tx_sqn = rsp.qn;
priv->txq_db_offs = rsp.dboffs;
netif_carrier_off(ndev); netif_carrier_off(ndev);
ret = register_netdev(ndev); ret = register_netdev(ndev);
@@ -376,39 +303,33 @@ fail:
void cndm_destroy_netdev(struct net_device *ndev) void cndm_destroy_netdev(struct net_device *ndev)
{ {
struct cndm_priv *priv = netdev_priv(ndev); struct cndm_priv *priv = netdev_priv(ndev);
struct cndm_dev *cdev = priv->cdev;
struct device *dev = priv->dev;
struct cndm_cmd_queue cmd; if (priv->port_up)
struct cndm_cmd_queue rsp; cndm_close(ndev);
cmd.opcode = CNDM_CMD_OP_DESTROY_SQ; if (priv->txq) {
cmd.flags = 0x00000000; cndm_close_sq(priv->txq);
cmd.port = ndev->dev_port; cndm_destroy_sq(priv->txq);
cmd.qn = priv->tx_sqn; priv->txq = NULL;
}
cndm_exec_cmd(cdev, &cmd, &rsp); if (priv->txcq) {
cndm_close_cq(priv->txcq);
cndm_destroy_cq(priv->txcq);
priv->txcq = NULL;
}
cmd.opcode = CNDM_CMD_OP_DESTROY_CQ; if (priv->rxq) {
cmd.flags = 0x00000000; cndm_close_rq(priv->rxq);
cmd.port = ndev->dev_port; cndm_destroy_rq(priv->rxq);
cmd.qn = priv->tx_cqn; priv->rxq = NULL;
}
cndm_exec_cmd(cdev, &cmd, &rsp); if (priv->rxcq) {
cndm_close_cq(priv->rxcq);
cmd.opcode = CNDM_CMD_OP_DESTROY_RQ; cndm_destroy_cq(priv->rxcq);
cmd.flags = 0x00000000; priv->rxcq = NULL;
cmd.port = ndev->dev_port; }
cmd.qn = priv->rx_rqn;
cndm_exec_cmd(cdev, &cmd, &rsp);
cmd.opcode = CNDM_CMD_OP_DESTROY_CQ;
cmd.flags = 0x00000000;
cmd.port = ndev->dev_port;
cmd.qn = priv->rx_cqn;
cndm_exec_cmd(cdev, &cmd, &rsp);
if (priv->irq) if (priv->irq)
atomic_notifier_chain_unregister(&priv->irq->nh, &priv->irq_nb); atomic_notifier_chain_unregister(&priv->irq->nh, &priv->irq_nb);
@@ -418,22 +339,5 @@ void cndm_destroy_netdev(struct net_device *ndev)
if (priv->registered) if (priv->registered)
unregister_netdev(ndev); unregister_netdev(ndev);
if (priv->tx_info) {
cndm_free_tx_buf(priv);
kvfree(priv->tx_info);
}
if (priv->rx_info) {
cndm_free_rx_buf(priv);
kvfree(priv->rx_info);
}
if (priv->txq_region)
dma_free_coherent(dev, priv->txq_region_len, priv->txq_region, priv->txq_region_addr);
if (priv->rxq_region)
dma_free_coherent(dev, priv->rxq_region_len, priv->rxq_region, priv->rxq_region_addr);
if (priv->txcq_region)
dma_free_coherent(dev, priv->txcq_region_len, priv->txcq_region, priv->txcq_region_addr);
if (priv->rxcq_region)
dma_free_coherent(dev, priv->rxcq_region_len, priv->rxcq_region, priv->rxcq_region_addr);
free_netdev(ndev); free_netdev(ndev);
} }

View File

@@ -11,22 +11,22 @@ Authors:
#include "cndm.h" #include "cndm.h"
#include <linux/version.h> #include <linux/version.h>
ktime_t cndm_read_cpl_ts(struct cndm_priv *priv, const struct cndm_cpl *cpl) ktime_t cndm_read_cpl_ts(struct cndm_ring *ring, const struct cndm_cpl *cpl)
{ {
struct cndm_dev *cdev = priv->cdev; struct cndm_dev *cdev = ring->cdev;
// u64 ts_s = le16_to_cpu(cpl->ts_s); // u64 ts_s = le16_to_cpu(cpl->ts_s);
u64 ts_s = cpl->ts_s; u64 ts_s = cpl->ts_s;
u32 ts_ns = le32_to_cpu(cpl->ts_ns); u32 ts_ns = le32_to_cpu(cpl->ts_ns);
if (unlikely(!priv->ts_valid || (priv->ts_s ^ ts_s) & 0xf0)) { if (unlikely(!ring->ts_valid || (ring->ts_s ^ ts_s) & 0xf0)) {
// seconds MSBs do not match, update cached timestamp // seconds MSBs do not match, update cached timestamp
priv->ts_s = ioread32(cdev->hw_addr + 0x0308); ring->ts_s = ioread32(cdev->hw_addr + 0x0308);
priv->ts_s |= (u64) ioread32(cdev->hw_addr + 0x030C) << 32; ring->ts_s |= (u64) ioread32(cdev->hw_addr + 0x030C) << 32;
priv->ts_valid = 1; ring->ts_valid = 1;
} }
ts_s |= priv->ts_s & 0xfffffffffffffff0; ts_s |= ring->ts_s & 0xfffffffffffffff0;
dev_dbg(cdev->dev, "%s: Read timestamp: %lld.%09d", __func__, ts_s, ts_ns); dev_dbg(cdev->dev, "%s: Read timestamp: %lld.%09d", __func__, ts_s, ts_ns);

View File

@@ -0,0 +1,344 @@
// SPDX-License-Identifier: GPL
/*
Copyright (c) 2025-2026 FPGA Ninja, LLC
Authors:
- Alex Forencich
*/
#include "cndm.h"
// Allocate and minimally initialize a receive queue ring object.
// The RQ is not usable until cndm_open_rq() is called on it.
// Returns the new ring or ERR_PTR(-ENOMEM) on allocation failure.
struct cndm_ring *cndm_create_rq(struct cndm_priv *priv)
{
	struct cndm_ring *rq = kzalloc(sizeof(*rq), GFP_KERNEL);

	if (!rq)
		return ERR_PTR(-ENOMEM);

	// kzalloc() has already zeroed enabled, prod/cons pointers, doorbell
	rq->cdev = priv->cdev;
	rq->dev = priv->dev;
	rq->priv = priv;
	rq->index = -1; // no hardware RQ assigned yet

	return rq;
}
// Tear down and free a receive queue ring object.
// Closes the RQ first (safe if it was never opened), then frees it.
// Tolerates NULL so teardown paths can call this unconditionally.
void cndm_destroy_rq(struct cndm_ring *rq)
{
	if (!rq)
		return;
	cndm_close_rq(rq);
	kfree(rq);
}
int cndm_open_rq(struct cndm_ring *rq, struct cndm_priv *priv, struct cndm_cq *cq, int size)
{
int ret = 0;
struct cndm_cmd_queue cmd;
struct cndm_cmd_queue rsp;
if (rq->enabled || rq->buf || !priv || !cq)
return -EINVAL;
rq->size = roundup_pow_of_two(size);
rq->size_mask = rq->size - 1;
rq->stride = 16;
rq->rx_info = kvzalloc(sizeof(*rq->rx_info) * rq->size, GFP_KERNEL);
if (!rq->rx_info)
ret = -ENOMEM;
rq->buf_size = rq->size * rq->stride;
rq->buf = dma_alloc_coherent(rq->dev, rq->buf_size, &rq->buf_dma_addr, GFP_KERNEL);
if (!rq->buf) {
return -ENOMEM;
goto fail;
}
rq->priv = priv;
rq->cq = cq;
cq->src_ring = rq;
// cq->handler = cndm_rx_irq;
rq->prod_ptr = 0;
rq->cons_ptr = 0;
cmd.opcode = CNDM_CMD_OP_CREATE_RQ;
cmd.flags = 0x00000000;
cmd.port = rq->priv->ndev->dev_port;
cmd.qn = 0;
cmd.qn2 = cq->cqn;
cmd.pd = 0;
cmd.size = ilog2(rq->size);
cmd.dboffs = 0;
cmd.ptr1 = rq->buf_dma_addr;
cmd.ptr2 = 0;
cndm_exec_cmd(rq->cdev, &cmd, &rsp);
rq->index = rsp.qn;
rq->db_offset = rsp.dboffs;
rq->db_addr = priv->cdev->hw_addr + rsp.dboffs;
rq->enabled = 1;
ret = cndm_refill_rx_buffers(rq);
if (ret) {
netdev_err(priv->ndev, "failed to allocate RX buffer for RX queue index %d (of %u total) entry index %u (of %u total)",
rq->index, priv->rxq_count, rq->prod_ptr, rq->size);
if (ret == -ENOMEM)
netdev_err(priv->ndev, "machine might not have enough DMA-capable RAM; try to decrease number of RX channels (currently %u) and/or RX ring parameters (entries; currently %u)",
priv->rxq_count, rq->size);
goto fail;
}
return 0;
fail:
cndm_close_rq(rq);
return ret;
}
void cndm_close_rq(struct cndm_ring *rq)
{
struct cndm_dev *cdev = rq->cdev;
struct cndm_cmd_queue cmd;
struct cndm_cmd_queue rsp;
rq->enabled = 0;
if (rq->cq) {
rq->cq->src_ring = NULL;
rq->cq->handler = NULL;
}
rq->cq = NULL;
if (rq->index != -1) {
cmd.opcode = CNDM_CMD_OP_DESTROY_RQ;
cmd.flags = 0x00000000;
cmd.port = rq->priv->ndev->dev_port;
cmd.qn = rq->index;
cndm_exec_cmd(cdev, &cmd, &rsp);
rq->index = -1;
}
if (rq->buf) {
cndm_free_rx_buf(rq);
dma_free_coherent(rq->dev, rq->buf_size, rq->buf, rq->buf_dma_addr);
rq->buf = NULL;
rq->buf_dma_addr = 0;
}
if (rq->rx_info) {
kvfree(rq->rx_info);
rq->rx_info = NULL;
}
rq->priv = NULL;
}
// Unmap and release the page held in RX ring slot 'index'.
// A slot with no page (never filled, or already released) is a no-op.
static void cndm_free_rx_desc(struct cndm_ring *rq, int index)
{
	struct cndm_priv *priv = rq->priv;
	struct cndm_rx_info *info = &rq->rx_info[index];

	netdev_dbg(priv->ndev, "Free RX desc index %d", index);

	if (!info->page)
		return;

	dma_unmap_page(priv->dev, info->dma_addr, info->len, DMA_FROM_DEVICE);
	info->dma_addr = 0;
	__free_pages(info->page, 0);
	info->page = NULL;
}
// Release every RX buffer still posted between the consumer and producer
// pointers, advancing the consumer as it goes.
// Returns the number of descriptors freed.
int cndm_free_rx_buf(struct cndm_ring *rq)
{
	int count = 0;

	for (; rq->cons_ptr != rq->prod_ptr; rq->cons_ptr++, count++)
		cndm_free_rx_desc(rq, rq->cons_ptr & rq->size_mask);

	return count;
}
// Allocate a fresh page, DMA-map it, and write RX ring slot 'index'.
// Returns 0 on success, negative errno on failure.
// Fix: the DMA-mapping failure path returned the bare value -1 (which
// reads as -EPERM); it now returns -ENOMEM like the allocation path, so
// callers that special-case -ENOMEM (cndm_open_rq's diagnostics) behave
// consistently.
static int cndm_prepare_rx_desc(struct cndm_ring *rq, int index)
{
	struct cndm_priv *priv = rq->priv;
	struct device *dev = rq->dev;
	struct cndm_rx_info *rx_info = &rq->rx_info[index];
	struct cndm_desc *rx_desc = (struct cndm_desc *)(rq->buf + index*16);
	struct page *page;
	u32 len = PAGE_SIZE;
	dma_addr_t dma_addr;

	netdev_dbg(priv->ndev, "Prepare RX desc index %d", index);

	page = dev_alloc_pages(0);
	if (unlikely(!page)) {
		netdev_err(priv->ndev, "Failed to allocate page");
		return -ENOMEM;
	}

	dma_addr = dma_map_page(dev, page, 0, len, DMA_FROM_DEVICE);
	if (unlikely(dma_mapping_error(dev, dma_addr))) {
		netdev_err(priv->ndev, "Mapping failed");
		__free_pages(page, 0);
		return -ENOMEM;
	}

	// publish the descriptor; caller orders these writes with dma_wmb()
	// before ringing the doorbell
	rx_desc->len = cpu_to_le32(len);
	rx_desc->addr = cpu_to_le64(dma_addr);

	rx_info->page = page;
	rx_info->len = len;
	rx_info->dma_addr = dma_addr;

	return 0;
}
// Top up the RX ring toward its target occupancy and ring the doorbell.
// Returns 0 on success or the first descriptor-preparation error.
// Fixes over the original:
//  - `128 - (prod - cons)` underflowed (u32) if occupancy ever exceeded
//    128, which would post an enormous number of buffers; guard first.
//  - the doorbell was written even when nothing was posted; skip the MMIO
//    in that case.
int cndm_refill_rx_buffers(struct cndm_ring *rq)
{
	u32 fill = rq->prod_ptr - rq->cons_ptr;
	u32 start = rq->prod_ptr;
	u32 missing;
	int ret = 0;

	// TODO: 128 target occupancy is hard-coded; derive from ring size
	if (fill >= 128)
		return 0;
	missing = 128 - fill;

	// batch refills: not worth a doorbell for fewer than 8 slots
	if (missing < 8)
		return 0;

	while (missing-- > 0) {
		ret = cndm_prepare_rx_desc(rq, rq->prod_ptr & rq->size_mask);
		if (ret)
			break;
		rq->prod_ptr++;
	}

	if (rq->prod_ptr == start)
		return ret; // nothing posted, nothing to tell the device

	// order descriptor writes before the doorbell MMIO
	dma_wmb();
	iowrite32(rq->prod_ptr & 0xffff, rq->db_addr);

	return ret;
}
// Drain up to napi_budget completions from an RX CQ: for each completion,
// unmap the page posted on the paired RQ, wrap it in an skb frag, and hand
// it to GRO. RQ and CQ consumer pointers advance in lockstep (one RQ entry
// per completion). Returns the number of completions consumed, including
// dropped frames. Runs in NAPI context only; no locking here — assumes a
// single poller per CQ (TODO confirm).
static int cndm_process_rx_cq(struct cndm_cq *cq, int napi_budget)
{
	struct cndm_priv *priv = cq->priv;
	struct cndm_ring *rq = cq->src_ring;
	struct cndm_cpl *cpl;
	struct cndm_rx_info *rx_info;
	struct sk_buff *skb;
	struct page *page;
	int done = 0;
	u32 len;
	u32 cq_cons_ptr;
	u32 cq_index;
	u32 cons_ptr;
	u32 index;

	// work on local copies; write back once at the end
	cq_cons_ptr = cq->cons_ptr;
	cons_ptr = rq->cons_ptr;

	while (done < napi_budget) {
		cq_index = cq_cons_ptr & cq->size_mask;
		cpl = (struct cndm_cpl *)(cq->buf + cq_index * 16);

		// phase tag toggles each time the device wraps the CQ; when the
		// entry's phase bit matches the wrap parity of our consumer
		// pointer the entry has not been written yet — stop
		if (!!(cpl->phase & 0x80) == !!(cq_cons_ptr & cq->size))
			break;

		// order the phase-bit read before reading the rest of the entry
		dma_rmb();

		index = cons_ptr & rq->size_mask;
		rx_info = &rq->rx_info[index];
		page = rx_info->page;
		// clamp device-reported length to the posted buffer size
		len = min_t(u32, le16_to_cpu(cpl->len), rx_info->len);

		netdev_dbg(priv->ndev, "Process RX cpl index %d", index);

		if (!page) {
			// ring/CQ out of sync; bail out rather than touch a bad slot
			netdev_err(priv->ndev, "Null page at index %d", index);
			break;
		}

		dma_unmap_page(priv->dev, rx_info->dma_addr, rx_info->len, DMA_FROM_DEVICE);
		rx_info->dma_addr = 0;
		rx_info->page = NULL;

		if (len < ETH_HLEN) {
			// NOTE(review): drop is not counted in rq->dropped_packets —
			// confirm whether stats should be updated here
			netdev_warn(priv->ndev, "Dropping short frame (len %d)", len);
			__free_pages(page, 0);
			goto rx_drop;
		}

		skb = napi_get_frags(&cq->napi);
		if (!skb) {
			netdev_err(priv->ndev, "Failed to allocate skb %d", index);
			__free_pages(page, 0);
			goto rx_drop;
		}

		// RX hardware timestamp
		skb_hwtstamps(skb)->hwtstamp = cndm_read_cpl_ts(rq, cpl);

		// attach the whole page as a single frag; ownership moves to the skb
		__skb_fill_page_desc(skb, 0, page, 0, len);
		skb_shinfo(skb)->nr_frags = 1;
		skb->len = len;
		skb->data_len = len;
		skb->truesize = rx_info->len;

		napi_gro_frags(&cq->napi);

rx_drop:
		done++;
		cq_cons_ptr++;
		cons_ptr++;
	}

	cq->cons_ptr = cq_cons_ptr;
	rq->cons_ptr = cons_ptr;

	// repost buffers for the slots just consumed
	cndm_refill_rx_buffers(rq);

	return done;
}
// NAPI poll callback for an RX completion queue.
// Processes up to 'budget' completions; completes NAPI when the CQ is
// drained before the budget is exhausted.
int cndm_poll_rx_cq(struct napi_struct *napi, int budget)
{
	struct cndm_cq *cq = container_of(napi, struct cndm_cq, napi);
	int processed = cndm_process_rx_cq(cq, budget);

	if (processed < budget) {
		napi_complete(napi);
		// TODO re-enable interrupts
	}

	return processed;
}

View File

@@ -1,198 +0,0 @@
// SPDX-License-Identifier: GPL
/*
Copyright (c) 2025 FPGA Ninja, LLC
Authors:
- Alex Forencich
*/
#include "cndm.h"
static void cndm_free_rx_desc(struct cndm_priv *priv, int index)
{
struct device *dev = priv->dev;
struct cndm_rx_info *rx_info = &priv->rx_info[index];
netdev_dbg(priv->ndev, "Free RX desc index %d", index);
if (!rx_info->page)
return;
dma_unmap_page(dev, rx_info->dma_addr, rx_info->len, DMA_FROM_DEVICE);
rx_info->dma_addr = 0;
__free_pages(rx_info->page, 0);
rx_info->page = NULL;
}
int cndm_free_rx_buf(struct cndm_priv *priv)
{
u32 index;
int cnt = 0;
while (priv->rxq_prod != priv->rxq_cons) {
index = priv->rxq_cons & priv->rxq_mask;
cndm_free_rx_desc(priv, index);
priv->rxq_cons++;
cnt++;
}
return cnt;
}
static int cndm_prepare_rx_desc(struct cndm_priv *priv, int index)
{
struct device *dev = priv->dev;
struct cndm_rx_info *rx_info = &priv->rx_info[index];
struct cndm_desc *rx_desc = (struct cndm_desc *)(priv->rxq_region + index*16);
struct page *page;
u32 len = PAGE_SIZE;
dma_addr_t dma_addr;
netdev_dbg(priv->ndev, "Prepare RX desc index %d", index);
page = dev_alloc_pages(0);
if (unlikely(!page)) {
netdev_err(priv->ndev, "Failed to allocate page");
return -ENOMEM;
}
dma_addr = dma_map_page(dev, page, 0, len, DMA_FROM_DEVICE);
if (unlikely(dma_mapping_error(dev, dma_addr))) {
netdev_err(priv->ndev, "Mapping failed");
__free_pages(page, 0);
return -1;
}
rx_desc->len = cpu_to_le32(len);
rx_desc->addr = cpu_to_le64(dma_addr);
rx_info->page = page;
rx_info->len = len;
rx_info->dma_addr = dma_addr;
return 0;
}
int cndm_refill_rx_buffers(struct cndm_priv *priv)
{
u32 missing = 128 - (priv->rxq_prod - priv->rxq_cons); // TODO
int ret = 0;
if (missing < 8)
return 0;
for (; missing-- > 0;) {
ret = cndm_prepare_rx_desc(priv, priv->rxq_prod & priv->rxq_mask);
if (ret)
break;
priv->rxq_prod++;
}
dma_wmb();
iowrite32(priv->rxq_prod & 0xffff, priv->hw_addr + priv->rxq_db_offs);
return ret;
}
static int cndm_process_rx_cq(struct net_device *ndev, int napi_budget)
{
struct cndm_priv *priv = netdev_priv(ndev);
struct cndm_cpl *cpl;
struct cndm_rx_info *rx_info;
struct sk_buff *skb;
struct page *page;
int done = 0;
u32 len;
u32 cq_cons_ptr;
u32 cq_index;
u32 cons_ptr;
u32 index;
cq_cons_ptr = priv->rxcq_cons;
cons_ptr = priv->rxq_cons;
while (done < napi_budget) {
cq_index = cq_cons_ptr & priv->rxcq_mask;
cpl = (struct cndm_cpl *)(priv->rxcq_region + cq_index * 16);
if (!!(cpl->phase & 0x80) == !!(cq_cons_ptr & priv->rxcq_size))
break;
dma_rmb();
index = cons_ptr & priv->rxq_mask;
rx_info = &priv->rx_info[index];
page = rx_info->page;
len = min_t(u32, le16_to_cpu(cpl->len), rx_info->len);
netdev_dbg(priv->ndev, "Process RX cpl index %d", index);
if (!page) {
netdev_err(priv->ndev, "Null page at index %d", index);
break;
}
dma_unmap_page(priv->dev, rx_info->dma_addr, rx_info->len, DMA_FROM_DEVICE);
rx_info->dma_addr = 0;
rx_info->page = NULL;
if (len < ETH_HLEN) {
netdev_warn(priv->ndev, "Dropping short frame (len %d)", len);
__free_pages(page, 0);
goto rx_drop;
}
skb = napi_get_frags(&priv->rx_napi);
if (!skb) {
netdev_err(priv->ndev, "Failed to allocate skb %d", index);
__free_pages(page, 0);
goto rx_drop;
}
// RX hardware timestamp
skb_hwtstamps(skb)->hwtstamp = cndm_read_cpl_ts(priv, cpl);
__skb_fill_page_desc(skb, 0, page, 0, len);
skb_shinfo(skb)->nr_frags = 1;
skb->len = len;
skb->data_len = len;
skb->truesize = rx_info->len;
napi_gro_frags(&priv->rx_napi);
rx_drop:
done++;
cq_cons_ptr++;
cons_ptr++;
}
priv->rxcq_cons = cq_cons_ptr;
priv->rxq_cons = cons_ptr;
cndm_refill_rx_buffers(priv);
return done;
}
int cndm_poll_rx_cq(struct napi_struct *napi, int budget)
{
struct cndm_priv *priv = container_of(napi, struct cndm_priv, rx_napi);
int done;
done = cndm_process_rx_cq(priv->ndev, budget);
if (done == budget)
return done;
napi_complete(napi);
// TODO re-enable interrupts
return done;
}

View File

@@ -0,0 +1,315 @@
// SPDX-License-Identifier: GPL
/*
Copyright (c) 2025-2026 FPGA Ninja, LLC
Authors:
- Alex Forencich
*/
#include "cndm.h"
// Allocate and minimally initialize a send queue ring object.
// The SQ is not usable until cndm_open_sq() is called on it.
// Returns the new ring or ERR_PTR(-ENOMEM) on allocation failure.
struct cndm_ring *cndm_create_sq(struct cndm_priv *priv)
{
	struct cndm_ring *sq = kzalloc(sizeof(*sq), GFP_KERNEL);

	if (!sq)
		return ERR_PTR(-ENOMEM);

	// kzalloc() has already zeroed enabled, prod/cons pointers, doorbell
	sq->cdev = priv->cdev;
	sq->dev = priv->dev;
	sq->priv = priv;
	sq->index = -1; // no hardware SQ assigned yet

	return sq;
}
// Tear down and free a send queue ring object.
// Closes the SQ first (safe if it was never opened), then frees it.
// Tolerates NULL so teardown paths can call this unconditionally.
void cndm_destroy_sq(struct cndm_ring *sq)
{
	if (!sq)
		return;
	cndm_close_sq(sq);
	kfree(sq);
}
int cndm_open_sq(struct cndm_ring *sq, struct cndm_priv *priv, struct cndm_cq *cq, int size)
{
int ret = 0;
struct cndm_cmd_queue cmd;
struct cndm_cmd_queue rsp;
if (sq->enabled || sq->buf || !priv || !cq)
return -EINVAL;
sq->size = roundup_pow_of_two(size);
sq->size_mask = sq->size - 1;
sq->stride = 16;
sq->tx_info = kvzalloc(sizeof(*sq->tx_info) * sq->size, GFP_KERNEL);
if (!sq->tx_info)
ret = -ENOMEM;
sq->buf_size = sq->size * sq->stride;
sq->buf = dma_alloc_coherent(sq->dev, sq->buf_size, &sq->buf_dma_addr, GFP_KERNEL);
if (!sq->buf) {
return -ENOMEM;
goto fail;
}
sq->priv = priv;
sq->cq = cq;
cq->src_ring = sq;
// cq->handler = cndm_tx_irq;
sq->prod_ptr = 0;
sq->cons_ptr = 0;
cmd.opcode = CNDM_CMD_OP_CREATE_SQ;
cmd.flags = 0x00000000;
cmd.port = sq->priv->ndev->dev_port;
cmd.qn = 0;
cmd.qn2 = cq->cqn;
cmd.pd = 0;
cmd.size = ilog2(sq->size);
cmd.dboffs = 0;
cmd.ptr1 = sq->buf_dma_addr;
cmd.ptr2 = 0;
cndm_exec_cmd(sq->cdev, &cmd, &rsp);
sq->index = rsp.qn;
sq->db_offset = rsp.dboffs;
sq->db_addr = priv->cdev->hw_addr + rsp.dboffs;
sq->enabled = 1;
return 0;
fail:
cndm_close_sq(sq);
return ret;
}
void cndm_close_sq(struct cndm_ring *sq)
{
struct cndm_dev *cdev = sq->cdev;
struct cndm_cmd_queue cmd;
struct cndm_cmd_queue rsp;
sq->enabled = 0;
if (sq->cq) {
sq->cq->src_ring = NULL;
sq->cq->handler = NULL;
}
sq->cq = NULL;
if (sq->index != -1) {
cmd.opcode = CNDM_CMD_OP_DESTROY_SQ;
cmd.flags = 0x00000000;
cmd.port = sq->priv->ndev->dev_port;
cmd.qn = sq->index;
cndm_exec_cmd(cdev, &cmd, &rsp);
sq->index = -1;
}
if (sq->buf) {
cndm_free_tx_buf(sq);
dma_free_coherent(sq->dev, sq->buf_size, sq->buf, sq->buf_dma_addr);
sq->buf = NULL;
sq->buf_dma_addr = 0;
}
if (sq->tx_info) {
kvfree(sq->tx_info);
sq->tx_info = NULL;
}
sq->priv = NULL;
}
// Unmap and release the skb held in TX ring slot 'index'.
// napi_budget is forwarded to napi_consume_skb() (0 = not in NAPI context).
// Fix: skip empty slots — the original unconditionally called
// dma_unmap_single() on a slot whose skb was NULL (zeroed dma_addr/len),
// unlike cndm_free_rx_desc() which guards on a NULL page; this makes the
// two paths consistent and defensive.
static void cndm_free_tx_desc(struct cndm_ring *sq, int index, int napi_budget)
{
	struct cndm_priv *priv = sq->priv;
	struct device *dev = priv->dev;
	struct cndm_tx_info *tx_info = &sq->tx_info[index];
	struct sk_buff *skb = tx_info->skb;

	netdev_dbg(priv->ndev, "Free TX desc index %d", index);

	if (!skb)
		return;

	dma_unmap_single(dev, tx_info->dma_addr, tx_info->len, DMA_TO_DEVICE);
	tx_info->dma_addr = 0;
	napi_consume_skb(skb, napi_budget);
	tx_info->skb = NULL;
}
// Release every TX descriptor still outstanding between the consumer and
// producer pointers, advancing the consumer as it goes.
// Returns the number of descriptors freed.
int cndm_free_tx_buf(struct cndm_ring *sq)
{
	int count = 0;

	for (; sq->cons_ptr != sq->prod_ptr; sq->cons_ptr++, count++)
		cndm_free_tx_desc(sq, sq->cons_ptr & sq->size_mask, 0);

	return count;
}
// Drain up to napi_budget completions from a TX CQ: deliver requested TX
// hardware timestamps, then unmap and free the corresponding SQ entries.
// SQ and CQ consumer pointers advance in lockstep (one SQ entry per
// completion). Wakes the stopped netdev queue once space is reclaimed.
// Returns the number of completions processed. Runs in NAPI context.
static int cndm_process_tx_cq(struct cndm_cq *cq, int napi_budget)
{
	struct cndm_priv *priv = cq->priv;
	struct cndm_ring *sq = cq->src_ring;
	struct cndm_tx_info *tx_info;
	struct cndm_cpl *cpl;
	struct skb_shared_hwtstamps hwts;
	int done = 0;
	u32 cq_cons_ptr;
	u32 cq_index;
	u32 cons_ptr;
	u32 index;

	// work on local copies; write back once at the end
	cq_cons_ptr = cq->cons_ptr;
	cons_ptr = sq->cons_ptr;

	while (done < napi_budget) {
		cq_index = cq_cons_ptr & cq->size_mask;
		cpl = (struct cndm_cpl *)(cq->buf + cq_index * 16);

		// phase tag toggles each time the device wraps the CQ; when the
		// entry's phase bit matches the wrap parity of our consumer
		// pointer the entry has not been written yet — stop
		if (!!(cpl->phase & 0x80) == !!(cq_cons_ptr & cq->size))
			break;

		// order the phase-bit read before reading the rest of the entry
		dma_rmb();

		index = cons_ptr & sq->size_mask;
		tx_info = &sq->tx_info[index];

		// TX hardware timestamp
		if (unlikely(tx_info->ts_requested)) {
			netdev_dbg(priv->ndev, "%s: TX TS requested", __func__);
			hwts.hwtstamp = cndm_read_cpl_ts(sq, cpl);
			// deliver the timestamp before the skb is consumed below
			skb_tstamp_tx(tx_info->skb, &hwts);
		}

		cndm_free_tx_desc(sq, index, napi_budget);

		done++;
		cq_cons_ptr++;
		cons_ptr++;
	}

	cq->cons_ptr = cq_cons_ptr;
	sq->cons_ptr = cons_ptr;

	// restart transmission if the queue was stopped and we made progress
	// (or the ring is now completely empty)
	if (netif_tx_queue_stopped(sq->tx_queue) && (done != 0 || sq->prod_ptr == sq->cons_ptr))
		netif_tx_wake_queue(sq->tx_queue);

	return done;
}
// NAPI poll handler for the TX completion queue.
// Reclaims up to @budget completed descriptors and leaves polling mode
// once the CQ has been drained below the budget.
int cndm_poll_tx_cq(struct napi_struct *napi, int budget)
{
	struct cndm_cq *cq = container_of(napi, struct cndm_cq, napi);
	int done;

	done = cndm_process_tx_cq(cq, budget);

	// Budget exhausted: more work may be pending, stay in polling mode.
	if (done == budget)
		return done;

	// Report the work actually performed so the NAPI core can make its
	// flush/rearm decisions; napi_complete_done() returns false if the
	// poll was re-armed, in which case interrupts must stay masked.
	if (napi_complete_done(napi, done)) {
		// TODO re-enable interrupts
	}

	return done;
}
int cndm_start_xmit(struct sk_buff *skb, struct net_device *ndev)
{
struct skb_shared_info *shinfo = skb_shinfo(skb);
struct cndm_priv *priv = netdev_priv(ndev);
struct cndm_ring *sq = priv->txq;
struct device *dev = priv->dev;
u32 index;
u32 cons_ptr;
u32 len;
dma_addr_t dma_addr;
struct cndm_desc *tx_desc;
struct cndm_tx_info *tx_info;
netdev_dbg(ndev, "Got packet for TX");
if (skb->len < ETH_HLEN) {
netdev_warn(ndev, "Dropping short frame");
goto tx_drop;
}
cons_ptr = READ_ONCE(sq->cons_ptr);
index = sq->prod_ptr & sq->size_mask;
tx_desc = (struct cndm_desc *)(sq->buf + index*16);
tx_info = &sq->tx_info[index];
// TX hardware timestamp
tx_info->ts_requested = 0;
if (unlikely(shinfo->tx_flags & SKBTX_HW_TSTAMP)) {
netdev_dbg(ndev, "%s: TX TS requested", __func__);
shinfo->tx_flags |= SKBTX_IN_PROGRESS;
tx_info->ts_requested = 1;
}
len = skb_headlen(skb);
dma_addr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
if (unlikely(dma_mapping_error(dev, dma_addr))) {
netdev_err(ndev, "Mapping failed");
goto tx_drop;
}
tx_desc->len = cpu_to_le32(len);
tx_desc->addr = cpu_to_le64(dma_addr);
tx_info->skb = skb;
tx_info->len = len;
tx_info->dma_addr = dma_addr;
netdev_dbg(ndev, "Write desc index %d len %d", index, len);
sq->prod_ptr++;
skb_tx_timestamp(skb);
if (sq->prod_ptr - sq->cons_ptr >= 128) {
netdev_dbg(ndev, "TX ring full");
netif_tx_stop_queue(sq->tx_queue);
}
dma_wmb();
iowrite32(sq->prod_ptr & 0xffff, sq->db_addr);
return NETDEV_TX_OK;
tx_drop:
dev_kfree_skb_any(skb);
return NETDEV_TX_OK;
}

View File

@@ -1,180 +0,0 @@
// SPDX-License-Identifier: GPL
/*
Copyright (c) 2025 FPGA Ninja, LLC
Authors:
- Alex Forencich
*/
#include "cndm.h"
// Unmap and release the skb held in one TX descriptor slot.
// napi_budget == 0 means we are outside NAPI context (teardown path).
static void cndm_free_tx_desc(struct cndm_priv *priv, int index, int napi_budget)
{
	struct cndm_tx_info *info = &priv->tx_info[index];

	netdev_dbg(priv->ndev, "Free TX desc index %d", index);

	dma_unmap_single(priv->dev, info->dma_addr, info->len, DMA_TO_DEVICE);
	info->dma_addr = 0;

	napi_consume_skb(info->skb, napi_budget);
	info->skb = NULL;
}
// Drain all in-flight TX descriptors from the send queue (teardown path).
// Returns the number of descriptors that were reclaimed.
int cndm_free_tx_buf(struct cndm_priv *priv)
{
	int freed;

	for (freed = 0; priv->txq_cons != priv->txq_prod; freed++) {
		cndm_free_tx_desc(priv, priv->txq_cons & priv->txq_mask, 0);
		priv->txq_cons++;
	}

	return freed;
}
// Reclaim completed TX descriptors by consuming entries from the TX
// completion queue.  Completions are consumed in order, one TX ring slot
// per CQ entry.  Returns the number of completions processed (bounded by
// napi_budget).
static int cndm_process_tx_cq(struct net_device *ndev, int napi_budget)
{
	struct cndm_priv *priv = netdev_priv(ndev);
	struct cndm_tx_info *tx_info;
	struct cndm_cpl *cpl;
	struct skb_shared_hwtstamps hwts;
	int done = 0;
	u32 cq_cons_ptr;
	u32 cq_index;
	u32 cons_ptr;
	u32 index;

	// Work on local copies of both consumer pointers; they are written
	// back once after the loop.
	cq_cons_ptr = priv->txcq_cons;
	cons_ptr = priv->txq_cons;

	while (done < napi_budget) {
		cq_index = cq_cons_ptr & priv->txcq_mask;
		// Completion entries are 16 bytes apart.
		cpl = (struct cndm_cpl *)(priv->txcq_region + cq_index * 16);

		// Phase bit (bit 7 of cpl->phase) flips each time the hardware
		// wraps the CQ; an entry is valid only while its phase differs
		// from the wrap parity implied by the consumer pointer.
		if (!!(cpl->phase & 0x80) == !!(cq_cons_ptr & priv->txcq_size))
			break;

		// Read the rest of the completion only after the phase bit
		// indicated a valid entry.
		dma_rmb();

		index = cons_ptr & priv->txq_mask;
		tx_info = &priv->tx_info[index];

		// TX hardware timestamp
		if (unlikely(tx_info->ts_requested)) {
			netdev_dbg(priv->ndev, "%s: TX TS requested", __func__);
			hwts.hwtstamp = cndm_read_cpl_ts(priv, cpl);
			skb_tstamp_tx(tx_info->skb, &hwts);
		}

		cndm_free_tx_desc(priv, index, napi_budget);

		done++;
		cq_cons_ptr++;
		cons_ptr++;
	}

	priv->txcq_cons = cq_cons_ptr;
	priv->txq_cons = cons_ptr;

	// Wake the TX queue once space has been freed (or the ring is empty).
	// NOTE(review): there is no memory barrier between the txq_cons store
	// above and the stopped-queue check; confirm ordering against the
	// stop in cndm_start_xmit.
	if (netif_tx_queue_stopped(priv->tx_queue) && (done != 0 || priv->txq_prod == priv->txq_cons))
		netif_tx_wake_queue(priv->tx_queue);

	return done;
}
// NAPI poll handler for the TX completion queue.
// Reclaims up to @budget completed descriptors and leaves polling mode
// once the CQ has been drained below the budget.
int cndm_poll_tx_cq(struct napi_struct *napi, int budget)
{
	struct cndm_priv *priv = container_of(napi, struct cndm_priv, tx_napi);
	int done;

	done = cndm_process_tx_cq(priv->ndev, budget);

	// Budget exhausted: more work may be pending, stay in polling mode.
	if (done == budget)
		return done;

	// Report the work actually performed so the NAPI core can make its
	// flush/rearm decisions; napi_complete_done() returns false if the
	// poll was re-armed, in which case interrupts must stay masked.
	if (napi_complete_done(napi, done)) {
		// TODO re-enable interrupts
	}

	return done;
}
// Transmit one skb on the device's TX ring.
// Always returns NETDEV_TX_OK; undeliverable frames are dropped.
int cndm_start_xmit(struct sk_buff *skb, struct net_device *ndev)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	struct cndm_priv *priv = netdev_priv(ndev);
	struct device *dev = priv->dev;
	u32 index;
	u32 cons_ptr;
	u32 len;
	dma_addr_t dma_addr;
	struct cndm_desc *tx_desc;
	struct cndm_tx_info *tx_info;

	netdev_dbg(ndev, "Got packet for TX");

	if (skb->len < ETH_HLEN) {
		netdev_warn(ndev, "Dropping short frame");
		goto tx_drop;
	}

	// NOTE(review): this snapshot is taken but never used — the fullness
	// check below re-reads priv->txq_cons directly (set-but-unused
	// warning, and the re-read races with the completion path).
	cons_ptr = READ_ONCE(priv->txq_cons);

	index = priv->txq_prod & priv->txq_mask;
	// Descriptors are 16 bytes apart.
	tx_desc = (struct cndm_desc *)(priv->txq_region + index*16);
	tx_info = &priv->tx_info[index];

	// TX hardware timestamp
	tx_info->ts_requested = 0;
	if (unlikely(shinfo->tx_flags & SKBTX_HW_TSTAMP)) {
		netdev_dbg(ndev, "%s: TX TS requested", __func__);
		shinfo->tx_flags |= SKBTX_IN_PROGRESS;
		tx_info->ts_requested = 1;
	}

	// NOTE(review): only the linear head of the skb is mapped; skbs with
	// paged fragments would be truncated — confirm fragments cannot occur
	// on this path.
	len = skb_headlen(skb);

	dma_addr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
	if (unlikely(dma_mapping_error(dev, dma_addr))) {
		netdev_err(ndev, "Mapping failed");
		goto tx_drop;
	}

	tx_desc->len = cpu_to_le32(len);
	tx_desc->addr = cpu_to_le64(dma_addr);

	tx_info->skb = skb;
	tx_info->len = len;
	tx_info->dma_addr = dma_addr;

	netdev_dbg(ndev, "Write desc index %d len %d", index, len);

	priv->txq_prod++;

	skb_tx_timestamp(skb);

	// Stop the queue when the ring fills.
	// NOTE(review): 128 looks like the ring size; presumably it should be
	// derived from the configured ring size — TODO confirm.
	if (priv->txq_prod - priv->txq_cons >= 128) {
		netdev_dbg(ndev, "TX ring full");
		netif_tx_stop_queue(priv->tx_queue);
	}

	// Make the descriptor visible to the device before ringing the doorbell.
	dma_wmb();
	iowrite32(priv->txq_prod & 0xffff, priv->hw_addr + priv->txq_db_offs);

	return NETDEV_TX_OK;

tx_drop:
	dev_kfree_skb_any(skb);
	return NETDEV_TX_OK;
}