From 74e49a77e2e07d8e96a486d0db32b45726f29ddd Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Wed, 31 Dec 2025 17:32:18 -0800 Subject: [PATCH] cndm_proto: Initial commit of corundum-proto Signed-off-by: Alex Forencich --- src/cndm_proto/lib/taxi | 1 + src/cndm_proto/modules/cndm_proto/Makefile | 31 + .../modules/cndm_proto/cndm_proto.h | 125 ++++ .../modules/cndm_proto/cndm_proto_ddcmd.sh | 28 + .../modules/cndm_proto/cndm_proto_main.c | 168 ++++++ .../modules/cndm_proto/cndm_proto_netdev.c | 243 ++++++++ .../modules/cndm_proto/cndm_proto_rx.c | 186 ++++++ .../modules/cndm_proto/cndm_proto_tx.c | 156 +++++ src/cndm_proto/rtl/cndm_proto_core.f | 14 + src/cndm_proto/rtl/cndm_proto_core.sv | 299 ++++++++++ src/cndm_proto/rtl/cndm_proto_cpl_wr.sv | 227 ++++++++ src/cndm_proto/rtl/cndm_proto_desc_rd.sv | 266 +++++++++ src/cndm_proto/rtl/cndm_proto_pcie_us.f | 5 + src/cndm_proto/rtl/cndm_proto_pcie_us.sv | 465 +++++++++++++++ src/cndm_proto/rtl/cndm_proto_port.sv | 544 ++++++++++++++++++ src/cndm_proto/rtl/cndm_proto_rx.sv | 211 +++++++ src/cndm_proto/rtl/cndm_proto_tx.sv | 205 +++++++ src/cndm_proto/tb/cndm_proto.py | 276 +++++++++ src/cndm_proto/tb/cndm_proto_pcie_us/Makefile | 59 ++ .../tb/cndm_proto_pcie_us/cndm_proto.py | 1 + .../test_cndm_proto_pcie_us.py | 473 +++++++++++++++ .../test_cndm_proto_pcie_us.sv | 228 ++++++++ 22 files changed, 4211 insertions(+) create mode 120000 src/cndm_proto/lib/taxi create mode 100644 src/cndm_proto/modules/cndm_proto/Makefile create mode 100644 src/cndm_proto/modules/cndm_proto/cndm_proto.h create mode 100755 src/cndm_proto/modules/cndm_proto/cndm_proto_ddcmd.sh create mode 100644 src/cndm_proto/modules/cndm_proto/cndm_proto_main.c create mode 100644 src/cndm_proto/modules/cndm_proto/cndm_proto_netdev.c create mode 100644 src/cndm_proto/modules/cndm_proto/cndm_proto_rx.c create mode 100644 src/cndm_proto/modules/cndm_proto/cndm_proto_tx.c create mode 100644 src/cndm_proto/rtl/cndm_proto_core.f create mode 100644 
src/cndm_proto/rtl/cndm_proto_core.sv create mode 100644 src/cndm_proto/rtl/cndm_proto_cpl_wr.sv create mode 100644 src/cndm_proto/rtl/cndm_proto_desc_rd.sv create mode 100644 src/cndm_proto/rtl/cndm_proto_pcie_us.f create mode 100644 src/cndm_proto/rtl/cndm_proto_pcie_us.sv create mode 100644 src/cndm_proto/rtl/cndm_proto_port.sv create mode 100644 src/cndm_proto/rtl/cndm_proto_rx.sv create mode 100644 src/cndm_proto/rtl/cndm_proto_tx.sv create mode 100644 src/cndm_proto/tb/cndm_proto.py create mode 100644 src/cndm_proto/tb/cndm_proto_pcie_us/Makefile create mode 120000 src/cndm_proto/tb/cndm_proto_pcie_us/cndm_proto.py create mode 100644 src/cndm_proto/tb/cndm_proto_pcie_us/test_cndm_proto_pcie_us.py create mode 100644 src/cndm_proto/tb/cndm_proto_pcie_us/test_cndm_proto_pcie_us.sv diff --git a/src/cndm_proto/lib/taxi b/src/cndm_proto/lib/taxi new file mode 120000 index 0000000..1b20c9f --- /dev/null +++ b/src/cndm_proto/lib/taxi @@ -0,0 +1 @@ +../../../ \ No newline at end of file diff --git a/src/cndm_proto/modules/cndm_proto/Makefile b/src/cndm_proto/modules/cndm_proto/Makefile new file mode 100644 index 0000000..2ac98a5 --- /dev/null +++ b/src/cndm_proto/modules/cndm_proto/Makefile @@ -0,0 +1,31 @@ +# SPDX-License-Identifier: GPL +# Copyright (c) 2025 FPGA Ninja + +ifneq ($(KERNELRELEASE),) + +obj-m += cndm_proto.o +cndm_proto-y += cndm_proto_main.o +cndm_proto-y += cndm_proto_netdev.o +cndm_proto-y += cndm_proto_tx.o +cndm_proto-y += cndm_proto_rx.o + +ifneq ($(DEBUG),) +ccflags-y += -DDEBUG +endif + +else + +ifneq ($(KERNEL_SRC),) +KDIR ?= $(KERNEL_SRC) +endif + +KDIR ?= /lib/modules/$(shell uname -r)/build + +all: modules + +help modules modules_install clean: + $(MAKE) -C $(KDIR) M=$(shell pwd) $@ + +install: modules_install + +endif diff --git a/src/cndm_proto/modules/cndm_proto/cndm_proto.h b/src/cndm_proto/modules/cndm_proto/cndm_proto.h new file mode 100644 index 0000000..94280f4 --- /dev/null +++ b/src/cndm_proto/modules/cndm_proto/cndm_proto.h @@ 
-0,0 +1,125 @@ +/* SPDX-License-Identifier: GPL */ +// Copyright (c) 2025 FPGA Ninja + +#ifndef CNDM_PROTO_H +#define CNDM_PROTO_H + +#include +#include +#include +#include + +#define DRIVER_NAME "cndm_proto" +#define DRIVER_VERSION "0.1" + +struct cndm_proto_dev { + struct pci_dev *pdev; + struct device *dev; + + struct net_device *ndev[32]; + + void __iomem *bar; + resource_size_t bar_len; + + u32 port_count; + u32 port_offset; + u32 port_stride; +}; + +struct cndm_proto_tx_info { + struct sk_buff *skb; + dma_addr_t dma_addr; + u32 len; +}; + +struct cndm_proto_rx_info { + struct page *page; + dma_addr_t dma_addr; + u32 len; +}; + +struct cndm_proto_priv { + struct device *dev; + struct net_device *ndev; + struct cndm_proto_dev *cdev; + + bool registered; + bool port_up; + + void __iomem *hw_addr; + + size_t txq_region_len; + void *txq_region; + dma_addr_t txq_region_addr; + + struct cndm_proto_tx_info *tx_info; + struct cndm_proto_rx_info *rx_info; + + struct netdev_queue *tx_queue; + + struct napi_struct tx_napi; + struct napi_struct rx_napi; + + u32 txq_log_size; + u32 txq_size; + u32 txq_mask; + u32 txq_prod; + u32 txq_cons; + + size_t rxq_region_len; + void *rxq_region; + dma_addr_t rxq_region_addr; + + u32 rxq_log_size; + u32 rxq_size; + u32 rxq_mask; + u32 rxq_prod; + u32 rxq_cons; + + size_t txcq_region_len; + void *txcq_region; + dma_addr_t txcq_region_addr; + + u32 txcq_log_size; + u32 txcq_size; + u32 txcq_mask; + u32 txcq_prod; + u32 txcq_cons; + + size_t rxcq_region_len; + void *rxcq_region; + dma_addr_t rxcq_region_addr; + + u32 rxcq_log_size; + u32 rxcq_size; + u32 rxcq_mask; + u32 rxcq_prod; + u32 rxcq_cons; +}; + +struct cndm_proto_desc { + __u8 rsvd[4]; + __le32 len; + __le64 addr; +}; + +struct cndm_proto_cpl { + __u8 rsvd[4]; + __le32 len; + __u8 rsvd2[7]; + __u8 phase; +}; + +irqreturn_t cndm_proto_irq(int irqn, void *data); +struct net_device *cndm_proto_create_netdev(struct cndm_proto_dev *cdev, int port, void __iomem *hw_addr); +void 
cndm_proto_destroy_netdev(struct net_device *ndev); + +int cndm_proto_free_tx_buf(struct cndm_proto_priv *priv); +int cndm_proto_poll_tx_cq(struct napi_struct *napi, int budget); +int cndm_proto_start_xmit(struct sk_buff *skb, struct net_device *ndev); + +int cndm_proto_free_rx_buf(struct cndm_proto_priv *priv); +int cndm_proto_refill_rx_buffers(struct cndm_proto_priv *priv); +int cndm_proto_poll_rx_cq(struct napi_struct *napi, int budget); + +#endif diff --git a/src/cndm_proto/modules/cndm_proto/cndm_proto_ddcmd.sh b/src/cndm_proto/modules/cndm_proto/cndm_proto_ddcmd.sh new file mode 100755 index 0000000..e46bd67 --- /dev/null +++ b/src/cndm_proto/modules/cndm_proto/cndm_proto_ddcmd.sh @@ -0,0 +1,28 @@ +#!/bin/sh + +module=cndm_proto + +control=/proc/dynamic_debug/control + +if ! test -f $control; then + control=/sys/kernel/debug/dynamic_debug/control +fi + +if ! test -f $control; then + >&2 echo "Error: dynamic debug control file not found" + exit 1 +fi + +if [ $# -eq 0 ]; then + >&2 echo "Error: no argument provided" + >&2 echo "usage: $0 [stmt]" + >&2 echo "Disable all debug print statements: $0 =_" + >&2 echo "Enable all debug print statements: $0 =p" + >&2 echo "More verbose: $0 =pflmt" + >&2 echo "Pattern match: $0 format \"some-string\" =p" + >&2 echo "Current configuration:" + grep "\[$module\]" $control >&2 + exit 1 +fi + +echo module $module "${@@Q}" > $control diff --git a/src/cndm_proto/modules/cndm_proto/cndm_proto_main.c b/src/cndm_proto/modules/cndm_proto/cndm_proto_main.c new file mode 100644 index 0000000..6481687 --- /dev/null +++ b/src/cndm_proto/modules/cndm_proto/cndm_proto_main.c @@ -0,0 +1,168 @@ +// SPDX-License-Identifier: GPL +// Copyright (c) 2025 FPGA Ninja + +#include "cndm_proto.h" +#include +#include + +MODULE_DESCRIPTION("Corundum-proto device driver"); +MODULE_AUTHOR("FPGA Ninja"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(DRIVER_VERSION); + +static int cndm_proto_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + 
struct device *dev = &pdev->dev; + struct cndm_proto_dev *cdev; + int ret = 0; + int k; + + dev_info(dev, DRIVER_NAME " PCI probe"); + dev_info(dev, "Corundum-proto device driver"); + dev_info(dev, "Version " DRIVER_VERSION); + dev_info(dev, "Copyright (c) 2025 FPGA Ninja"); + dev_info(dev, "https://fpga.ninja/"); + + pcie_print_link_status(pdev); + + cdev = devm_kzalloc(dev, sizeof(struct cndm_proto_dev), GFP_KERNEL); + if (!cdev) + return -ENOMEM; + + cdev->pdev = pdev; + cdev->dev = dev; + pci_set_drvdata(pdev, cdev); + + ret = pci_enable_device_mem(pdev); + if (ret) { + dev_err(dev, "Failed to enable device"); + goto fail_enable_device; + } + + pci_set_master(pdev); + + ret = pci_request_regions(pdev, DRIVER_NAME); + if (ret) { + dev_err(dev, "Failed to reserve regions"); + goto fail_regions; + } + + cdev->bar_len = pci_resource_len(pdev, 0); + + dev_info(dev, "BAR size: %llu", (unsigned long long)cdev->bar_len); /* resource_size_t may be 32 bits wide */ + cdev->bar = pci_ioremap_bar(pdev, 0); + if (!cdev->bar) { + ret = -ENOMEM; + dev_err(dev, "Failed to map BAR 0"); + goto fail_map_bars; + } + + if (ioread32(cdev->bar + 0x0000) == 0xffffffff) { + ret = -EIO; + dev_err(dev, "Device needs to be reset"); + goto fail_map_bars; + } + + ret = pci_alloc_irq_vectors(pdev, 1, 32, PCI_IRQ_MSI | PCI_IRQ_MSIX); + if (ret < 0) { + dev_err(dev, "Failed to allocate IRQs"); + goto fail_map_bars; + } + + cdev->port_count = min_t(u32, ioread32(cdev->bar + 0x0100), ARRAY_SIZE(cdev->ndev)); /* hardware value must never index past ndev[] */ + cdev->port_offset = ioread32(cdev->bar + 0x0104); + cdev->port_stride = ioread32(cdev->bar + 0x0108); + + dev_info(dev, "Port count: %d", cdev->port_count); + dev_info(dev, "Port offset: 0x%x", cdev->port_offset); + dev_info(dev, "Port stride: 0x%x", cdev->port_stride); + + for (k = 0; k < cdev->port_count; k++) { + struct net_device *ndev; + + ndev = cndm_proto_create_netdev(cdev, k, cdev->bar + cdev->port_offset + (cdev->port_stride*k)); + if (IS_ERR_OR_NULL(ndev)) { + ret = ndev ? PTR_ERR(ndev) : -ENOMEM; /* PTR_ERR(NULL) is 0 and would make probe report success */ + goto fail_netdev; + } + + ret = pci_request_irq(pdev, k, cndm_proto_irq, 0, ndev, 
DRIVER_NAME); + if (ret < 0) { + dev_err(dev, "Failed to request IRQ"); + cndm_proto_destroy_netdev(ndev); + goto fail_netdev; + } + + cdev->ndev[k] = ndev; + } + + return 0; + +fail_netdev: + for (k = 0; k < 32; k++) { + if (cdev->ndev[k]) { + pci_free_irq(pdev, k, cdev->ndev[k]); + cndm_proto_destroy_netdev(cdev->ndev[k]); + cdev->ndev[k] = NULL; + } + } + pci_free_irq_vectors(pdev); +fail_map_bars: + if (cdev->bar) + pci_iounmap(pdev, cdev->bar); + pci_release_regions(pdev); +fail_regions: + pci_clear_master(pdev); + pci_disable_device(pdev); +fail_enable_device: + return ret; +} + +static void cndm_proto_pci_remove(struct pci_dev *pdev) +{ + struct device *dev = &pdev->dev; + struct cndm_proto_dev *cdev = pci_get_drvdata(pdev); + int k; + + dev_info(dev, DRIVER_NAME " PCI remove"); + + for (k = 0; k < 32; k++) { + if (cdev->ndev[k]) { + pci_free_irq(pdev, k, cdev->ndev[k]); + cndm_proto_destroy_netdev(cdev->ndev[k]); + cdev->ndev[k] = NULL; + } + } + + pci_free_irq_vectors(pdev); + if (cdev->bar) + pci_iounmap(pdev, cdev->bar); + pci_release_regions(pdev); + pci_clear_master(pdev); + pci_disable_device(pdev); +} + +static const struct pci_device_id cndm_proto_pci_id_table[] = { + {PCI_DEVICE(0x1234, 0xC070)}, + {0} +}; + +static struct pci_driver cndm_proto_driver = { + .name = DRIVER_NAME, + .id_table = cndm_proto_pci_id_table, + .probe = cndm_proto_pci_probe, + .remove = cndm_proto_pci_remove +}; + +static int __init cndm_proto_init(void) +{ + return pci_register_driver(&cndm_proto_driver); +} + +static void __exit cndm_proto_exit(void) +{ + pci_unregister_driver(&cndm_proto_driver); +} + +module_init(cndm_proto_init); +module_exit(cndm_proto_exit); diff --git a/src/cndm_proto/modules/cndm_proto/cndm_proto_netdev.c b/src/cndm_proto/modules/cndm_proto/cndm_proto_netdev.c new file mode 100644 index 0000000..3f9a66b --- /dev/null +++ b/src/cndm_proto/modules/cndm_proto/cndm_proto_netdev.c @@ -0,0 +1,243 @@ +// SPDX-License-Identifier: GPL +// Copyright (c) 2025 
FPGA Ninja + +#include "cndm_proto.h" + +static int cndm_proto_open(struct net_device *ndev) +{ + struct cndm_proto_priv *priv = netdev_priv(ndev); + + cndm_proto_refill_rx_buffers(priv); + + priv->tx_queue = netdev_get_tx_queue(ndev, 0); + + netif_napi_add_tx(ndev, &priv->tx_napi, cndm_proto_poll_tx_cq); + napi_enable(&priv->tx_napi); + netif_napi_add(ndev, &priv->rx_napi, cndm_proto_poll_rx_cq); + napi_enable(&priv->rx_napi); + + netif_tx_start_all_queues(ndev); + netif_carrier_on(ndev); + netif_device_attach(ndev); + + priv->port_up = 1; + + return 0; +} + +static int cndm_proto_close(struct net_device *ndev) +{ + struct cndm_proto_priv *priv = netdev_priv(ndev); + + priv->port_up = 0; + + napi_disable(&priv->tx_napi); + netif_napi_del(&priv->tx_napi); + napi_disable(&priv->rx_napi); + netif_napi_del(&priv->rx_napi); + + netif_tx_stop_all_queues(ndev); + netif_carrier_off(ndev); + netif_tx_disable(ndev); + + return 0; +} + +static const struct net_device_ops cndm_proto_netdev_ops = { + .ndo_open = cndm_proto_open, + .ndo_stop = cndm_proto_close, + .ndo_start_xmit = cndm_proto_start_xmit, +}; + +irqreturn_t cndm_proto_irq(int irqn, void *data) +{ + struct net_device *ndev = data; + struct cndm_proto_priv *priv = netdev_priv(ndev); + + netdev_dbg(ndev, "Interrupt"); + + if (priv->port_up) { + napi_schedule_irqoff(&priv->tx_napi); + napi_schedule_irqoff(&priv->rx_napi); + } + + return IRQ_HANDLED; +} + +struct net_device *cndm_proto_create_netdev(struct cndm_proto_dev *cdev, int port, void __iomem *hw_addr) +{ + struct device *dev = cdev->dev; + struct net_device *ndev; + struct cndm_proto_priv *priv; + int ret = 0; + + ndev = alloc_etherdev_mqs(sizeof(*priv), 1, 1); + if (!ndev) { + dev_err(dev, "Failed to allocate net_device"); + return ERR_PTR(-ENOMEM); + } + + SET_NETDEV_DEV(ndev, dev); + ndev->dev_port = port; + + priv = netdev_priv(ndev); + memset(priv, 0, sizeof(*priv)); + + priv->dev = dev; + priv->ndev = ndev; + priv->cdev = cdev; + + priv->hw_addr = 
hw_addr; + + netif_set_real_num_tx_queues(ndev, 1); + netif_set_real_num_rx_queues(ndev, 1); + + ndev->addr_len = ETH_ALEN; + + eth_hw_addr_random(ndev); + + ndev->netdev_ops = &cndm_proto_netdev_ops; + + ndev->hw_features = 0; + ndev->features = 0; + + ndev->min_mtu = ETH_MIN_MTU; + ndev->max_mtu = 1500; + + priv->rxq_log_size = ilog2(256); + priv->rxq_size = 1 << priv->rxq_log_size; + priv->rxq_mask = priv->rxq_size-1; + priv->rxq_prod = 0; + priv->rxq_cons = 0; + + priv->txq_log_size = ilog2(256); + priv->txq_size = 1 << priv->txq_log_size; + priv->txq_mask = priv->txq_size-1; + priv->txq_prod = 0; + priv->txq_cons = 0; + + priv->rxcq_log_size = ilog2(256); + priv->rxcq_size = 1 << priv->rxcq_log_size; + priv->rxcq_mask = priv->rxcq_size-1; + priv->rxcq_prod = 0; + priv->rxcq_cons = 0; + + priv->txcq_log_size = ilog2(256); + priv->txcq_size = 1 << priv->txcq_log_size; + priv->txcq_mask = priv->txcq_size-1; + priv->txcq_prod = 0; + priv->txcq_cons = 0; + + // allocate DMA buffers + priv->txq_region_len = priv->txq_size*16; + priv->txq_region = dma_alloc_coherent(dev, priv->txq_region_len, &priv->txq_region_addr, GFP_KERNEL | __GFP_ZERO); + if (!priv->txq_region) { + ret = -ENOMEM; + goto fail; + } + + priv->rxq_region_len = priv->rxq_size*16; + priv->rxq_region = dma_alloc_coherent(dev, priv->rxq_region_len, &priv->rxq_region_addr, GFP_KERNEL | __GFP_ZERO); + if (!priv->rxq_region) { + ret = -ENOMEM; + goto fail; + } + + priv->txcq_region_len = priv->txcq_size*16; + priv->txcq_region = dma_alloc_coherent(dev, priv->txcq_region_len, &priv->txcq_region_addr, GFP_KERNEL | __GFP_ZERO); + if (!priv->txcq_region) { + ret = -ENOMEM; + goto fail; + } + + priv->rxcq_region_len = priv->rxcq_size*16; + priv->rxcq_region = dma_alloc_coherent(dev, priv->rxcq_region_len, &priv->rxcq_region_addr, GFP_KERNEL | __GFP_ZERO); + if (!priv->rxcq_region) { + ret = -ENOMEM; + goto fail; + } + + // allocate info rings + priv->tx_info = kvzalloc(sizeof(*priv->tx_info) * priv->txq_size, 
GFP_KERNEL); + if (!priv->tx_info) { + ret = -ENOMEM; + goto fail; + } + + priv->rx_info = kvzalloc(sizeof(*priv->rx_info) * priv->rxq_size, GFP_KERNEL); + if (!priv->rx_info) { /* test the ring that was just allocated, not tx_info again */ + ret = -ENOMEM; + goto fail; + } + + iowrite32(0x00000000, priv->hw_addr + 0x200); + iowrite32(priv->rxq_prod & 0xffff, priv->hw_addr + 0x204); + iowrite32(lower_32_bits(priv->rxq_region_addr), priv->hw_addr + 0x208); + iowrite32(upper_32_bits(priv->rxq_region_addr), priv->hw_addr + 0x20c); /* safe when dma_addr_t is 32 bits wide */ + iowrite32(0x00000001 | (priv->rxq_log_size << 16), priv->hw_addr + 0x200); + + iowrite32(0x00000000, priv->hw_addr + 0x100); + iowrite32(priv->txq_prod & 0xffff, priv->hw_addr + 0x104); + iowrite32(lower_32_bits(priv->txq_region_addr), priv->hw_addr + 0x108); + iowrite32(upper_32_bits(priv->txq_region_addr), priv->hw_addr + 0x10c); + iowrite32(0x00000001 | (priv->txq_log_size << 16), priv->hw_addr + 0x100); + + iowrite32(0x00000000, priv->hw_addr + 0x400); + iowrite32(lower_32_bits(priv->rxcq_region_addr), priv->hw_addr + 0x408); + iowrite32(upper_32_bits(priv->rxcq_region_addr), priv->hw_addr + 0x40c); + iowrite32(0x00000001 | (priv->rxcq_log_size << 16), priv->hw_addr + 0x400); + + iowrite32(0x00000000, priv->hw_addr + 0x300); + iowrite32(lower_32_bits(priv->txcq_region_addr), priv->hw_addr + 0x308); + iowrite32(upper_32_bits(priv->txcq_region_addr), priv->hw_addr + 0x30c); + iowrite32(0x00000001 | (priv->txcq_log_size << 16), priv->hw_addr + 0x300); + + netif_carrier_off(ndev); + + ret = register_netdev(ndev); + if (ret) { + dev_err(dev, "netdev registration failed"); + goto fail; + } + + priv->registered = 1; + + return ndev; + +fail: + cndm_proto_destroy_netdev(ndev); + return ERR_PTR(ret); +} + +void cndm_proto_destroy_netdev(struct net_device *ndev) +{ + struct cndm_proto_priv *priv = netdev_priv(ndev); + struct device *dev = priv->dev; + + iowrite32(0x00000000, priv->hw_addr + 0x200); + iowrite32(0x00000000, priv->hw_addr + 0x100); + iowrite32(0x00000000, priv->hw_addr + 0x400); + iowrite32(0x00000000, priv->hw_addr + 0x300); + + if 
(priv->registered) + unregister_netdev(ndev); + + if (priv->tx_info) { + cndm_proto_free_tx_buf(priv); + kvfree(priv->tx_info); + } + if (priv->rx_info) { + cndm_proto_free_rx_buf(priv); + kvfree(priv->rx_info); + } + if (priv->txq_region) + dma_free_coherent(dev, priv->txq_region_len, priv->txq_region, priv->txq_region_addr); + if (priv->rxq_region) + dma_free_coherent(dev, priv->rxq_region_len, priv->rxq_region, priv->rxq_region_addr); + if (priv->txcq_region) + dma_free_coherent(dev, priv->txcq_region_len, priv->txcq_region, priv->txcq_region_addr); + if (priv->rxcq_region) + dma_free_coherent(dev, priv->rxcq_region_len, priv->rxcq_region, priv->rxcq_region_addr); + + free_netdev(ndev); +} diff --git a/src/cndm_proto/modules/cndm_proto/cndm_proto_rx.c b/src/cndm_proto/modules/cndm_proto/cndm_proto_rx.c new file mode 100644 index 0000000..90e4919 --- /dev/null +++ b/src/cndm_proto/modules/cndm_proto/cndm_proto_rx.c @@ -0,0 +1,188 @@ +// SPDX-License-Identifier: GPL +// Copyright (c) 2025 FPGA Ninja + +#include "cndm_proto.h" + +static void cndm_proto_free_rx_desc(struct cndm_proto_priv *priv, int index) +{ + struct device *dev = priv->dev; + struct cndm_proto_rx_info *rx_info = &priv->rx_info[index]; + + netdev_dbg(priv->ndev, "Free RX desc index %d", index); + + if (!rx_info->page) + return; + + dma_unmap_page(dev, rx_info->dma_addr, rx_info->len, DMA_FROM_DEVICE); + rx_info->dma_addr = 0; + __free_pages(rx_info->page, 0); + rx_info->page = NULL; +} + +int cndm_proto_free_rx_buf(struct cndm_proto_priv *priv) +{ + u32 index; + int cnt = 0; + + while (priv->rxq_prod != priv->rxq_cons) { + index = priv->rxq_cons & priv->rxq_mask; + cndm_proto_free_rx_desc(priv, index); + priv->rxq_cons++; + cnt++; + } + + return cnt; +} + +static int cndm_proto_prepare_rx_desc(struct cndm_proto_priv *priv, int index) +{ + struct device *dev = priv->dev; + struct cndm_proto_rx_info *rx_info = &priv->rx_info[index]; + struct cndm_proto_desc *rx_desc = (struct cndm_proto_desc 
*)(priv->rxq_region + index*16); + struct page *page; + u32 len = PAGE_SIZE; + dma_addr_t dma_addr; + + netdev_dbg(priv->ndev, "Prepare RX desc index %d", index); + + page = dev_alloc_pages(0); + if (unlikely(!page)) { + netdev_err(priv->ndev, "Failed to allocate page"); + return -ENOMEM; + } + + dma_addr = dma_map_page(dev, page, 0, len, DMA_FROM_DEVICE); + + if (unlikely(dma_mapping_error(dev, dma_addr))) { + netdev_err(priv->ndev, "Mapping failed"); + __free_pages(page, 0); + return -ENOMEM; /* a real errno instead of a bare -1 */ + } + + rx_desc->len = cpu_to_le32(len); + rx_desc->addr = cpu_to_le64(dma_addr); + + rx_info->page = page; + rx_info->len = len; + rx_info->dma_addr = dma_addr; + + return 0; +} + +int cndm_proto_refill_rx_buffers(struct cndm_proto_priv *priv) +{ + u32 missing = 128 - (priv->rxq_prod - priv->rxq_cons); // TODO + int ret = 0; + + if (missing < 8) + return 0; + + for (; missing-- > 0;) { + ret = cndm_proto_prepare_rx_desc(priv, priv->rxq_prod & priv->rxq_mask); + if (ret) + break; + priv->rxq_prod++; + } + + dma_wmb(); + iowrite32(priv->rxq_prod & 0xffff, priv->hw_addr + 0x204); + + return ret; +} + +static int cndm_proto_process_rx_cq(struct net_device *ndev, int napi_budget) +{ + struct cndm_proto_priv *priv = netdev_priv(ndev); + struct cndm_proto_cpl *cpl; + struct cndm_proto_rx_info *rx_info; + struct sk_buff *skb; + struct page *page; + int done = 0; + u32 len; + + u32 cq_cons_ptr; + u32 cq_index; + u32 cons_ptr; + u32 index; + + cq_cons_ptr = priv->rxcq_cons; + cons_ptr = priv->rxq_cons; + + while (done < napi_budget) { + cq_index = cq_cons_ptr & priv->rxcq_mask; + cpl = (struct cndm_proto_cpl *)(priv->rxcq_region + cq_index * 16); + + if (!!(cpl->phase & 0x80) == !!(cq_cons_ptr & priv->rxcq_size)) + break; + + dma_rmb(); + + index = cons_ptr & priv->rxq_mask; + + rx_info = &priv->rx_info[index]; + page = rx_info->page; + len = min_t(u32, le32_to_cpu(cpl->len), rx_info->len); /* cpl->len is declared __le32 */ + + netdev_dbg(priv->ndev, "Process RX cpl index %d", index); + + if (!page) { + 
netdev_err(priv->ndev, "Null page at index %d", index); + break; + } + + dma_unmap_page(priv->dev, rx_info->dma_addr, rx_info->len, DMA_FROM_DEVICE); + rx_info->dma_addr = 0; + rx_info->page = NULL; + + if (len < ETH_HLEN) { + netdev_warn(priv->ndev, "Dropping short frame (len %d)", len); + __free_pages(page, 0); /* page is already unmapped and detached from rx_info */ + goto rx_drop; + } + + skb = napi_get_frags(&priv->rx_napi); + if (!skb) { + netdev_err(priv->ndev, "Failed to allocate skb %d", index); + __free_pages(page, 0); /* drop the frame and consume the completion so the ring cannot stall */ + goto rx_drop; + } + + __skb_fill_page_desc(skb, 0, page, 0, len); + + skb_shinfo(skb)->nr_frags = 1; + skb->len = len; + skb->data_len = len; + skb->truesize = rx_info->len; + + napi_gro_frags(&priv->rx_napi); + +rx_drop: + done++; + cq_cons_ptr++; + cons_ptr++; + } + + priv->rxcq_cons = cq_cons_ptr; + priv->rxq_cons = cons_ptr; + + cndm_proto_refill_rx_buffers(priv); + + return done; +} + +int cndm_proto_poll_rx_cq(struct napi_struct *napi, int budget) +{ + struct cndm_proto_priv *priv = container_of(napi, struct cndm_proto_priv, rx_napi); + int done; + + done = cndm_proto_process_rx_cq(priv->ndev, budget); + + if (done == budget) + return done; + + napi_complete(napi); + + // TODO re-enable interrupts + + return done; +} diff --git a/src/cndm_proto/modules/cndm_proto/cndm_proto_tx.c b/src/cndm_proto/modules/cndm_proto/cndm_proto_tx.c new file mode 100644 index 0000000..ddeeb84 --- /dev/null +++ b/src/cndm_proto/modules/cndm_proto/cndm_proto_tx.c @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: GPL +// Copyright (c) 2025 FPGA Ninja + +#include "cndm_proto.h" + +static void cndm_proto_free_tx_desc(struct cndm_proto_priv *priv, int index, int napi_budget) +{ + struct device *dev = priv->dev; + struct cndm_proto_tx_info *tx_info = &priv->tx_info[index]; + struct sk_buff *skb = tx_info->skb; + + netdev_dbg(priv->ndev, "Free TX desc index %d", index); + + dma_unmap_single(dev, tx_info->dma_addr, tx_info->len, DMA_TO_DEVICE); + tx_info->dma_addr = 0; + + napi_consume_skb(skb, napi_budget); + tx_info->skb = NULL; +} + +int cndm_proto_free_tx_buf(struct 
cndm_proto_priv *priv) +{ + u32 index; + int cnt = 0; + + while (priv->txq_prod != priv->txq_cons) { + index = priv->txq_cons & priv->txq_mask; + cndm_proto_free_tx_desc(priv, index, 0); + priv->txq_cons++; + cnt++; + } + + return cnt; +} + +static int cndm_proto_process_tx_cq(struct net_device *ndev, int napi_budget) +{ + struct cndm_proto_priv *priv = netdev_priv(ndev); + struct cndm_proto_cpl *cpl; + int done = 0; + + u32 cq_cons_ptr; + u32 cq_index; + u32 cons_ptr; + u32 index; + + cq_cons_ptr = priv->txcq_cons; + cons_ptr = priv->txq_cons; + + while (done < napi_budget) { + cq_index = cq_cons_ptr & priv->txcq_mask; + cpl = (struct cndm_proto_cpl *)(priv->txcq_region + cq_index * 16); + + if (!!(cpl->phase & 0x80) == !!(cq_cons_ptr & priv->txcq_size)) + break; + + dma_rmb(); + + index = cons_ptr & priv->txq_mask; + + cndm_proto_free_tx_desc(priv, index, napi_budget); + + done++; + cq_cons_ptr++; + cons_ptr++; + } + + priv->txcq_cons = cq_cons_ptr; + priv->txq_cons = cons_ptr; + + if (netif_tx_queue_stopped(priv->tx_queue) && (done != 0 || priv->txq_prod == priv->txq_cons)) + netif_tx_wake_queue(priv->tx_queue); + + return done; +} + +int cndm_proto_poll_tx_cq(struct napi_struct *napi, int budget) +{ + struct cndm_proto_priv *priv = container_of(napi, struct cndm_proto_priv, tx_napi); + int done; + + done = cndm_proto_process_tx_cq(priv->ndev, budget); + + if (done == budget) + return done; + + napi_complete(napi); + + // TODO re-enable interrupts + + return done; +} + +int cndm_proto_start_xmit(struct sk_buff *skb, struct net_device *ndev) +{ + struct cndm_proto_priv *priv = netdev_priv(ndev); + struct device *dev = priv->dev; + u32 index; + u32 cons_ptr; + u32 len; + dma_addr_t dma_addr; + struct cndm_proto_desc *tx_desc; + struct cndm_proto_tx_info *tx_info; + + netdev_dbg(ndev, "Got packet for TX"); + + // TODO workaround for MAC padding bug + if (skb_put_padto(skb, 60)) + goto tx_drop; + + if (skb->len < ETH_HLEN) { + netdev_warn(ndev, "Dropping short 
frame"); + goto tx_drop; + } + + cons_ptr = READ_ONCE(priv->txq_cons); + + index = priv->txq_prod & priv->txq_mask; + + tx_desc = (struct cndm_proto_desc *)(priv->txq_region + index*16); + tx_info = &priv->tx_info[index]; + + len = skb_headlen(skb); + + dma_addr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE); + + if (unlikely(dma_mapping_error(dev, dma_addr))) { + netdev_err(ndev, "Mapping failed"); + goto tx_drop; + } + + tx_desc->len = cpu_to_le32(len); + tx_desc->addr = cpu_to_le64(dma_addr); + + tx_info->skb = skb; + tx_info->len = len; + tx_info->dma_addr = dma_addr; + + netdev_dbg(ndev, "Write desc index %d len %d", index, len); + + priv->txq_prod++; + + if (priv->txq_prod - priv->txq_cons >= 128) { + netdev_dbg(ndev, "TX ring full"); + netif_tx_stop_queue(priv->tx_queue); + } + + dma_wmb(); + iowrite32(priv->txq_prod & 0xffff, priv->hw_addr + 0x104); + + return NETDEV_TX_OK; + +tx_drop: + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; +} diff --git a/src/cndm_proto/rtl/cndm_proto_core.f b/src/cndm_proto/rtl/cndm_proto_core.f new file mode 100644 index 0000000..8fe473d --- /dev/null +++ b/src/cndm_proto/rtl/cndm_proto_core.f @@ -0,0 +1,14 @@ +cndm_proto_core.sv +cndm_proto_port.sv +cndm_proto_rx.sv +cndm_proto_tx.sv +cndm_proto_desc_rd.sv +cndm_proto_cpl_wr.sv +../lib/taxi/src/dma/rtl/taxi_dma_client_axis_source.sv +../lib/taxi/src/dma/rtl/taxi_dma_client_axis_sink.sv +../lib/taxi/src/dma/rtl/taxi_dma_if_mux.f +../lib/taxi/src/dma/rtl/taxi_dma_psdpram.sv +../lib/taxi/src/axi/rtl/taxi_axil_interconnect_1s.f +../lib/taxi/src/axis/rtl/taxi_axis_async_fifo.f +../lib/taxi/src/axis/rtl/taxi_axis_arb_mux.f +../lib/taxi/src/axis/rtl/taxi_axis_demux.sv diff --git a/src/cndm_proto/rtl/cndm_proto_core.sv b/src/cndm_proto/rtl/cndm_proto_core.sv new file mode 100644 index 0000000..fd2b6d4 --- /dev/null +++ b/src/cndm_proto/rtl/cndm_proto_core.sv @@ -0,0 +1,299 @@ +// SPDX-License-Identifier: CERN-OHL-S-2.0 +/* + +Copyright (c) 2025 FPGA Ninja, LLC + +Authors: +- 
Alex Forencich + +*/ + +`resetall +`timescale 1ns / 1ps +`default_nettype none + +/* + * Corundum-proto core logic + */ +module cndm_proto_core #( + parameter PORTS = 2 +) +( + input wire logic clk, + input wire logic rst, + + /* + * Control register interface + */ + taxi_axil_if.wr_slv s_axil_wr, + taxi_axil_if.rd_slv s_axil_rd, + + /* + * DMA + */ + taxi_dma_desc_if.req_src dma_rd_desc_req, + taxi_dma_desc_if.sts_snk dma_rd_desc_sts, + taxi_dma_desc_if.req_src dma_wr_desc_req, + taxi_dma_desc_if.sts_snk dma_wr_desc_sts, + taxi_dma_ram_if.wr_slv dma_ram_wr, + taxi_dma_ram_if.rd_slv dma_ram_rd, + + output wire logic [PORTS-1:0] irq, + + /* + * Ethernet + */ + input wire logic mac_tx_clk[PORTS], + input wire logic mac_tx_rst[PORTS], + taxi_axis_if.src mac_axis_tx[PORTS], + taxi_axis_if.snk mac_axis_tx_cpl[PORTS], + + input wire logic mac_rx_clk[PORTS], + input wire logic mac_rx_rst[PORTS], + taxi_axis_if.snk mac_axis_rx[PORTS] +); + +localparam CL_PORTS = $clog2(PORTS); + +localparam AXIL_ADDR_W = s_axil_wr.ADDR_W; +localparam AXIL_DATA_W = s_axil_wr.DATA_W; + +localparam RAM_SEGS = dma_ram_wr.SEGS; +localparam RAM_SEG_ADDR_W = dma_ram_wr.SEG_ADDR_W; +localparam RAM_SEG_DATA_W = dma_ram_wr.SEG_DATA_W; +localparam RAM_SEG_BE_W = dma_ram_wr.SEG_BE_W; +localparam RAM_SEL_W = dma_ram_wr.SEL_W; + +localparam PORT_OFFSET = 1; + +taxi_axil_if #( + .DATA_W(s_axil_wr.DATA_W), + .ADDR_W(16), + .STRB_W(s_axil_wr.STRB_W), + .AWUSER_EN(s_axil_wr.AWUSER_EN), + .AWUSER_W(s_axil_wr.AWUSER_W), + .WUSER_EN(s_axil_wr.WUSER_EN), + .WUSER_W(s_axil_wr.WUSER_W), + .BUSER_EN(s_axil_wr.BUSER_EN), + .BUSER_W(s_axil_wr.BUSER_W), + .ARUSER_EN(s_axil_wr.ARUSER_EN), + .ARUSER_W(s_axil_wr.ARUSER_W), + .RUSER_EN(s_axil_wr.RUSER_EN), + .RUSER_W(s_axil_wr.RUSER_W) +) +s_axil_ctrl[PORTS+PORT_OFFSET](); + +taxi_axil_interconnect_1s #( + .M_COUNT($size(s_axil_ctrl)), + .ADDR_W(s_axil_wr.ADDR_W), + .M_REGIONS(1), + .M_BASE_ADDR('0), + .M_ADDR_W({$size(s_axil_ctrl){{1{32'd16}}}}), + 
.M_SECURE({$size(s_axil_ctrl){1'b0}}) +) +port_intercon_inst ( + .clk(clk), + .rst(rst), + + /* + * AXI4-lite slave interface + */ + .s_axil_wr(s_axil_wr), + .s_axil_rd(s_axil_rd), + + /* + * AXI4-lite master interfaces + */ + .m_axil_wr(s_axil_ctrl), + .m_axil_rd(s_axil_ctrl) +); + +logic s_axil_awready_reg = 1'b0; +logic s_axil_wready_reg = 1'b0; +logic s_axil_bvalid_reg = 1'b0; + +logic s_axil_arready_reg = 1'b0; +logic [AXIL_DATA_W-1:0] s_axil_rdata_reg = '0; +logic s_axil_rvalid_reg = 1'b0; + +assign s_axil_ctrl[0].awready = s_axil_awready_reg; +assign s_axil_ctrl[0].wready = s_axil_wready_reg; +assign s_axil_ctrl[0].bresp = '0; +assign s_axil_ctrl[0].buser = '0; +assign s_axil_ctrl[0].bvalid = s_axil_bvalid_reg; + +assign s_axil_ctrl[0].arready = s_axil_arready_reg; +assign s_axil_ctrl[0].rdata = s_axil_rdata_reg; +assign s_axil_ctrl[0].rresp = '0; +assign s_axil_ctrl[0].ruser = '0; +assign s_axil_ctrl[0].rvalid = s_axil_rvalid_reg; + +always_ff @(posedge clk) begin + s_axil_awready_reg <= 1'b0; + s_axil_wready_reg <= 1'b0; + s_axil_bvalid_reg <= s_axil_bvalid_reg && !s_axil_ctrl[0].bready; + + s_axil_arready_reg <= 1'b0; + s_axil_rvalid_reg <= s_axil_rvalid_reg && !s_axil_ctrl[0].rready; + + if (s_axil_ctrl[0].awvalid && s_axil_ctrl[0].wvalid && !s_axil_bvalid_reg) begin + s_axil_awready_reg <= 1'b1; + s_axil_wready_reg <= 1'b1; + s_axil_bvalid_reg <= 1'b1; + + case ({s_axil_ctrl[0].awaddr[15:2], 2'b00}) + // 16'h0100: reg <= s_axil_ctrl[0].wdata; + default: begin end + endcase + end + + if (s_axil_ctrl[0].arvalid && !s_axil_rvalid_reg) begin + s_axil_rdata_reg <= '0; + + s_axil_arready_reg <= 1'b1; + s_axil_rvalid_reg <= 1'b1; + + case ({s_axil_ctrl[0].araddr[15:2], 2'b00}) + 16'h0100: s_axil_rdata_reg <= PORTS; // port count + 16'h0104: s_axil_rdata_reg <= 32'h00010000; // port offset + 16'h0108: s_axil_rdata_reg <= 32'h00010000; // port stride + default: begin end + endcase + end + + if (rst) begin + s_axil_awready_reg <= 1'b0; + s_axil_wready_reg <= 
1'b0; + s_axil_bvalid_reg <= 1'b0; + + s_axil_arready_reg <= 1'b0; + s_axil_rvalid_reg <= 1'b0; + end +end + +taxi_dma_desc_if #( + .SRC_ADDR_W(dma_rd_desc_req.SRC_ADDR_W), + .SRC_SEL_EN(dma_rd_desc_req.SRC_SEL_EN), + .SRC_SEL_W(dma_rd_desc_req.SRC_SEL_W), + .SRC_ASID_EN(dma_rd_desc_req.SRC_ASID_EN), + .DST_ADDR_W(dma_rd_desc_req.DST_ADDR_W), + .DST_SEL_EN(dma_rd_desc_req.DST_SEL_EN), + .DST_SEL_W(dma_rd_desc_req.DST_SEL_W-CL_PORTS), + .DST_ASID_EN(dma_rd_desc_req.DST_ASID_EN), + .IMM_EN(dma_rd_desc_req.IMM_EN), + .LEN_W(dma_rd_desc_req.LEN_W), + .TAG_W(dma_rd_desc_req.TAG_W-CL_PORTS), + .ID_EN(dma_rd_desc_req.ID_EN), + .DEST_EN(dma_rd_desc_req.DEST_EN), + .USER_EN(dma_rd_desc_req.USER_EN) +) dma_rd_desc_int[PORTS](); + +taxi_dma_desc_if #( + .SRC_ADDR_W(dma_wr_desc_req.SRC_ADDR_W), + .SRC_SEL_EN(dma_wr_desc_req.SRC_SEL_EN), + .SRC_SEL_W(dma_wr_desc_req.SRC_SEL_W-CL_PORTS), + .SRC_ASID_EN(dma_wr_desc_req.SRC_ASID_EN), + .DST_ADDR_W(dma_wr_desc_req.DST_ADDR_W), + .DST_SEL_EN(dma_wr_desc_req.DST_SEL_EN), + .DST_SEL_W(dma_wr_desc_req.DST_SEL_W), + .DST_ASID_EN(dma_wr_desc_req.DST_ASID_EN), + .IMM_EN(dma_wr_desc_req.IMM_EN), + .IMM_W(dma_wr_desc_req.IMM_W), + .LEN_W(dma_wr_desc_req.LEN_W), + .TAG_W(dma_wr_desc_req.TAG_W-CL_PORTS), + .ID_EN(dma_wr_desc_req.ID_EN), + .DEST_EN(dma_wr_desc_req.DEST_EN), + .USER_EN(dma_wr_desc_req.USER_EN) +) dma_wr_desc_int[PORTS](); + +taxi_dma_ram_if #( + .SEGS(RAM_SEGS), + .SEG_ADDR_W(RAM_SEG_ADDR_W), + .SEG_DATA_W(RAM_SEG_DATA_W), + .SEG_BE_W(RAM_SEG_BE_W), + .SEL_W(RAM_SEL_W-CL_PORTS) +) dma_ram_int[PORTS](); + +taxi_dma_if_mux #( + .PORTS(PORTS), + .ARB_ROUND_ROBIN(1), + .ARB_LSB_HIGH_PRIO(1) +) +dma_mux_inst ( + .clk(clk), + .rst(rst), + + /* + * DMA descriptors from clients + */ + .client_rd_req(dma_rd_desc_int), + .client_rd_sts(dma_rd_desc_int), + .client_wr_req(dma_wr_desc_int), + .client_wr_sts(dma_wr_desc_int), + + /* + * DMA descriptors to DMA engines + */ + .dma_rd_req(dma_rd_desc_req), + .dma_rd_sts(dma_rd_desc_sts), + 
.dma_wr_req(dma_wr_desc_req), + .dma_wr_sts(dma_wr_desc_sts), + + /* + * RAM interface (from DMA interface) + */ + .dma_ram_wr(dma_ram_wr), + .dma_ram_rd(dma_ram_rd), + + /* + * RAM interface (towards RAM) + */ + .client_ram_wr(dma_ram_int), + .client_ram_rd(dma_ram_int) +); + +for (genvar p = 0; p < PORTS; p = p + 1) begin : port + + cndm_proto_port #( + .PORTS(PORTS) + ) + port_inst ( + .clk(clk), + .rst(rst), + + /* + * Control register interface + */ + .s_axil_wr(s_axil_ctrl[PORT_OFFSET+p]), + .s_axil_rd(s_axil_ctrl[PORT_OFFSET+p]), + + /* + * DMA + */ + .dma_rd_desc_req(dma_rd_desc_int[p]), + .dma_rd_desc_sts(dma_rd_desc_int[p]), + .dma_wr_desc_req(dma_wr_desc_int[p]), + .dma_wr_desc_sts(dma_wr_desc_int[p]), + .dma_ram_wr(dma_ram_int[p]), + .dma_ram_rd(dma_ram_int[p]), + + .irq(irq[p]), + + /* + * Ethernet + */ + .mac_tx_clk(mac_tx_clk[p]), + .mac_tx_rst(mac_tx_rst[p]), + .mac_axis_tx(mac_axis_tx[p]), + .mac_axis_tx_cpl(mac_axis_tx_cpl[p]), + + .mac_rx_clk(mac_rx_clk[p]), + .mac_rx_rst(mac_rx_rst[p]), + .mac_axis_rx(mac_axis_rx[p]) + ); + +end + +endmodule + +`resetall diff --git a/src/cndm_proto/rtl/cndm_proto_cpl_wr.sv b/src/cndm_proto/rtl/cndm_proto_cpl_wr.sv new file mode 100644 index 0000000..8b9eca6 --- /dev/null +++ b/src/cndm_proto/rtl/cndm_proto_cpl_wr.sv @@ -0,0 +1,227 @@ +// SPDX-License-Identifier: CERN-OHL-S-2.0 +/* + +Copyright (c) 2025 FPGA Ninja, LLC + +Authors: +- Alex Forencich + +*/ + +`resetall +`timescale 1ns / 1ps +`default_nettype none + +/* + * Corundum-proto completion write module + */ +module cndm_proto_cpl_wr +( + input wire logic clk, + input wire logic rst, + + /* + * DMA + */ + taxi_dma_desc_if.req_src dma_wr_desc_req, + taxi_dma_desc_if.sts_snk dma_wr_desc_sts, + taxi_dma_ram_if.rd_slv dma_ram_rd, + + input wire logic txcq_en, + input wire logic [3:0] txcq_size, + input wire logic [63:0] txcq_base_addr, + output wire logic [15:0] txcq_prod, + input wire logic rxcq_en, + input wire logic [3:0] rxcq_size, + input wire logic [63:0] 
rxcq_base_addr, + output wire logic [15:0] rxcq_prod, + + taxi_axis_if.snk axis_cpl[2], + output wire logic irq +); + +taxi_axis_if #( + .DATA_W(axis_cpl[0].DATA_W), + .KEEP_EN(axis_cpl[0].KEEP_EN), + .KEEP_W(axis_cpl[0].KEEP_W), + .STRB_EN(axis_cpl[0].STRB_EN), + .LAST_EN(axis_cpl[0].LAST_EN), + .ID_EN(1), + .ID_W(1), + .DEST_EN(axis_cpl[0].DEST_EN), + .DEST_W(axis_cpl[0].DEST_W), + .USER_EN(axis_cpl[0].USER_EN), + .USER_W(axis_cpl[0].USER_W) +) cpl_comb(); // output of mux_inst; carries a 1-bit tid + +localparam [2:0] + STATE_IDLE = 0, + STATE_RX_CPL = 1, + STATE_WRITE_DATA = 2; + +logic [2:0] state_reg = STATE_IDLE; + +logic [15:0] txcq_prod_ptr_reg = '0; +logic [15:0] rxcq_prod_ptr_reg = '0; + +logic phase_tag_reg = 1'b0; // replaces bit 127 of the record handed to the DMA engine + +logic irq_reg = 1'b0; + +assign txcq_prod = txcq_prod_ptr_reg; +assign rxcq_prod = rxcq_prod_ptr_reg; + +assign irq = irq_reg; + +always_ff @(posedge clk) begin // completion write state machine + cpl_comb.tready <= 1'b0; + + dma_wr_desc_req.req_src_sel <= '0; + dma_wr_desc_req.req_src_asid <= '0; + dma_wr_desc_req.req_dst_sel <= '0; + dma_wr_desc_req.req_dst_asid <= '0; + dma_wr_desc_req.req_imm <= '0; + dma_wr_desc_req.req_imm_en <= '0; + dma_wr_desc_req.req_len <= 16; // every write is one 16-byte record + dma_wr_desc_req.req_tag <= '0; + dma_wr_desc_req.req_id <= '0; + dma_wr_desc_req.req_dest <= '0; + dma_wr_desc_req.req_user <= '0; + dma_wr_desc_req.req_valid <= dma_wr_desc_req.req_valid && !dma_wr_desc_req.req_ready; // hold the request until it is accepted + + if (!txcq_en) begin // a disabled queue keeps its producer pointer at zero + txcq_prod_ptr_reg <= '0; + end + + if (!rxcq_en) begin + rxcq_prod_ptr_reg <= '0; + end + + irq_reg <= 1'b0; // irq is a single-cycle pulse + + case (state_reg) + STATE_IDLE: begin + dma_wr_desc_req.req_src_addr <= '0; + + if (cpl_comb.tid == 0) begin // tid 0 targets the TX CQ, anything else the RX CQ + dma_wr_desc_req.req_dst_addr <= txcq_base_addr + 64'(16'(txcq_prod_ptr_reg & ({16{1'b1}} >> (16 - txcq_size))) * 16); // slot address: pointer masked to 2**txcq_size entries, 16 bytes each + phase_tag_reg <= !txcq_prod_ptr_reg[txcq_size]; // inverse of the pointer wrap bit + if (cpl_comb.tvalid && !cpl_comb.tready) begin // a record is waiting and is not being released this cycle + if (txcq_en) begin + txcq_prod_ptr_reg <= txcq_prod_ptr_reg + 1; // advance only for a record that is written; while disabled the pointer stays cleared + dma_wr_desc_req.req_valid <= 1'b1; + state_reg <= STATE_WRITE_DATA; + end else begin // queue disabled: the record stays pending + state_reg <= 
STATE_IDLE; + end + end + end else begin + dma_wr_desc_req.req_dst_addr <= rxcq_base_addr + 64'(16'(rxcq_prod_ptr_reg & ({16{1'b1}} >> (16 - rxcq_size))) * 16); + phase_tag_reg <= !rxcq_prod_ptr_reg[rxcq_size]; + if (cpl_comb.tvalid && !cpl_comb.tready) begin + if (rxcq_en) begin + rxcq_prod_ptr_reg <= rxcq_prod_ptr_reg + 1; // same gating as the TX CQ branch + dma_wr_desc_req.req_valid <= 1'b1; + state_reg <= STATE_WRITE_DATA; + end else begin + state_reg <= STATE_IDLE; + end + end + end + end + STATE_WRITE_DATA: begin + if (dma_wr_desc_sts.sts_valid) begin // DMA write reported status: release the record and pulse irq + cpl_comb.tready <= 1'b1; + irq_reg <= 1'b1; + state_reg <= STATE_IDLE; + end + end + default: begin + state_reg <= STATE_IDLE; + end + endcase + + if (rst) begin + state_reg <= STATE_IDLE; + txcq_prod_ptr_reg <= '0; + rxcq_prod_ptr_reg <= '0; + irq_reg <= 1'b0; + end +end + +taxi_axis_arb_mux #( + .S_COUNT(2), + .UPDATE_TID(1), + .ARB_ROUND_ROBIN(1), + .ARB_LSB_HIGH_PRIO(1) +) +mux_inst ( + .clk(clk), + .rst(rst), + + /* + * AXI4-Stream input (sink) + */ + .s_axis(axis_cpl), + + /* + * AXI4-Stream output (source) + */ + .m_axis(cpl_comb) +); + +// extract parameters +localparam SEGS = dma_ram_rd.SEGS; +localparam SEG_ADDR_W = dma_ram_rd.SEG_ADDR_W; +localparam SEG_DATA_W = dma_ram_rd.SEG_DATA_W; +localparam SEG_BE_W = dma_ram_rd.SEG_BE_W; + +if (SEGS*SEG_DATA_W < 128) + $fatal(0, "Total segmented interface width must be at least 128 (instance %m)"); + +wire [SEGS-1:0][SEG_DATA_W-1:0] ram_data = (SEG_DATA_W*SEGS)'({phase_tag_reg, cpl_comb.tdata[126:0]}); // pending record with bit 127 replaced by the phase tag, split into segments + +for (genvar n = 0; n < SEGS; n = n + 1) begin // one single-entry read responder per RAM segment; the command address is never examined + + logic [0:0] rd_resp_valid_pipe_reg = '0; + logic [SEG_DATA_W-1:0] rd_resp_data_pipe_reg[1]; + + initial begin + for (integer i = 0; i < 1; i = i + 1) begin + rd_resp_data_pipe_reg[i] = '0; + end + end + + always_ff @(posedge clk) begin + if (dma_ram_rd.rd_resp_ready[n]) begin + rd_resp_valid_pipe_reg[0] <= 1'b0; + end + + for (integer j = 0; j > 0; j = j - 1) begin // never iterates: the response pipeline has a single stage + if (dma_ram_rd.rd_resp_ready[n] || (1'(~rd_resp_valid_pipe_reg) >> j) != 0) 
begin + rd_resp_valid_pipe_reg[j] <= rd_resp_valid_pipe_reg[j-1]; + rd_resp_data_pipe_reg[j] <= rd_resp_data_pipe_reg[j-1]; + rd_resp_valid_pipe_reg[j-1] <= 1'b0; + end + end + + if (dma_ram_rd.rd_cmd_valid[n] && dma_ram_rd.rd_cmd_ready[n]) begin + rd_resp_valid_pipe_reg[0] <= 1'b1; + rd_resp_data_pipe_reg[0] <= ram_data[n]; // segment n serves its own slice; index 0 here would repeat the low slice on every segment + end + + if (rst) begin + rd_resp_valid_pipe_reg <= '0; + end + end + + assign dma_ram_rd.rd_cmd_ready[n] = dma_ram_rd.rd_resp_ready[n] || &rd_resp_valid_pipe_reg == 0; // accept a command when the response register is free or being drained + + assign dma_ram_rd.rd_resp_valid[n] = rd_resp_valid_pipe_reg[0]; + assign dma_ram_rd.rd_resp_data[n] = rd_resp_data_pipe_reg[0]; + +end + +endmodule + +`resetall diff --git a/src/cndm_proto/rtl/cndm_proto_desc_rd.sv b/src/cndm_proto/rtl/cndm_proto_desc_rd.sv new file mode 100644 index 0000000..33dab59 --- /dev/null +++ b/src/cndm_proto/rtl/cndm_proto_desc_rd.sv @@ -0,0 +1,266 @@ +// SPDX-License-Identifier: CERN-OHL-S-2.0 +/* + +Copyright (c) 2025 FPGA Ninja, LLC + +Authors: +- Alex Forencich + +*/ + +`resetall +`timescale 1ns / 1ps +`default_nettype none + +/* + * Corundum-proto descriptor read module + */ +module cndm_proto_desc_rd +( + input wire logic clk, + input wire logic rst, + + /* + * DMA + */ + taxi_dma_desc_if.req_src dma_rd_desc_req, + taxi_dma_desc_if.sts_snk dma_rd_desc_sts, + taxi_dma_ram_if.wr_slv dma_ram_wr, + + input wire logic txq_en, + input wire logic [3:0] txq_size, + input wire logic [63:0] txq_base_addr, + input wire logic [15:0] txq_prod, + output wire logic [15:0] txq_cons, + input wire logic rxq_en, + input wire logic [3:0] rxq_size, + input wire logic [63:0] rxq_base_addr, + input wire logic [15:0] rxq_prod, + output wire logic [15:0] rxq_cons, + + input wire logic [1:0] desc_req, + taxi_axis_if.src axis_desc[2] +); + +localparam RAM_ADDR_W = 16; + +taxi_dma_desc_if #( + .SRC_ADDR_W(RAM_ADDR_W), + .SRC_SEL_EN(1'b0), + .SRC_ASID_EN(1'b0), + .DST_ADDR_W(RAM_ADDR_W), + .DST_SEL_EN(1'b0), + .DST_ASID_EN(1'b0), + .IMM_EN(1'b0), + .LEN_W(5), + 
.TAG_W(1), + .ID_EN(0), + .DEST_EN(1), + .DEST_W(1), + .USER_EN(1), + .USER_W(1) +) dma_desc(); // local descriptor channel driving dma_inst + +localparam [2:0] + STATE_IDLE = 0, + STATE_READ_DESC = 1, + STATE_READ_DATA = 2, + STATE_TX_DESC = 3; + +logic [2:0] state_reg = STATE_IDLE; + +logic [1:0] desc_req_reg = '0; // latched requests: bit 0 is served from the TX queue, bit 1 from the RX queue + +logic [15:0] txq_cons_ptr_reg = '0; +logic [15:0] rxq_cons_ptr_reg = '0; + +assign txq_cons = txq_cons_ptr_reg; +assign rxq_cons = rxq_cons_ptr_reg; + +always_ff @(posedge clk) begin // descriptor fetch state machine + dma_rd_desc_req.req_dst_addr <= '0; // was left undriven; the fetched record lands at offset 0 + dma_rd_desc_req.req_src_sel <= '0; + dma_rd_desc_req.req_src_asid <= '0; + dma_rd_desc_req.req_dst_sel <= '0; + dma_rd_desc_req.req_dst_asid <= '0; + dma_rd_desc_req.req_imm <= '0; + dma_rd_desc_req.req_imm_en <= '0; + dma_rd_desc_req.req_len <= 16; // one 16-byte descriptor per read + dma_rd_desc_req.req_tag <= '0; + dma_rd_desc_req.req_id <= '0; + dma_rd_desc_req.req_dest <= '0; + dma_rd_desc_req.req_user <= '0; + dma_rd_desc_req.req_valid <= dma_rd_desc_req.req_valid && !dma_rd_desc_req.req_ready; // hold the request until it is accepted + + dma_desc.req_src_addr <= '0; // was left undriven; assumes dma_inst takes its RAM address from req_src_addr - TODO confirm + dma_desc.req_src_sel <= '0; + dma_desc.req_src_asid <= '0; + dma_desc.req_dst_addr <= '0; + dma_desc.req_dst_sel <= '0; + dma_desc.req_dst_asid <= '0; + dma_desc.req_imm <= '0; + dma_desc.req_imm_en <= '0; + dma_desc.req_len <= 16; + dma_desc.req_tag <= '0; + dma_desc.req_id <= '0; + dma_desc.req_user <= '0; // NOTE(review): cleared every cycle, so the flag set in STATE_IDLE is only presented on the first cycle of req_valid + dma_desc.req_valid <= dma_desc.req_valid && !dma_desc.req_ready; + + desc_req_reg <= desc_req_reg | desc_req; // latch incoming requests until they are served + + if (!txq_en) begin // a disabled queue keeps its consumer pointer at zero + txq_cons_ptr_reg <= '0; + end + + if (!rxq_en) begin + rxq_cons_ptr_reg <= '0; + end + + case (state_reg) + STATE_IDLE: begin + if (desc_req_reg[1]) begin // the RX request is checked first + dma_rd_desc_req.req_src_addr <= rxq_base_addr + 64'(16'(rxq_cons_ptr_reg & ({16{1'b1}} >> (16 - rxq_size))) * 16); // slot address: pointer masked to 2**rxq_size entries, 16 bytes each + dma_desc.req_dest <= 1'b1; + desc_req_reg[1] <= 1'b0; + if (rxq_cons_ptr_reg == rxq_prod || !rxq_en) begin // queue empty or disabled: skip the host read and flag the response with req_user = 1 + dma_desc.req_user <= 1'b1; + dma_desc.req_valid <= 1'b1; + state_reg <= STATE_TX_DESC; + end else begin + dma_desc.req_user <= 1'b0; + dma_rd_desc_req.req_valid <= 1'b1; + rxq_cons_ptr_reg <= 
rxq_cons_ptr_reg + 1; + state_reg <= STATE_READ_DESC; + end + end else if (desc_req_reg[0]) begin // TX request, same sequence with req_dest = 0 + dma_rd_desc_req.req_src_addr <= txq_base_addr + 64'(16'(txq_cons_ptr_reg & ({16{1'b1}} >> (16 - txq_size))) * 16); + dma_desc.req_dest <= 1'b0; + desc_req_reg[0] <= 1'b0; + if (txq_cons_ptr_reg == txq_prod || !txq_en) begin + dma_desc.req_user <= 1'b1; + dma_desc.req_valid <= 1'b1; + state_reg <= STATE_TX_DESC; + end else begin + dma_desc.req_user <= 1'b0; + dma_rd_desc_req.req_valid <= 1'b1; + txq_cons_ptr_reg <= txq_cons_ptr_reg + 1; + state_reg <= STATE_READ_DESC; + end + end + end + STATE_READ_DESC: begin + if (dma_rd_desc_sts.sts_valid) begin // host read reported status: start the transfer on the local descriptor channel + dma_desc.req_valid <= 1'b1; + state_reg <= STATE_TX_DESC; + end + end + STATE_TX_DESC: begin + if (dma_desc.sts_valid) begin + state_reg <= STATE_IDLE; + end + end + default: begin + state_reg <= STATE_IDLE; + end + endcase + + if (rst) begin // only the state is reset here; the consumer pointers are cleared through the *_en inputs + state_reg <= STATE_IDLE; + end +end + +taxi_dma_ram_if #( + .SEGS(dma_ram_wr.SEGS), + .SEG_ADDR_W(dma_ram_wr.SEG_ADDR_W), + .SEG_DATA_W(dma_ram_wr.SEG_DATA_W), + .SEG_BE_W(dma_ram_wr.SEG_BE_W) +) dma_ram_rd(); + +taxi_dma_psdpram #( + .SIZE(1024), + .PIPELINE(2) +) +ram_inst ( + .clk(clk), + .rst(rst), + + /* + * Write port + */ + .dma_ram_wr(dma_ram_wr), + + /* + * Read port + */ + .dma_ram_rd(dma_ram_rd) +); + +taxi_axis_if #( + .DATA_W(axis_desc[0].DATA_W), + .KEEP_EN(axis_desc[0].KEEP_EN), + .KEEP_W(axis_desc[0].KEEP_W), + .LAST_EN(axis_desc[0].LAST_EN), + .ID_EN(axis_desc[0].ID_EN), + .ID_W(axis_desc[0].ID_W), + .DEST_EN(1), + .DEST_W(1), + .USER_EN(axis_desc[0].USER_EN), + .USER_W(axis_desc[0].USER_W) +) m_axis_rd_data(); + +taxi_dma_client_axis_source +dma_inst ( + .clk(clk), + .rst(rst), + + /* + * DMA descriptor + */ + .desc_req(dma_desc), + .desc_sts(dma_desc), + + /* + * AXI stream read data output + */ + .m_axis_rd_data(m_axis_rd_data), + + /* + * RAM interface + */ + .dma_ram_rd(dma_ram_rd), + + /* + * Configuration + */ + .enable(1'b1) +); + +taxi_axis_demux #( + 
.M_COUNT(2), + .TDEST_ROUTE(1) +) +demux_inst ( + .clk(clk), + .rst(rst), + + /* + * AXI4-Stream input (sink) + */ + .s_axis(m_axis_rd_data), + + /* + * AXI4-Stream output (source) + */ + .m_axis(axis_desc), + + /* + * Control + */ + .enable(1'b1), + .drop(1'b0), + .select('0) +); + +endmodule + +`resetall diff --git a/src/cndm_proto/rtl/cndm_proto_pcie_us.f b/src/cndm_proto/rtl/cndm_proto_pcie_us.f new file mode 100644 index 0000000..8ab936e --- /dev/null +++ b/src/cndm_proto/rtl/cndm_proto_pcie_us.f @@ -0,0 +1,5 @@ +cndm_proto_pcie_us.sv +cndm_proto_core.f +../lib/taxi/src/pcie/rtl/taxi_pcie_us_axil_master.sv +../lib/taxi/src/pcie/rtl/taxi_pcie_us_msi.sv +../lib/taxi/src/dma/rtl/taxi_dma_if_pcie_us.f diff --git a/src/cndm_proto/rtl/cndm_proto_pcie_us.sv b/src/cndm_proto/rtl/cndm_proto_pcie_us.sv new file mode 100644 index 0000000..f082dd5 --- /dev/null +++ b/src/cndm_proto/rtl/cndm_proto_pcie_us.sv @@ -0,0 +1,465 @@ +// SPDX-License-Identifier: CERN-OHL-S-2.0 +/* + +Copyright (c) 2025 FPGA Ninja, LLC + +Authors: +- Alex Forencich + +*/ + +`resetall +`timescale 1ns / 1ps +`default_nettype none + +/* + * Corundum-proto core logic for UltraScale PCIe + */ +module cndm_proto_pcie_us #( + // simulation (set to avoid vendor primitives) + parameter logic SIM = 1'b0, + // vendor ("GENERIC", "XILINX", "ALTERA") + parameter string VENDOR = "XILINX", + // device family + parameter string FAMILY = "kintexuplus", + parameter PORTS = 2, + parameter RQ_SEQ_NUM_W = 6, + parameter BAR0_APERTURE = 24 +) +( + /* + * PCIe + */ + input wire logic pcie_clk, + input wire logic pcie_rst, + taxi_axis_if.snk s_axis_pcie_cq, + taxi_axis_if.src m_axis_pcie_cc, + taxi_axis_if.src m_axis_pcie_rq, + taxi_axis_if.snk s_axis_pcie_rc, + + input wire [RQ_SEQ_NUM_W-1:0] pcie_rq_seq_num0, + input wire pcie_rq_seq_num_vld0, + input wire [RQ_SEQ_NUM_W-1:0] pcie_rq_seq_num1, + input wire pcie_rq_seq_num_vld1, + + input wire [2:0] cfg_max_payload, + input wire [2:0] cfg_max_read_req, + input wire [3:0] 
cfg_rcb_status, + + output wire [9:0] cfg_mgmt_addr, + output wire [7:0] cfg_mgmt_function_number, + output wire cfg_mgmt_write, + output wire [31:0] cfg_mgmt_write_data, + output wire [3:0] cfg_mgmt_byte_enable, + output wire cfg_mgmt_read, + input wire [31:0] cfg_mgmt_read_data, + input wire cfg_mgmt_read_write_done, + + input wire [7:0] cfg_fc_ph, + input wire [11:0] cfg_fc_pd, + input wire [7:0] cfg_fc_nph, + input wire [11:0] cfg_fc_npd, + input wire [7:0] cfg_fc_cplh, + input wire [11:0] cfg_fc_cpld, + output wire [2:0] cfg_fc_sel, + + input wire [3:0] cfg_interrupt_msi_enable, + input wire [11:0] cfg_interrupt_msi_mmenable, + input wire cfg_interrupt_msi_mask_update, + input wire [31:0] cfg_interrupt_msi_data, + output wire [1:0] cfg_interrupt_msi_select, + output wire [31:0] cfg_interrupt_msi_int, + output wire [31:0] cfg_interrupt_msi_pending_status, + output wire cfg_interrupt_msi_pending_status_data_enable, + output wire [1:0] cfg_interrupt_msi_pending_status_function_num, + input wire cfg_interrupt_msi_sent, + input wire cfg_interrupt_msi_fail, + output wire [2:0] cfg_interrupt_msi_attr, + output wire cfg_interrupt_msi_tph_present, + output wire [1:0] cfg_interrupt_msi_tph_type, + output wire [7:0] cfg_interrupt_msi_tph_st_tag, + output wire [7:0] cfg_interrupt_msi_function_number, + + /* + * Ethernet + */ + input wire logic mac_tx_clk[PORTS], + input wire logic mac_tx_rst[PORTS], + taxi_axis_if.src mac_axis_tx[PORTS], + taxi_axis_if.snk mac_axis_tx_cpl[PORTS], + + input wire logic mac_rx_clk[PORTS], + input wire logic mac_rx_rst[PORTS], + taxi_axis_if.snk mac_axis_rx[PORTS] +); + +localparam CL_PORTS = $clog2(PORTS); + +localparam AXIL_DATA_W = 32; +localparam AXIL_ADDR_W = BAR0_APERTURE; + +taxi_axil_if #( + .DATA_W(AXIL_DATA_W), + .ADDR_W(AXIL_ADDR_W), + .AWUSER_EN(1'b0), + .WUSER_EN(1'b0), + .BUSER_EN(1'b0), + .ARUSER_EN(1'b0), + .RUSER_EN(1'b0) +) axil_ctrl_bar(); + +taxi_pcie_us_axil_master +pcie_axil_master_inst ( + .clk(pcie_clk), + 
.rst(pcie_rst), + + /* + * UltraScale PCIe interface + */ + .s_axis_cq(s_axis_pcie_cq), + .m_axis_cc(m_axis_pcie_cc), + + /* + * AXI Lite Master output + */ + .m_axil_wr(axil_ctrl_bar), + .m_axil_rd(axil_ctrl_bar), + + /* + * Configuration + */ + .completer_id('0), + .completer_id_en(1'b0), + + /* + * Status + */ + .stat_err_cor(), + .stat_err_uncor() +); + +localparam AXIS_PCIE_DATA_W = m_axis_pcie_rq.DATA_W; + +localparam PCIE_ADDR_W = 64; + +// TODO +localparam logic RQ_SEQ_NUM_EN = 1'b1; +localparam RAM_SEL_W = 2+CL_PORTS; +localparam RAM_ADDR_W = 16; +localparam RAM_SEGS = 2;//AXIS_PCIE_DATA_W > 256 ? AXIS_PCIE_DATA_W / 128 : 2; +localparam PCIE_TAG_CNT = 64;//AXIS_PCIE_RQ_USER_W == 60 ? 64 : 256, +localparam logic IMM_EN = 1'b0; +localparam IMM_W = 32; +localparam LEN_W = 20; +localparam TAG_W = 8; +localparam RD_OP_TBL_SIZE = PCIE_TAG_CNT; +localparam RD_TX_LIMIT = 2**(RQ_SEQ_NUM_W-1); +localparam logic RD_TX_FC_EN = 1'b1; +localparam RD_CPLH_FC_LIMIT = 512; +localparam RD_CPLD_FC_LIMIT = RD_CPLH_FC_LIMIT*4; +localparam WR_OP_TBL_SIZE = 2**(RQ_SEQ_NUM_W-1); +localparam WR_TX_LIMIT = 2**(RQ_SEQ_NUM_W-1); +localparam logic WR_TX_FC_EN = 1'b1; + +localparam RAM_DATA_W = AXIS_PCIE_DATA_W*2; // DMA RAM interface is twice the PCIe stream width +localparam RAM_SEG_DATA_W = RAM_DATA_W / RAM_SEGS; +localparam RAM_SEG_BE_W = RAM_SEG_DATA_W / 8; +localparam RAM_SEG_ADDR_W = RAM_ADDR_W - $clog2(RAM_SEGS*RAM_SEG_BE_W); + +// nothing in this module uses the cfg_mgmt port; tie it off so the outputs are not left undriven +assign cfg_mgmt_addr = '0; +assign cfg_mgmt_function_number = '0; +assign cfg_mgmt_write = 1'b0; +assign cfg_mgmt_write_data = '0; +assign cfg_mgmt_byte_enable = '0; +assign cfg_mgmt_read = 1'b0; + +// NOTE(review): presumably selects the transmit-credits-available view, since cfg_fc_* feed the DMA transmit flow control inputs; confirm against PG213 +assign cfg_fc_sel = 3'b100; + +taxi_dma_desc_if #( + .SRC_ADDR_W(PCIE_ADDR_W), + .SRC_SEL_EN(1'b0), + .SRC_ASID_EN(1'b0), + .DST_ADDR_W(RAM_ADDR_W), + .DST_SEL_EN(1'b1), + .DST_SEL_W(RAM_SEL_W), + .DST_ASID_EN(1'b0), + .IMM_EN(1'b0), + .LEN_W(LEN_W), + .TAG_W(TAG_W), + .ID_EN(1'b0), + .DEST_EN(1'b0), + .USER_EN(1'b0) +) dma_rd_desc(); + 
+taxi_dma_desc_if #( + .SRC_ADDR_W(RAM_ADDR_W), + .SRC_SEL_EN(1'b1), + .SRC_SEL_W(RAM_SEL_W), + .SRC_ASID_EN(1'b0), + .DST_ADDR_W(PCIE_ADDR_W), + .DST_SEL_EN(1'b0), + .DST_ASID_EN(1'b0), + .IMM_EN(IMM_EN), + .IMM_W(IMM_W), + .LEN_W(LEN_W), + .TAG_W(TAG_W), + .ID_EN(1'b0), + .DEST_EN(1'b0), + .USER_EN(1'b0) +) dma_wr_desc(); + +taxi_dma_ram_if #( + .SEGS(RAM_SEGS), + .SEG_ADDR_W(RAM_SEG_ADDR_W), + .SEG_DATA_W(RAM_SEG_DATA_W), + .SEG_BE_W(RAM_SEG_BE_W), + .SEL_W(RAM_SEL_W) +) dma_ram(); + +logic stat_rd_busy; +logic stat_wr_busy; +logic stat_err_cor; +logic stat_err_uncor; + +logic [$clog2(RD_OP_TBL_SIZE)-1:0] stat_rd_op_start_tag; +logic stat_rd_op_start_valid; +logic [$clog2(RD_OP_TBL_SIZE)-1:0] stat_rd_op_finish_tag; +logic [3:0] stat_rd_op_finish_status; +logic stat_rd_op_finish_valid; +logic [$clog2(PCIE_TAG_CNT)-1:0] stat_rd_req_start_tag; +logic [12:0] stat_rd_req_start_len; +logic stat_rd_req_start_valid; +logic [$clog2(PCIE_TAG_CNT)-1:0] stat_rd_req_finish_tag; +logic [3:0] stat_rd_req_finish_status; +logic stat_rd_req_finish_valid; +logic stat_rd_req_timeout; +logic stat_rd_op_tbl_full; +logic stat_rd_no_tags; +logic stat_rd_tx_limit; +logic stat_rd_tx_stall; +logic [$clog2(WR_OP_TBL_SIZE)-1:0] stat_wr_op_start_tag; +logic stat_wr_op_start_valid; +logic [$clog2(WR_OP_TBL_SIZE)-1:0] stat_wr_op_finish_tag; +logic [3:0] stat_wr_op_finish_status; +logic stat_wr_op_finish_valid; +logic [$clog2(WR_OP_TBL_SIZE)-1:0] stat_wr_req_start_tag; +logic [12:0] stat_wr_req_start_len; +logic stat_wr_req_start_valid; +logic [$clog2(WR_OP_TBL_SIZE)-1:0] stat_wr_req_finish_tag; +logic [3:0] stat_wr_req_finish_status; +logic stat_wr_req_finish_valid; +logic stat_wr_op_tbl_full; +logic stat_wr_tx_limit; +logic stat_wr_tx_stall; + +// register to break timing path from PCIe HIP 500 MHz clock domain +logic [RQ_SEQ_NUM_W-1:0] pcie_rq_seq_num0_reg = '0; +logic pcie_rq_seq_num_vld0_reg = 1'b0; +logic [RQ_SEQ_NUM_W-1:0] pcie_rq_seq_num1_reg = '0; +logic pcie_rq_seq_num_vld1_reg = 
1'b0; + +always_ff @(posedge pcie_clk) begin + pcie_rq_seq_num0_reg <= pcie_rq_seq_num0; + pcie_rq_seq_num_vld0_reg <= pcie_rq_seq_num_vld0; + pcie_rq_seq_num1_reg <= pcie_rq_seq_num1; + pcie_rq_seq_num_vld1_reg <= pcie_rq_seq_num_vld1; + + if (pcie_rst) begin + pcie_rq_seq_num_vld0_reg <= 1'b0; + pcie_rq_seq_num_vld1_reg <= 1'b0; + end +end + +taxi_dma_if_pcie_us #( + .RQ_SEQ_NUM_W(RQ_SEQ_NUM_W), + .RQ_SEQ_NUM_EN(RQ_SEQ_NUM_EN), + .PCIE_TAG_CNT(PCIE_TAG_CNT), + .RD_OP_TBL_SIZE(RD_OP_TBL_SIZE), + .RD_TX_LIMIT(RD_TX_LIMIT), + .RD_TX_FC_EN(RD_TX_FC_EN), + .RD_CPLH_FC_LIMIT(RD_CPLH_FC_LIMIT), + .RD_CPLD_FC_LIMIT(RD_CPLD_FC_LIMIT), + .WR_OP_TBL_SIZE(WR_OP_TBL_SIZE), + .WR_TX_LIMIT(WR_TX_LIMIT), + .WR_TX_FC_EN(WR_TX_FC_EN) +) +dma_if_inst ( + .clk(pcie_clk), + .rst(pcie_rst), + + /* + * UltraScale PCIe interface + */ + .m_axis_rq(m_axis_pcie_rq), + .s_axis_rc(s_axis_pcie_rc), + + /* + * Transmit sequence number input + */ + .s_axis_rq_seq_num_0(pcie_rq_seq_num0_reg), + .s_axis_rq_seq_num_valid_0(pcie_rq_seq_num_vld0_reg), + .s_axis_rq_seq_num_1(pcie_rq_seq_num1_reg), + .s_axis_rq_seq_num_valid_1(pcie_rq_seq_num_vld1_reg), + + /* + * Transmit flow control + */ + .pcie_tx_fc_nph_av(cfg_fc_nph), + .pcie_tx_fc_ph_av(cfg_fc_ph), + .pcie_tx_fc_pd_av(cfg_fc_pd), + + /* + * Read descriptor + */ + .rd_desc_req(dma_rd_desc), + .rd_desc_sts(dma_rd_desc), + + /* + * Write descriptor + */ + .wr_desc_req(dma_wr_desc), + .wr_desc_sts(dma_wr_desc), + + /* + * RAM interface + */ + .dma_ram_wr(dma_ram), + .dma_ram_rd(dma_ram), + + /* + * Configuration + */ + .read_enable(1'b1), + .write_enable(1'b1), + .ext_tag_en(1'b0), // TODO + .rcb_128b(cfg_rcb_status[0]), + .requester_id('0), + .requester_id_en(1'b0), + .max_rd_req_size(cfg_max_read_req), + .max_payload_size(cfg_max_payload), + + /* + * Status + */ + .stat_rd_busy(stat_rd_busy), + .stat_wr_busy(stat_wr_busy), + .stat_err_cor(stat_err_cor), + .stat_err_uncor(stat_err_uncor), + + /* + * Statistics + */ + 
.stat_rd_op_start_tag(stat_rd_op_start_tag), + .stat_rd_op_start_valid(stat_rd_op_start_valid), + .stat_rd_op_finish_tag(stat_rd_op_finish_tag), + .stat_rd_op_finish_status(stat_rd_op_finish_status), + .stat_rd_op_finish_valid(stat_rd_op_finish_valid), + .stat_rd_req_start_tag(stat_rd_req_start_tag), + .stat_rd_req_start_len(stat_rd_req_start_len), + .stat_rd_req_start_valid(stat_rd_req_start_valid), + .stat_rd_req_finish_tag(stat_rd_req_finish_tag), + .stat_rd_req_finish_status(stat_rd_req_finish_status), + .stat_rd_req_finish_valid(stat_rd_req_finish_valid), + .stat_rd_req_timeout(stat_rd_req_timeout), + .stat_rd_op_tbl_full(stat_rd_op_tbl_full), + .stat_rd_no_tags(stat_rd_no_tags), + .stat_rd_tx_limit(stat_rd_tx_limit), + .stat_rd_tx_stall(stat_rd_tx_stall), + .stat_wr_op_start_tag(stat_wr_op_start_tag), + .stat_wr_op_start_valid(stat_wr_op_start_valid), + .stat_wr_op_finish_tag(stat_wr_op_finish_tag), + .stat_wr_op_finish_status(stat_wr_op_finish_status), + .stat_wr_op_finish_valid(stat_wr_op_finish_valid), + .stat_wr_req_start_tag(stat_wr_req_start_tag), + .stat_wr_req_start_len(stat_wr_req_start_len), + .stat_wr_req_start_valid(stat_wr_req_start_valid), + .stat_wr_req_finish_tag(stat_wr_req_finish_tag), + .stat_wr_req_finish_status(stat_wr_req_finish_status), + .stat_wr_req_finish_valid(stat_wr_req_finish_valid), + .stat_wr_op_tbl_full(stat_wr_op_tbl_full), + .stat_wr_tx_limit(stat_wr_tx_limit), + .stat_wr_tx_stall(stat_wr_tx_stall) +); + +wire [PORTS-1:0] irq; +wire [31:0] msi_irq = 32'(irq); + +taxi_pcie_us_msi #( + .MSI_CNT(32) +) +msi_inst ( + .clk(pcie_clk), + .rst(pcie_rst), + + /* + * Interrupt request inputs + */ + .msi_irq(msi_irq), + + /* + * Interface to UltraScale PCIe IP core + */ + /* verilator lint_off WIDTHEXPAND */ + .cfg_interrupt_msi_enable(cfg_interrupt_msi_enable), + .cfg_interrupt_msi_vf_enable(), + .cfg_interrupt_msi_mmenable(cfg_interrupt_msi_mmenable), + .cfg_interrupt_msi_mask_update(cfg_interrupt_msi_mask_update), + 
.cfg_interrupt_msi_data(cfg_interrupt_msi_data), + .cfg_interrupt_msi_select(cfg_interrupt_msi_select), + .cfg_interrupt_msi_int(cfg_interrupt_msi_int), + .cfg_interrupt_msi_pending_status(cfg_interrupt_msi_pending_status), + .cfg_interrupt_msi_pending_status_data_enable(cfg_interrupt_msi_pending_status_data_enable), + .cfg_interrupt_msi_pending_status_function_num(cfg_interrupt_msi_pending_status_function_num), + .cfg_interrupt_msi_sent(cfg_interrupt_msi_sent), + .cfg_interrupt_msi_fail(cfg_interrupt_msi_fail), + .cfg_interrupt_msi_attr(cfg_interrupt_msi_attr), + .cfg_interrupt_msi_tph_present(cfg_interrupt_msi_tph_present), + .cfg_interrupt_msi_tph_type(cfg_interrupt_msi_tph_type), + .cfg_interrupt_msi_tph_st_tag(cfg_interrupt_msi_tph_st_tag), + .cfg_interrupt_msi_function_number(cfg_interrupt_msi_function_number) + /* verilator lint_on WIDTHEXPAND */ +); + +cndm_proto_core #( + .PORTS(PORTS) +) +core_inst ( + .clk(pcie_clk), + .rst(pcie_rst), + + /* + * Control register interface + */ + .s_axil_wr(axil_ctrl_bar), + .s_axil_rd(axil_ctrl_bar), + + /* + * DMA + */ + .dma_rd_desc_req(dma_rd_desc), + .dma_rd_desc_sts(dma_rd_desc), + .dma_wr_desc_req(dma_wr_desc), + .dma_wr_desc_sts(dma_wr_desc), + .dma_ram_wr(dma_ram), + .dma_ram_rd(dma_ram), + + .irq(irq), + + /* + * Ethernet + */ + .mac_tx_clk(mac_tx_clk), + .mac_tx_rst(mac_tx_rst), + .mac_axis_tx(mac_axis_tx), + .mac_axis_tx_cpl(mac_axis_tx_cpl), + + .mac_rx_clk(mac_rx_clk), + .mac_rx_rst(mac_rx_rst), + .mac_axis_rx(mac_axis_rx) +); + +endmodule + +`resetall diff --git a/src/cndm_proto/rtl/cndm_proto_port.sv b/src/cndm_proto/rtl/cndm_proto_port.sv new file mode 100644 index 0000000..a4f0e19 --- /dev/null +++ b/src/cndm_proto/rtl/cndm_proto_port.sv @@ -0,0 +1,544 @@ +// SPDX-License-Identifier: CERN-OHL-S-2.0 +/* + +Copyright (c) 2025 FPGA Ninja, LLC + +Authors: +- Alex Forencich + +*/ + +`resetall +`timescale 1ns / 1ps +`default_nettype none + +/* + * Corundum-proto port logic + */ +module cndm_proto_port #( + 
parameter PORTS = 2 +) +( + input wire logic clk, + input wire logic rst, + + /* + * Control register interface + */ + taxi_axil_if.wr_slv s_axil_wr, + taxi_axil_if.rd_slv s_axil_rd, + + /* + * DMA + */ + taxi_dma_desc_if.req_src dma_rd_desc_req, + taxi_dma_desc_if.sts_snk dma_rd_desc_sts, + taxi_dma_desc_if.req_src dma_wr_desc_req, + taxi_dma_desc_if.sts_snk dma_wr_desc_sts, + taxi_dma_ram_if.wr_slv dma_ram_wr, + taxi_dma_ram_if.rd_slv dma_ram_rd, + + output wire logic irq, + + /* + * Ethernet + */ + input wire logic mac_tx_clk, + input wire logic mac_tx_rst, + taxi_axis_if.src mac_axis_tx, + taxi_axis_if.snk mac_axis_tx_cpl, + + input wire logic mac_rx_clk, + input wire logic mac_rx_rst, + taxi_axis_if.snk mac_axis_rx +); + +localparam AXIL_ADDR_W = s_axil_wr.ADDR_W; +localparam AXIL_DATA_W = s_axil_wr.DATA_W; + +localparam RAM_SEGS = dma_ram_wr.SEGS; +localparam RAM_SEG_ADDR_W = dma_ram_wr.SEG_ADDR_W; +localparam RAM_SEG_DATA_W = dma_ram_wr.SEG_DATA_W; +localparam RAM_SEG_BE_W = dma_ram_wr.SEG_BE_W; +localparam RAM_SEL_W = dma_ram_wr.SEL_W; + +logic txq_en_reg = '0; +logic [3:0] txq_size_reg = '0; +logic [63:0] txq_base_addr_reg = '0; +logic [15:0] txq_prod_reg = '0; +wire [15:0] txq_cons; +logic rxq_en_reg = '0; +logic [3:0] rxq_size_reg = '0; +logic [63:0] rxq_base_addr_reg = '0; +logic [15:0] rxq_prod_reg = '0; +wire [15:0] rxq_cons; + +logic txcq_en_reg = '0; +logic [3:0] txcq_size_reg = '0; +logic [63:0] txcq_base_addr_reg = '0; +wire [15:0] txcq_prod; +logic rxcq_en_reg = '0; +logic [3:0] rxcq_size_reg = '0; +logic [63:0] rxcq_base_addr_reg = '0; +wire [15:0] rxcq_prod; + +logic s_axil_awready_reg = 1'b0; +logic s_axil_wready_reg = 1'b0; +logic s_axil_bvalid_reg = 1'b0; + +logic s_axil_arready_reg = 1'b0; +logic [AXIL_DATA_W-1:0] s_axil_rdata_reg = '0; +logic s_axil_rvalid_reg = 1'b0; + +assign s_axil_wr.awready = s_axil_awready_reg; +assign s_axil_wr.wready = s_axil_wready_reg; +assign s_axil_wr.bresp = '0; +assign s_axil_wr.buser = '0; +assign 
s_axil_wr.bvalid = s_axil_bvalid_reg; + +assign s_axil_rd.arready = s_axil_arready_reg; +assign s_axil_rd.rdata = s_axil_rdata_reg; +assign s_axil_rd.rresp = '0; +assign s_axil_rd.ruser = '0; +assign s_axil_rd.rvalid = s_axil_rvalid_reg; + +always_ff @(posedge clk) begin + s_axil_awready_reg <= 1'b0; + s_axil_wready_reg <= 1'b0; + s_axil_bvalid_reg <= s_axil_bvalid_reg && !s_axil_wr.bready; + + s_axil_arready_reg <= 1'b0; + s_axil_rvalid_reg <= s_axil_rvalid_reg && !s_axil_rd.rready; + + if (s_axil_wr.awvalid && s_axil_wr.wvalid && !s_axil_bvalid_reg) begin + s_axil_awready_reg <= 1'b1; + s_axil_wready_reg <= 1'b1; + s_axil_bvalid_reg <= 1'b1; + + case ({s_axil_wr.awaddr[15:2], 2'b00}) + 16'h0100: begin + txq_en_reg <= s_axil_wr.wdata[0]; + txq_size_reg <= s_axil_wr.wdata[19:16]; + end + 16'h0104: txq_prod_reg <= s_axil_wr.wdata[15:0]; + 16'h0108: txq_base_addr_reg[31:0] <= s_axil_wr.wdata; + 16'h010c: txq_base_addr_reg[63:32] <= s_axil_wr.wdata; + + 16'h0200: begin + rxq_en_reg <= s_axil_wr.wdata[0]; + rxq_size_reg <= s_axil_wr.wdata[19:16]; + end + 16'h0204: rxq_prod_reg <= s_axil_wr.wdata[15:0]; + 16'h0208: rxq_base_addr_reg[31:0] <= s_axil_wr.wdata; + 16'h020c: rxq_base_addr_reg[63:32] <= s_axil_wr.wdata; + + 16'h0300: begin + txcq_en_reg <= s_axil_wr.wdata[0]; + txcq_size_reg <= s_axil_wr.wdata[19:16]; + end + 16'h0308: txcq_base_addr_reg[31:0] <= s_axil_wr.wdata; + 16'h030c: txcq_base_addr_reg[63:32] <= s_axil_wr.wdata; + + 16'h0400: begin + rxcq_en_reg <= s_axil_wr.wdata[0]; + rxcq_size_reg <= s_axil_wr.wdata[19:16]; + end + 16'h0408: rxcq_base_addr_reg[31:0] <= s_axil_wr.wdata; + 16'h040c: rxcq_base_addr_reg[63:32] <= s_axil_wr.wdata; + default: begin end + endcase + end + + if (s_axil_rd.arvalid && !s_axil_rvalid_reg) begin + s_axil_rdata_reg <= '0; + + s_axil_arready_reg <= 1'b1; + s_axil_rvalid_reg <= 1'b1; + + case ({s_axil_rd.araddr[15:2], 2'b00}) + 16'h0100: begin + s_axil_rdata_reg[0] <= txq_en_reg; + s_axil_rdata_reg[19:16] <= txq_size_reg; + end + 
16'h0104: begin + s_axil_rdata_reg[15:0] <= txq_prod_reg; + s_axil_rdata_reg[31:16] <= txq_cons; + end + 16'h0108: s_axil_rdata_reg <= txq_base_addr_reg[31:0]; + 16'h010c: s_axil_rdata_reg <= txq_base_addr_reg[63:32]; + + 16'h0200: begin + s_axil_rdata_reg[0] <= rxq_en_reg; + s_axil_rdata_reg[19:16] <= rxq_size_reg; + end + 16'h0204: begin + s_axil_rdata_reg[15:0] <= rxq_prod_reg; + s_axil_rdata_reg[31:16] <= rxq_cons; + end + 16'h0208: s_axil_rdata_reg <= rxq_base_addr_reg[31:0]; + 16'h020c: s_axil_rdata_reg <= rxq_base_addr_reg[63:32]; + + 16'h0300: begin + s_axil_rdata_reg[0] <= txcq_en_reg; + s_axil_rdata_reg[19:16] <= txcq_size_reg; + end + 16'h0304: s_axil_rdata_reg[15:0] <= txcq_prod; + 16'h0308: s_axil_rdata_reg <= txcq_base_addr_reg[31:0]; + 16'h030c: s_axil_rdata_reg <= txcq_base_addr_reg[63:32]; + + 16'h0400: begin + s_axil_rdata_reg[0] <= rxcq_en_reg; + s_axil_rdata_reg[19:16] <= rxcq_size_reg; + end + 16'h0404: s_axil_rdata_reg[15:0] <= rxcq_prod; + 16'h0408: s_axil_rdata_reg <= rxcq_base_addr_reg[31:0]; + 16'h040c: s_axil_rdata_reg <= rxcq_base_addr_reg[63:32]; + default: begin end + endcase + end + + if (rst) begin + s_axil_awready_reg <= 1'b0; + s_axil_wready_reg <= 1'b0; + s_axil_bvalid_reg <= 1'b0; + + s_axil_arready_reg <= 1'b0; + s_axil_rvalid_reg <= 1'b0; + end +end + +taxi_dma_desc_if #( + .SRC_ADDR_W(dma_rd_desc_req.SRC_ADDR_W), + .SRC_SEL_EN(dma_rd_desc_req.SRC_SEL_EN), + .SRC_SEL_W(dma_rd_desc_req.SRC_SEL_W), + .SRC_ASID_EN(dma_rd_desc_req.SRC_ASID_EN), + .DST_ADDR_W(dma_rd_desc_req.DST_ADDR_W), + .DST_SEL_EN(dma_rd_desc_req.DST_SEL_EN), + .DST_SEL_W(dma_rd_desc_req.DST_SEL_W-1), + .DST_ASID_EN(dma_rd_desc_req.DST_ASID_EN), + .IMM_EN(dma_rd_desc_req.IMM_EN), + .LEN_W(dma_rd_desc_req.LEN_W), + .TAG_W(dma_rd_desc_req.TAG_W-1), + .ID_EN(dma_rd_desc_req.ID_EN), + .DEST_EN(dma_rd_desc_req.DEST_EN), + .USER_EN(dma_rd_desc_req.USER_EN) +) dma_rd_desc_int[2](); + +taxi_dma_ram_if #( + .SEGS(RAM_SEGS), + .SEG_ADDR_W(RAM_SEG_ADDR_W), + 
.SEG_DATA_W(RAM_SEG_DATA_W), + .SEG_BE_W(RAM_SEG_BE_W), + .SEL_W(RAM_SEL_W-1) +) dma_ram_wr_int[2](); + +taxi_dma_if_mux_rd #( + .PORTS(2), + .ARB_ROUND_ROBIN(1), + .ARB_LSB_HIGH_PRIO(1) +) +rd_dma_mux_inst ( + .clk(clk), + .rst(rst), + + /* + * DMA descriptors from clients + */ + .client_req(dma_rd_desc_int), + .client_sts(dma_rd_desc_int), + + /* + * DMA descriptors to DMA engines + */ + .dma_req(dma_rd_desc_req), + .dma_sts(dma_rd_desc_sts), + + /* + * RAM interface (from DMA interface) + */ + .dma_ram_wr(dma_ram_wr), + + /* + * RAM interface (towards RAM) + */ + .client_ram_wr(dma_ram_wr_int) +); + +taxi_dma_desc_if #( + .SRC_ADDR_W(dma_wr_desc_req.SRC_ADDR_W), + .SRC_SEL_EN(dma_wr_desc_req.SRC_SEL_EN), + .SRC_SEL_W(dma_wr_desc_req.SRC_SEL_W-1), + .SRC_ASID_EN(dma_wr_desc_req.SRC_ASID_EN), + .DST_ADDR_W(dma_wr_desc_req.DST_ADDR_W), + .DST_SEL_EN(dma_wr_desc_req.DST_SEL_EN), + .DST_SEL_W(dma_wr_desc_req.DST_SEL_W), + .DST_ASID_EN(dma_wr_desc_req.DST_ASID_EN), + .IMM_EN(dma_wr_desc_req.IMM_EN), + .IMM_W(dma_wr_desc_req.IMM_W), + .LEN_W(dma_wr_desc_req.LEN_W), + .TAG_W(dma_wr_desc_req.TAG_W-1), + .ID_EN(dma_wr_desc_req.ID_EN), + .DEST_EN(dma_wr_desc_req.DEST_EN), + .USER_EN(dma_wr_desc_req.USER_EN) +) dma_wr_desc_int[2](); + +taxi_dma_ram_if #( + .SEGS(RAM_SEGS), + .SEG_ADDR_W(RAM_SEG_ADDR_W), + .SEG_DATA_W(RAM_SEG_DATA_W), + .SEG_BE_W(RAM_SEG_BE_W), + .SEL_W(RAM_SEL_W-1) +) dma_ram_rd_int[2](); + +taxi_dma_if_mux_wr #( + .PORTS(2), + .ARB_ROUND_ROBIN(1), + .ARB_LSB_HIGH_PRIO(1) +) +wr_dma_mux_inst ( + .clk(clk), + .rst(rst), + + /* + * DMA descriptors from clients + */ + .client_req(dma_wr_desc_int), + .client_sts(dma_wr_desc_int), + + /* + * DMA descriptors to DMA engines + */ + .dma_req(dma_wr_desc_req), + .dma_sts(dma_wr_desc_sts), + + /* + * RAM interface (from DMA interface) + */ + .dma_ram_rd(dma_ram_rd), + + /* + * RAM interface (towards RAM) + */ + .client_ram_rd(dma_ram_rd_int) +); + +wire [1:0] desc_req; + +taxi_axis_if #( + .DATA_W(16*8), + 
.KEEP_EN(1), + .LAST_EN(1), + .ID_EN(0), + .DEST_EN(1), // TODO + .USER_EN(1), + .USER_W(1) +) axis_desc[2](); + +taxi_axis_if #( + .DATA_W(16*8), + .KEEP_EN(1), + .LAST_EN(1), + .ID_EN(1), // TODO + .DEST_EN(0), + .USER_EN(0) +) axis_cpl[2](); + +cndm_proto_desc_rd +desc_rd_inst ( + .clk(clk), + .rst(rst), + + /* + * DMA + */ + .dma_rd_desc_req(dma_rd_desc_int[0]), + .dma_rd_desc_sts(dma_rd_desc_int[0]), + .dma_ram_wr(dma_ram_wr_int[0]), + + .txq_en(txq_en_reg), + .txq_size(txq_size_reg), + .txq_base_addr(txq_base_addr_reg), + .txq_prod(txq_prod_reg), + .txq_cons(txq_cons), + .rxq_en(rxq_en_reg), + .rxq_size(rxq_size_reg), + .rxq_base_addr(rxq_base_addr_reg), + .rxq_prod(rxq_prod_reg), + .rxq_cons(rxq_cons), + + .desc_req(desc_req), + .axis_desc(axis_desc) +); + +cndm_proto_cpl_wr +cpl_wr_inst ( + .clk(clk), + .rst(rst), + + /* + * DMA + */ + .dma_wr_desc_req(dma_wr_desc_int[0]), + .dma_wr_desc_sts(dma_wr_desc_int[0]), + .dma_ram_rd(dma_ram_rd_int[0]), + + .txcq_en(txcq_en_reg), + .txcq_size(txcq_size_reg), + .txcq_base_addr(txcq_base_addr_reg), + .txcq_prod(txcq_prod), + .rxcq_en(rxcq_en_reg), + .rxcq_size(rxcq_size_reg), + .rxcq_base_addr(rxcq_base_addr_reg), + .rxcq_prod(rxcq_prod), + + .axis_cpl(axis_cpl), + .irq(irq) +); + +taxi_axis_if #( + .DATA_W(mac_axis_tx.DATA_W), + .USER_EN(1), + .USER_W(1) +) mac_tx_int(); + +taxi_axis_async_fifo #( + .DEPTH(16384), + .RAM_PIPELINE(2), + .FRAME_FIFO(1), + .USER_BAD_FRAME_VALUE(1'b1), + .USER_BAD_FRAME_MASK(1'b1), + .DROP_OVERSIZE_FRAME(1), + .DROP_BAD_FRAME(1), + .DROP_WHEN_FULL(1) +) +tx_fifo ( + /* + * AXI4-Stream input (sink) + */ + .s_clk(clk), + .s_rst(rst), + .s_axis(mac_tx_int), + + /* + * AXI4-Stream output (source) + */ + .m_clk(mac_tx_clk), + .m_rst(mac_tx_rst), + .m_axis(mac_axis_tx), + + /* + * Pause + */ + .s_pause_req(1'b0), + .s_pause_ack(), + .m_pause_req(1'b0), + .m_pause_ack(), + + /* + * Status + */ + .s_status_depth(), + .s_status_depth_commit(), + .s_status_overflow(), + .s_status_bad_frame(), + 
.s_status_good_frame(),
+    .m_status_depth(),
+    .m_status_depth_commit(),
+    .m_status_overflow(),
+    .m_status_bad_frame(),
+    .m_status_good_frame()
+);
+
+cndm_proto_tx
+tx_inst (
+    .clk(clk),
+    .rst(rst),
+
+    /*
+     * DMA
+     */
+    .dma_rd_desc_req(dma_rd_desc_int[1]),
+    .dma_rd_desc_sts(dma_rd_desc_int[1]),
+    .dma_ram_wr(dma_ram_wr_int[1]),
+
+    .desc_req(desc_req[0]),
+    .axis_desc(axis_desc[0]),
+    .tx_data(mac_tx_int),
+    .axis_cpl(axis_cpl[0])
+);
+
+taxi_axis_if #(
+    .DATA_W(mac_axis_rx.DATA_W),
+    .USER_EN(1),
+    .USER_W(1)
+) mac_rx_int();
+
+taxi_axis_async_fifo #(
+    .DEPTH(16384),
+    .RAM_PIPELINE(2),
+    .FRAME_FIFO(1),
+    .USER_BAD_FRAME_VALUE(1'b1),
+    .USER_BAD_FRAME_MASK(1'b1),
+    .DROP_OVERSIZE_FRAME(1),
+    .DROP_BAD_FRAME(1),
+    .DROP_WHEN_FULL(1)
+)
+rx_fifo (
+    /*
+     * AXI4-Stream input (sink)
+     */
+    .s_clk(mac_rx_clk),
+    .s_rst(mac_rx_rst),
+    .s_axis(mac_axis_rx),
+
+    /*
+     * AXI4-Stream output (source)
+     */
+    .m_clk(clk),
+    .m_rst(rst),
+    .m_axis(mac_rx_int),
+
+    /*
+     * Pause
+     */
+    .s_pause_req(1'b0),
+    .s_pause_ack(),
+    .m_pause_req(1'b0),
+    .m_pause_ack(),
+
+    /*
+     * Status
+     */
+    .s_status_depth(),
+    .s_status_depth_commit(),
+    .s_status_overflow(),
+    .s_status_bad_frame(),
+    .s_status_good_frame(),
+    .m_status_depth(),
+    .m_status_depth_commit(),
+    .m_status_overflow(),
+    .m_status_bad_frame(),
+    .m_status_good_frame()
+);
+
+cndm_proto_rx
+rx_inst (
+    .clk(clk),
+    .rst(rst),
+
+    /*
+     * DMA
+     */
+    .dma_wr_desc_req(dma_wr_desc_int[1]),
+    .dma_wr_desc_sts(dma_wr_desc_int[1]),
+    .dma_ram_rd(dma_ram_rd_int[1]),
+
+    .rx_data(mac_rx_int),
+    .desc_req(desc_req[1]),
+    .axis_desc(axis_desc[1]),
+    .axis_cpl(axis_cpl[1])
+);
+
+endmodule
+
+`resetall
diff --git a/src/cndm_proto/rtl/cndm_proto_rx.sv b/src/cndm_proto/rtl/cndm_proto_rx.sv
new file mode 100644
index 0000000..eef6f6f
--- /dev/null
+++ b/src/cndm_proto/rtl/cndm_proto_rx.sv
@@ -0,0 +1,229 @@
+// SPDX-License-Identifier: CERN-OHL-S-2.0
+/*
+
+Copyright (c) 2025 FPGA Ninja, LLC
+
+Authors:
+- Alex Forencich
+
+*/
+
+`resetall
+`timescale 1ns / 1ps
+`default_nettype none
+
+/*
+ * Corundum-proto receive datapath
+ *
+ * One frame at a time: a frame from rx_data is stored in a local RAM, a
+ * receive descriptor is requested, the frame is DMA-written to the address
+ * carried by that descriptor, and a completion is emitted on axis_cpl.
+ */
+module cndm_proto_rx
+(
+    input  wire logic         clk,
+    input  wire logic         rst,
+
+    /*
+     * DMA
+     */
+    taxi_dma_desc_if.req_src  dma_wr_desc_req,
+    taxi_dma_desc_if.sts_snk  dma_wr_desc_sts,
+    taxi_dma_ram_if.rd_slv    dma_ram_rd,
+
+    taxi_axis_if.snk          rx_data,
+    output wire logic         desc_req,
+    taxi_axis_if.snk          axis_desc,
+    taxi_axis_if.src          axis_cpl
+);
+
+localparam RAM_ADDR_W = 16;
+
+taxi_dma_desc_if #(
+    .SRC_ADDR_W(RAM_ADDR_W),
+    .SRC_SEL_EN(1'b0),
+    .SRC_ASID_EN(1'b0),
+    .DST_ADDR_W(RAM_ADDR_W),
+    .DST_SEL_EN(1'b0),
+    .DST_ASID_EN(1'b0),
+    .IMM_EN(1'b0),
+    .LEN_W(16),
+    .TAG_W(1),
+    .ID_EN(0),
+    .DEST_EN(0),
+    .USER_EN(1),
+    .USER_W(1)
+) dma_desc();
+
+localparam [2:0]
+    STATE_IDLE = 0,
+    STATE_RX_DATA = 1,
+    STATE_READ_DESC = 2,
+    STATE_WRITE_DATA = 3;
+
+logic [2:0] state_reg = STATE_IDLE;
+
+logic desc_req_reg = 1'b0;
+
+assign desc_req = desc_req_reg;
+
+// Receive state machine.  Assignments ahead of the case statement are
+// per-cycle defaults; the case statement and reset block override them.
+always_ff @(posedge clk) begin
+    desc_req_reg <= 1'b0;
+
+    axis_desc.tready <= 1'b0;
+
+    dma_wr_desc_req.req_src_sel <= '0;
+    dma_wr_desc_req.req_src_asid <= '0;
+    dma_wr_desc_req.req_dst_sel <= '0;
+    dma_wr_desc_req.req_dst_asid <= '0;
+    dma_wr_desc_req.req_imm <= '0;
+    dma_wr_desc_req.req_imm_en <= '0;
+    dma_wr_desc_req.req_tag <= '0;
+    dma_wr_desc_req.req_id <= '0;
+    dma_wr_desc_req.req_dest <= '0;
+    dma_wr_desc_req.req_user <= '0;
+    dma_wr_desc_req.req_valid <= dma_wr_desc_req.req_valid && !dma_wr_desc_req.req_ready;
+
+    dma_desc.req_src_addr <= '0;
+    dma_desc.req_src_sel <= '0;
+    dma_desc.req_src_asid <= '0;
+    dma_desc.req_dst_addr <= '0;
+    dma_desc.req_dst_sel <= '0;
+    dma_desc.req_dst_asid <= '0;
+    dma_desc.req_imm <= '0;
+    dma_desc.req_imm_en <= '0;
+    dma_desc.req_len <= 4096;
+    dma_desc.req_tag <= '0;
+    dma_desc.req_id <= '0;
+    dma_desc.req_dest <= '0;
+    dma_desc.req_user <= '0;
+    dma_desc.req_valid <= dma_desc.req_valid && !dma_desc.req_ready;
+
+    axis_cpl.tkeep <= '0;
+    axis_cpl.tid <= '0;
+    axis_cpl.tdest <= '0;
+    axis_cpl.tuser <= '0;
+    axis_cpl.tlast <= 1'b1;
+    axis_cpl.tvalid <= axis_cpl.tvalid && !axis_cpl.tready;
+
+    case (state_reg)
+        STATE_IDLE: begin
+            // queue a stream-to-RAM transfer for the next frame (req_len is 4096)
+            dma_desc.req_valid <= 1'b1;
+            state_reg <= STATE_RX_DATA;
+        end
+        STATE_RX_DATA: begin
+            // track the reported length; on status, request a receive descriptor
+            dma_wr_desc_req.req_len <= 20'(dma_desc.sts_len);
+            axis_cpl.tdata[47:32] <= 16'(dma_desc.sts_len);
+            if (dma_desc.sts_valid) begin
+                desc_req_reg <= 1'b1;
+                state_reg <= STATE_READ_DESC;
+            end
+        end
+        STATE_READ_DESC: begin
+            // descriptor fields used here: [127:64] buffer address, [47:32] length
+            axis_desc.tready <= 1'b1;
+
+            dma_wr_desc_req.req_src_addr <= '0;
+            dma_wr_desc_req.req_dst_addr <= axis_desc.tdata[127:64];
+
+            if (axis_desc.tvalid && axis_desc.tready) begin
+                // limit the write to the length given in the descriptor
+                if (dma_wr_desc_req.req_len > 20'(axis_desc.tdata[47:32])) begin
+                    dma_wr_desc_req.req_len <= 20'(axis_desc.tdata[47:32]);
+                end
+
+                if (axis_desc.tuser) begin
+                    // failed to read desc
+                    state_reg <= STATE_IDLE;
+                end else begin
+                    dma_wr_desc_req.req_valid <= 1'b1;
+                    state_reg <= STATE_WRITE_DATA;
+                end
+            end
+        end
+        STATE_WRITE_DATA: begin
+            // raise the completion once the DMA write reports status
+            if (dma_wr_desc_sts.sts_valid) begin
+                axis_cpl.tvalid <= 1'b1;
+                state_reg <= STATE_IDLE;
+            end
+        end
+        default: begin
+            state_reg <= STATE_IDLE;
+        end
+    endcase
+
+    if (rst) begin
+        state_reg <= STATE_IDLE;
+        // the case statement above still runs while rst is high, and valid
+        // flags otherwise hold their value, so force the handshakes idle
+        desc_req_reg <= 1'b0;
+        axis_desc.tready <= 1'b0;
+        dma_wr_desc_req.req_valid <= 1'b0;
+        dma_desc.req_valid <= 1'b0;
+        axis_cpl.tvalid <= 1'b0;
+    end
+end
+
+taxi_dma_ram_if #(
+    .SEGS(dma_ram_rd.SEGS),
+    .SEG_ADDR_W(dma_ram_rd.SEG_ADDR_W),
+    .SEG_DATA_W(dma_ram_rd.SEG_DATA_W),
+    .SEG_BE_W(dma_ram_rd.SEG_BE_W)
+) dma_ram_wr();
+
+taxi_dma_psdpram #(
+    .SIZE(4096),
+    .PIPELINE(2)
+)
+ram_inst (
+    .clk(clk),
+    .rst(rst),
+
+    /*
+     * Write port
+     */
+    .dma_ram_wr(dma_ram_wr),
+
+    /*
+     * Read port
+     */
+    .dma_ram_rd(dma_ram_rd)
+);
+
+taxi_dma_client_axis_sink
+dma_inst (
+    .clk(clk),
+    .rst(rst),
+
+    /*
+     * DMA descriptor
+     */
+    .desc_req(dma_desc),
+    .desc_sts(dma_desc),
+
+    /*
+     * AXI stream write data input
+     */
+    .s_axis_wr_data(rx_data),
+
+    /*
+     * RAM interface
+     */
+    .dma_ram_wr(dma_ram_wr),
+
+    /*
+     * Configuration
+     */
+    .enable(1'b1),
+    .abort(1'b0)
+);
+
+
+endmodule
+
+`resetall
diff --git
a/src/cndm_proto/rtl/cndm_proto_tx.sv b/src/cndm_proto/rtl/cndm_proto_tx.sv
new file mode 100644
index 0000000..6ffd2ad
--- /dev/null
+++ b/src/cndm_proto/rtl/cndm_proto_tx.sv
@@ -0,0 +1,222 @@
+// SPDX-License-Identifier: CERN-OHL-S-2.0
+/*
+
+Copyright (c) 2025 FPGA Ninja, LLC
+
+Authors:
+- Alex Forencich
+
+*/
+
+`resetall
+`timescale 1ns / 1ps
+`default_nettype none
+
+/*
+ * Corundum-proto transmit path
+ *
+ * One frame at a time: a transmit descriptor is requested, the frame is
+ * DMA-read from the address carried by that descriptor into a local RAM,
+ * streamed out on tx_data, and a completion is emitted on axis_cpl.
+ */
+module cndm_proto_tx
+(
+    input  wire logic         clk,
+    input  wire logic         rst,
+
+    /*
+     * DMA
+     */
+    taxi_dma_desc_if.req_src  dma_rd_desc_req,
+    taxi_dma_desc_if.sts_snk  dma_rd_desc_sts,
+    taxi_dma_ram_if.wr_slv    dma_ram_wr,
+
+    output wire logic         desc_req,
+    taxi_axis_if.snk          axis_desc,
+    taxi_axis_if.src          tx_data,
+    taxi_axis_if.src          axis_cpl
+);
+
+localparam RAM_ADDR_W = 16;
+
+taxi_dma_desc_if #(
+    .SRC_ADDR_W(RAM_ADDR_W),
+    .SRC_SEL_EN(1'b0),
+    .SRC_ASID_EN(1'b0),
+    .DST_ADDR_W(RAM_ADDR_W),
+    .DST_SEL_EN(1'b0),
+    .DST_ASID_EN(1'b0),
+    .IMM_EN(1'b0),
+    .LEN_W(16),
+    .TAG_W(1),
+    .ID_EN(0),
+    .DEST_EN(0),
+    .USER_EN(1),
+    .USER_W(1)
+) dma_desc();
+
+localparam [2:0]
+    STATE_IDLE = 0,
+    STATE_READ_DESC = 1,
+    STATE_READ_DATA = 2,
+    STATE_TX_DATA = 3;
+
+logic [2:0] state_reg = STATE_IDLE;
+
+logic desc_req_reg = 1'b0;
+
+assign desc_req = desc_req_reg;
+
+// Transmit state machine.  Assignments ahead of the case statement are
+// per-cycle defaults; the case statement and reset block override them.
+always_ff @(posedge clk) begin
+    desc_req_reg <= 1'b0;
+
+    axis_desc.tready <= 1'b0;
+
+    dma_rd_desc_req.req_src_sel <= '0;
+    dma_rd_desc_req.req_src_asid <= '0;
+    dma_rd_desc_req.req_dst_sel <= '0;
+    dma_rd_desc_req.req_dst_asid <= '0;
+    dma_rd_desc_req.req_imm <= '0;
+    dma_rd_desc_req.req_imm_en <= '0;
+    dma_rd_desc_req.req_tag <= '0;
+    dma_rd_desc_req.req_id <= '0;
+    dma_rd_desc_req.req_dest <= '0;
+    dma_rd_desc_req.req_user <= '0;
+    dma_rd_desc_req.req_valid <= dma_rd_desc_req.req_valid && !dma_rd_desc_req.req_ready;
+
+    dma_desc.req_src_sel <= '0;
+    dma_desc.req_src_asid <= '0;
+    dma_desc.req_dst_addr <= '0;
+    dma_desc.req_dst_sel <= '0;
+    dma_desc.req_dst_asid <= '0;
+    dma_desc.req_imm <= '0;
+    dma_desc.req_imm_en <= '0;
+    dma_desc.req_tag <= '0;
+    dma_desc.req_id <= '0;
+    dma_desc.req_dest <= '0;
+    dma_desc.req_user <= '0;
+    dma_desc.req_valid <= dma_desc.req_valid && !dma_desc.req_ready;
+
+    axis_cpl.tkeep <= '0;
+    axis_cpl.tid <= '0;
+    axis_cpl.tdest <= '0;
+    axis_cpl.tuser <= '0;
+    axis_cpl.tlast <= 1'b1;
+    axis_cpl.tvalid <= axis_cpl.tvalid && !axis_cpl.tready;
+
+    case (state_reg)
+        STATE_IDLE: begin
+            // request the next transmit descriptor
+            desc_req_reg <= 1'b1;
+            state_reg <= STATE_READ_DESC;
+        end
+        STATE_READ_DESC: begin
+            // descriptor fields used here: [127:64] buffer address, [47:32] length
+            axis_desc.tready <= 1'b1;
+
+            dma_rd_desc_req.req_src_addr <= axis_desc.tdata[127:64];
+            dma_rd_desc_req.req_dst_addr <= '0;
+            dma_rd_desc_req.req_len <= 20'(axis_desc.tdata[47:32]);
+
+            dma_desc.req_src_addr <= '0;
+            dma_desc.req_len <= axis_desc.tdata[47:32];
+
+            if (axis_desc.tvalid && axis_desc.tready) begin
+                if (axis_desc.tuser) begin
+                    // failed to read desc
+                    state_reg <= STATE_IDLE;
+                end else begin
+                    dma_rd_desc_req.req_valid <= 1'b1;
+                    state_reg <= STATE_READ_DATA;
+                end
+            end
+        end
+        STATE_READ_DATA: begin
+            // once the DMA read reports status, start the RAM-to-stream transfer
+            if (dma_rd_desc_sts.sts_valid) begin
+                dma_desc.req_valid <= 1'b1;
+                state_reg <= STATE_TX_DATA;
+            end
+        end
+        STATE_TX_DATA: begin
+            // raise the completion once the stream transfer reports status
+            if (dma_desc.sts_valid) begin
+                axis_cpl.tvalid <= 1'b1;
+                state_reg <= STATE_IDLE;
+            end
+        end
+        default: begin
+            state_reg <= STATE_IDLE;
+        end
+    endcase
+
+    if (rst) begin
+        state_reg <= STATE_IDLE;
+        // the case statement above still runs while rst is high, and valid
+        // flags otherwise hold their value, so force the handshakes idle
+        desc_req_reg <= 1'b0;
+        axis_desc.tready <= 1'b0;
+        dma_rd_desc_req.req_valid <= 1'b0;
+        dma_desc.req_valid <= 1'b0;
+        axis_cpl.tvalid <= 1'b0;
+    end
+end
+
+taxi_dma_ram_if #(
+    .SEGS(dma_ram_wr.SEGS),
+    .SEG_ADDR_W(dma_ram_wr.SEG_ADDR_W),
+    .SEG_DATA_W(dma_ram_wr.SEG_DATA_W),
+    .SEG_BE_W(dma_ram_wr.SEG_BE_W)
+) dma_ram_rd();
+
+taxi_dma_psdpram #(
+    .SIZE(4096),
+    .PIPELINE(2)
+)
+ram_inst (
+    .clk(clk),
+    .rst(rst),
+
+    /*
+     * Write port
+     */
+    .dma_ram_wr(dma_ram_wr),
+
+    /*
+     * Read port
+     */
+    .dma_ram_rd(dma_ram_rd)
+);
+
+taxi_dma_client_axis_source
+dma_inst (
+    .clk(clk),
+    .rst(rst),
+
+    /*
+     * DMA descriptor
+     */
+    .desc_req(dma_desc),
+    .desc_sts(dma_desc),
+
+    /*
+     * AXI stream read data output
+     */
+    .m_axis_rd_data(tx_data),
+
+    /*
+     * RAM interface
+     */
+    .dma_ram_rd(dma_ram_rd),
+
+    /*
+     * Configuration
+     */
+    .enable(1'b1)
+);
+
+endmodule
+
+`resetall
diff --git a/src/cndm_proto/tb/cndm_proto.py b/src/cndm_proto/tb/cndm_proto.py
new file mode 100644
index 0000000..a0f3d8d
--- /dev/null
+++ b/src/cndm_proto/tb/cndm_proto.py
@@ -0,0 +1,276 @@
+# SPDX-License-Identifier: CERN-OHL-S-2.0
+"""
+
+Copyright (c) 2025 FPGA Ninja, LLC
+
+Authors:
+- Alex Forencich
+
+"""
+
+import logging
+import struct
+from collections import deque
+
+from cocotb.queue import Queue
+
+class Port:
+    def __init__(self, driver, index, hw_regs):
+        self.driver = driver
+        self.log = driver.log
+        self.index = index
+        self.hw_regs = hw_regs
+
+        self.rxq_log_size = (256).bit_length()-1
+        self.rxq_size = 2**self.rxq_log_size
+        self.rxq_mask = self.rxq_size-1
+        self.rxq = None
+        self.rxq_prod = 0
+        self.rxq_cons = 0
+
+        self.rx_info = [None] * self.rxq_size
+
+        self.rxcq_log_size = (256).bit_length()-1
+        self.rxcq_size = 2**self.rxcq_log_size
+        self.rxcq_mask = self.rxcq_size-1
+        self.rxcq = None
+        self.rxcq_prod = 0
+        self.rxcq_cons = 0
+
+        self.txq_log_size = (256).bit_length()-1
+        self.txq_size = 2**self.txq_log_size
+        self.txq_mask = self.txq_size-1
+        self.txq = None
+        self.txq_prod = 0
+        self.txq_cons = 0
+
+        self.tx_info = [None] * self.txq_size
+
+        self.txcq_log_size = (256).bit_length()-1
+        self.txcq_size = 2**self.txcq_log_size
+        self.txcq_mask = self.txcq_size-1
+        self.txcq = None
+        self.txcq_prod = 0
+        self.txcq_cons = 0
+
+        self.rx_queue = Queue()
+
+    async def init(self):
+
+        self.rxq = self.driver.pool.alloc_region(self.rxq_size*16)
+        addr = self.rxq.get_absolute_address(0)
+        await self.hw_regs.write_dword(0x0200, 0x00000000)
+        await self.hw_regs.write_dword(0x0204, 0x00000000)
+        await self.hw_regs.write_dword(0x0208, addr & 0xffffffff)
+        await self.hw_regs.write_dword(0x020c, addr >> 32)
+        await self.hw_regs.write_dword(0x0200, 0x00000001 | (self.rxq_log_size << 16))
+
+        self.rxcq =
self.driver.pool.alloc_region(self.rxcq_size*16) + addr = self.rxcq.get_absolute_address(0) + await self.hw_regs.write_dword(0x0400, 0x00000000) + await self.hw_regs.write_dword(0x0408, addr & 0xffffffff) + await self.hw_regs.write_dword(0x040c, addr >> 32) + await self.hw_regs.write_dword(0x0400, 0x00000001 | (self.rxcq_log_size << 16)) + + self.txq = self.driver.pool.alloc_region(self.txq_size*16) + addr = self.txq.get_absolute_address(0) + await self.hw_regs.write_dword(0x0100, 0x00000000) + await self.hw_regs.write_dword(0x0104, 0x00000000) + await self.hw_regs.write_dword(0x0108, addr & 0xffffffff) + await self.hw_regs.write_dword(0x010c, addr >> 32) + await self.hw_regs.write_dword(0x0100, 0x00000001 | (self.txq_log_size << 16)) + + self.txcq = self.driver.pool.alloc_region(self.txcq_size*16) + addr = self.txcq.get_absolute_address(0) + await self.hw_regs.write_dword(0x0300, 0x00000000) + await self.hw_regs.write_dword(0x0308, addr & 0xffffffff) + await self.hw_regs.write_dword(0x030c, addr >> 32) + await self.hw_regs.write_dword(0x0300, 0x00000001 | (self.txcq_log_size << 16)) + + # wait for writes to complete + await self.hw_regs.read_dword(0) + + await self.refill_rx_buffers() + + async def start_xmit(self, data): + headroom = 10 + tx_buf = self.driver.alloc_pkt() + await tx_buf.write(headroom, data) + index = self.txq_prod & self.txq_mask + ptr = tx_buf.get_absolute_address(0) + struct.pack_into('