cndm_proto: Initial commit of corundum-proto

Signed-off-by: Alex Forencich <alex@alexforencich.com>
Alex Forencich
2025-12-31 17:32:18 -08:00
parent 4da6771603
commit 74e49a77e2
22 changed files with 4211 additions and 0 deletions

src/cndm_proto/lib/taxi Symbolic link
View File

@@ -0,0 +1 @@
../../../

View File

@@ -0,0 +1,31 @@
# SPDX-License-Identifier: GPL
# Copyright (c) 2025 FPGA Ninja
ifneq ($(KERNELRELEASE),)
obj-m += cndm_proto.o
cndm_proto-y += cndm_proto_main.o
cndm_proto-y += cndm_proto_netdev.o
cndm_proto-y += cndm_proto_tx.o
cndm_proto-y += cndm_proto_rx.o
ifneq ($(DEBUG),)
ccflags-y += -DDEBUG
endif
else
ifneq ($(KERNEL_SRC),)
KDIR ?= $(KERNEL_SRC)
endif
KDIR ?= /lib/modules/$(shell uname -r)/build
all: modules
help modules modules_install clean:
$(MAKE) -C $(KDIR) M=$(shell pwd) $@
install: modules_install
endif
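# Typical out-of-tree usage (assuming headers for the running kernel are
# installed): "make" builds cndm_proto.ko against
# /lib/modules/$(uname -r)/build (or KDIR/KERNEL_SRC if set), "make install"
# runs the modules_install target, and "make DEBUG=1" enables the
# dev_dbg()/netdev_dbg() output by adding -DDEBUG.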

View File

@@ -0,0 +1,125 @@
/* SPDX-License-Identifier: GPL */
// Copyright (c) 2025 FPGA Ninja
#ifndef CNDM_PROTO_H
#define CNDM_PROTO_H
#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#define DRIVER_NAME "cndm_proto"
#define DRIVER_VERSION "0.1"
struct cndm_proto_dev {
struct pci_dev *pdev;
struct device *dev;
struct net_device *ndev[32];
void __iomem *bar;
resource_size_t bar_len;
u32 port_count;
u32 port_offset;
u32 port_stride;
};
struct cndm_proto_tx_info {
struct sk_buff *skb;
dma_addr_t dma_addr;
u32 len;
};
struct cndm_proto_rx_info {
struct page *page;
dma_addr_t dma_addr;
u32 len;
};
struct cndm_proto_priv {
struct device *dev;
struct net_device *ndev;
struct cndm_proto_dev *cdev;
bool registered;
bool port_up;
void __iomem *hw_addr;
size_t txq_region_len;
void *txq_region;
dma_addr_t txq_region_addr;
struct cndm_proto_tx_info *tx_info;
struct cndm_proto_rx_info *rx_info;
struct netdev_queue *tx_queue;
struct napi_struct tx_napi;
struct napi_struct rx_napi;
u32 txq_log_size;
u32 txq_size;
u32 txq_mask;
u32 txq_prod;
u32 txq_cons;
size_t rxq_region_len;
void *rxq_region;
dma_addr_t rxq_region_addr;
u32 rxq_log_size;
u32 rxq_size;
u32 rxq_mask;
u32 rxq_prod;
u32 rxq_cons;
size_t txcq_region_len;
void *txcq_region;
dma_addr_t txcq_region_addr;
u32 txcq_log_size;
u32 txcq_size;
u32 txcq_mask;
u32 txcq_prod;
u32 txcq_cons;
size_t rxcq_region_len;
void *rxcq_region;
dma_addr_t rxcq_region_addr;
u32 rxcq_log_size;
u32 rxcq_size;
u32 rxcq_mask;
u32 rxcq_prod;
u32 rxcq_cons;
};
struct cndm_proto_desc {
__u8 rsvd[4];
__le32 len;
__le64 addr;
};
struct cndm_proto_cpl {
__u8 rsvd[4];
__le32 len;
__u8 rsvd2[7];
__u8 phase;
};
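/*
 * Descriptors and completions are both fixed 16-byte records: a descriptor
 * carries a buffer length and DMA address, a completion carries the
 * transferred length plus a phase tag in the MSB of the final byte. The
 * phase tag flips on each pass of the producer around the ring, so the
 * driver detects fresh completions by comparing it against the wrap parity
 * of its own consumer pointer (see cndm_proto_rx.c and cndm_proto_tx.c).
 */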
irqreturn_t cndm_proto_irq(int irqn, void *data);
struct net_device *cndm_proto_create_netdev(struct cndm_proto_dev *cdev, int port, void __iomem *hw_addr);
void cndm_proto_destroy_netdev(struct net_device *ndev);
int cndm_proto_free_tx_buf(struct cndm_proto_priv *priv);
int cndm_proto_poll_tx_cq(struct napi_struct *napi, int budget);
netdev_tx_t cndm_proto_start_xmit(struct sk_buff *skb, struct net_device *ndev);
int cndm_proto_free_rx_buf(struct cndm_proto_priv *priv);
int cndm_proto_refill_rx_buffers(struct cndm_proto_priv *priv);
int cndm_proto_poll_rx_cq(struct napi_struct *napi, int budget);
#endif

View File

@@ -0,0 +1,28 @@
#!/bin/bash
module=cndm_proto
control=/proc/dynamic_debug/control
if ! test -f $control; then
control=/sys/kernel/debug/dynamic_debug/control
fi
if ! test -f $control; then
>&2 echo "Error: dynamic debug control file not found"
exit 1
fi
if [ $# -eq 0 ]; then
>&2 echo "Error: no argument provided"
>&2 echo "usage: $0 [stmt]"
>&2 echo "Disable all debug print statements: $0 =_"
>&2 echo "Enable all debug print statements: $0 =p"
>&2 echo "More verbose: $0 =pflmt"
>&2 echo "Pattern match: $0 format \"some-string\" =p"
>&2 echo "Current configuration:"
grep "\[$module\]" $control >&2
exit 1
fi
echo module $module "${@@Q}" > $control

View File

@@ -0,0 +1,168 @@
// SPDX-License-Identifier: GPL
// Copyright (c) 2025 FPGA Ninja
#include "cndm_proto.h"
#include <linux/module.h>
#include <linux/delay.h>
MODULE_DESCRIPTION("Corundum-proto device driver");
MODULE_AUTHOR("FPGA Ninja");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRIVER_VERSION);
static int cndm_proto_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
struct device *dev = &pdev->dev;
struct cndm_proto_dev *cdev;
int ret = 0;
int k;
dev_info(dev, DRIVER_NAME " PCI probe");
dev_info(dev, "Corundum-proto device driver");
dev_info(dev, "Version " DRIVER_VERSION);
dev_info(dev, "Copyright (c) 2025 FPGA Ninja");
dev_info(dev, "https://fpga.ninja/");
pcie_print_link_status(pdev);
cdev = devm_kzalloc(dev, sizeof(struct cndm_proto_dev), GFP_KERNEL);
if (!cdev)
return -ENOMEM;
cdev->pdev = pdev;
cdev->dev = dev;
pci_set_drvdata(pdev, cdev);
ret = pci_enable_device_mem(pdev);
if (ret) {
dev_err(dev, "Failed to enable device");
goto fail_enable_device;
}
pci_set_master(pdev);
ret = pci_request_regions(pdev, DRIVER_NAME);
if (ret) {
dev_err(dev, "Failed to reserve regions");
goto fail_regions;
}
cdev->bar_len = pci_resource_len(pdev, 0);
dev_info(dev, "BAR size: %llu", cdev->bar_len);
cdev->bar = pci_ioremap_bar(pdev, 0);
if (!cdev->bar) {
ret = -ENOMEM;
dev_err(dev, "Failed to map BAR 0");
goto fail_map_bars;
}
if (ioread32(cdev->bar + 0x0000) == 0xffffffff) {
ret = -EIO;
dev_err(dev, "Device needs to be reset");
goto fail_map_bars;
}
ret = pci_alloc_irq_vectors(pdev, 1, 32, PCI_IRQ_MSI | PCI_IRQ_MSIX);
if (ret < 0) {
dev_err(dev, "Failed to allocate IRQs");
goto fail_map_bars;
}
cdev->port_count = ioread32(cdev->bar + 0x0100);
cdev->port_offset = ioread32(cdev->bar + 0x0104);
cdev->port_stride = ioread32(cdev->bar + 0x0108);
dev_info(dev, "Port count: %d", cdev->port_count);
dev_info(dev, "Port offset: 0x%x", cdev->port_offset);
dev_info(dev, "Port stride: 0x%x", cdev->port_stride);
for (k = 0; k < cdev->port_count; k++) {
struct net_device *ndev;
ndev = cndm_proto_create_netdev(cdev, k, cdev->bar + cdev->port_offset + (cdev->port_stride*k));
if (IS_ERR_OR_NULL(ndev)) {
ret = PTR_ERR(ndev);
goto fail_netdev;
}
ret = pci_request_irq(pdev, k, cndm_proto_irq, 0, ndev, DRIVER_NAME);
if (ret < 0) {
dev_err(dev, "Failed to request IRQ");
cndm_proto_destroy_netdev(ndev);
goto fail_netdev;
}
cdev->ndev[k] = ndev;
}
return 0;
fail_netdev:
for (k = 0; k < 32; k++) {
if (cdev->ndev[k]) {
pci_free_irq(pdev, k, cdev->ndev[k]);
cndm_proto_destroy_netdev(cdev->ndev[k]);
cdev->ndev[k] = NULL;
}
}
pci_free_irq_vectors(pdev);
fail_map_bars:
if (cdev->bar)
pci_iounmap(pdev, cdev->bar);
pci_release_regions(pdev);
fail_regions:
pci_clear_master(pdev);
pci_disable_device(pdev);
fail_enable_device:
return ret;
}
static void cndm_proto_pci_remove(struct pci_dev *pdev)
{
struct device *dev = &pdev->dev;
struct cndm_proto_dev *cdev = pci_get_drvdata(pdev);
int k;
dev_info(dev, DRIVER_NAME " PCI remove");
for (k = 0; k < 32; k++) {
if (cdev->ndev[k]) {
pci_free_irq(pdev, k, cdev->ndev[k]);
cndm_proto_destroy_netdev(cdev->ndev[k]);
cdev->ndev[k] = NULL;
}
}
pci_free_irq_vectors(pdev);
if (cdev->bar)
pci_iounmap(pdev, cdev->bar);
pci_release_regions(pdev);
pci_clear_master(pdev);
pci_disable_device(pdev);
}
static const struct pci_device_id cndm_proto_pci_id_table[] = {
{PCI_DEVICE(0x1234, 0xC070)},
{0}
};
static struct pci_driver cndm_proto_driver = {
.name = DRIVER_NAME,
.id_table = cndm_proto_pci_id_table,
.probe = cndm_proto_pci_probe,
.remove = cndm_proto_pci_remove
};
static int __init cndm_proto_init(void)
{
return pci_register_driver(&cndm_proto_driver);
}
static void __exit cndm_proto_exit(void)
{
pci_unregister_driver(&cndm_proto_driver);
}
module_init(cndm_proto_init);
module_exit(cndm_proto_exit);

View File

@@ -0,0 +1,243 @@
// SPDX-License-Identifier: GPL
// Copyright (c) 2025 FPGA Ninja
#include "cndm_proto.h"
static int cndm_proto_open(struct net_device *ndev)
{
struct cndm_proto_priv *priv = netdev_priv(ndev);
cndm_proto_refill_rx_buffers(priv);
priv->tx_queue = netdev_get_tx_queue(ndev, 0);
netif_napi_add_tx(ndev, &priv->tx_napi, cndm_proto_poll_tx_cq);
napi_enable(&priv->tx_napi);
netif_napi_add(ndev, &priv->rx_napi, cndm_proto_poll_rx_cq);
napi_enable(&priv->rx_napi);
netif_tx_start_all_queues(ndev);
netif_carrier_on(ndev);
netif_device_attach(ndev);
priv->port_up = 1;
return 0;
}
static int cndm_proto_close(struct net_device *ndev)
{
struct cndm_proto_priv *priv = netdev_priv(ndev);
priv->port_up = 0;
napi_disable(&priv->tx_napi);
netif_napi_del(&priv->tx_napi);
napi_disable(&priv->rx_napi);
netif_napi_del(&priv->rx_napi);
netif_tx_stop_all_queues(ndev);
netif_carrier_off(ndev);
netif_tx_disable(ndev);
return 0;
}
static const struct net_device_ops cndm_proto_netdev_ops = {
.ndo_open = cndm_proto_open,
.ndo_stop = cndm_proto_close,
.ndo_start_xmit = cndm_proto_start_xmit,
};
irqreturn_t cndm_proto_irq(int irqn, void *data)
{
struct net_device *ndev = data;
struct cndm_proto_priv *priv = netdev_priv(ndev);
netdev_dbg(ndev, "Interrupt");
if (priv->port_up) {
napi_schedule_irqoff(&priv->tx_napi);
napi_schedule_irqoff(&priv->rx_napi);
}
return IRQ_HANDLED;
}
struct net_device *cndm_proto_create_netdev(struct cndm_proto_dev *cdev, int port, void __iomem *hw_addr)
{
struct device *dev = cdev->dev;
struct net_device *ndev;
struct cndm_proto_priv *priv;
int ret = 0;
ndev = alloc_etherdev_mqs(sizeof(*priv), 1, 1);
if (!ndev) {
dev_err(dev, "Failed to allocate net_device");
return ERR_PTR(-ENOMEM);
}
SET_NETDEV_DEV(ndev, dev);
ndev->dev_port = port;
priv = netdev_priv(ndev);
memset(priv, 0, sizeof(*priv));
priv->dev = dev;
priv->ndev = ndev;
priv->cdev = cdev;
priv->hw_addr = hw_addr;
netif_set_real_num_tx_queues(ndev, 1);
netif_set_real_num_rx_queues(ndev, 1);
ndev->addr_len = ETH_ALEN;
eth_hw_addr_random(ndev);
ndev->netdev_ops = &cndm_proto_netdev_ops;
ndev->hw_features = 0;
ndev->features = 0;
ndev->min_mtu = ETH_MIN_MTU;
ndev->max_mtu = 1500;
priv->rxq_log_size = ilog2(256);
priv->rxq_size = 1 << priv->rxq_log_size;
priv->rxq_mask = priv->rxq_size-1;
priv->rxq_prod = 0;
priv->rxq_cons = 0;
priv->txq_log_size = ilog2(256);
priv->txq_size = 1 << priv->txq_log_size;
priv->txq_mask = priv->txq_size-1;
priv->txq_prod = 0;
priv->txq_cons = 0;
priv->rxcq_log_size = ilog2(256);
priv->rxcq_size = 1 << priv->rxcq_log_size;
priv->rxcq_mask = priv->rxcq_size-1;
priv->rxcq_prod = 0;
priv->rxcq_cons = 0;
priv->txcq_log_size = ilog2(256);
priv->txcq_size = 1 << priv->txcq_log_size;
priv->txcq_mask = priv->txcq_size-1;
priv->txcq_prod = 0;
priv->txcq_cons = 0;
// allocate DMA buffers
priv->txq_region_len = priv->txq_size*16;
priv->txq_region = dma_alloc_coherent(dev, priv->txq_region_len, &priv->txq_region_addr, GFP_KERNEL | __GFP_ZERO);
if (!priv->txq_region) {
ret = -ENOMEM;
goto fail;
}
priv->rxq_region_len = priv->rxq_size*16;
priv->rxq_region = dma_alloc_coherent(dev, priv->rxq_region_len, &priv->rxq_region_addr, GFP_KERNEL | __GFP_ZERO);
if (!priv->rxq_region) {
ret = -ENOMEM;
goto fail;
}
priv->txcq_region_len = priv->txcq_size*16;
priv->txcq_region = dma_alloc_coherent(dev, priv->txcq_region_len, &priv->txcq_region_addr, GFP_KERNEL | __GFP_ZERO);
if (!priv->txcq_region) {
ret = -ENOMEM;
goto fail;
}
priv->rxcq_region_len = priv->rxcq_size*16;
priv->rxcq_region = dma_alloc_coherent(dev, priv->rxcq_region_len, &priv->rxcq_region_addr, GFP_KERNEL | __GFP_ZERO);
if (!priv->rxcq_region) {
ret = -ENOMEM;
goto fail;
}
// allocate info rings
priv->tx_info = kvzalloc(sizeof(*priv->tx_info) * priv->txq_size, GFP_KERNEL);
if (!priv->tx_info) {
ret = -ENOMEM;
goto fail;
}
priv->rx_info = kvzalloc(sizeof(*priv->rx_info) * priv->rxq_size, GFP_KERNEL);
if (!priv->rx_info) {
ret = -ENOMEM;
goto fail;
}
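/*
 * Program the per-port queue registers (offsets relative to hw_addr):
 * 0x100/0x200 control the TX/RX descriptor rings, 0x300/0x400 the TX/RX
 * completion rings. Each ring is first disabled by writing 0 to its control
 * register, the producer pointer (+0x4, descriptor rings only) and the
 * 64-bit ring base address (+0x8/+0xc) are programmed, and the ring is then
 * enabled with bit 0 set and the log2 ring size in bits 19:16.
 */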
iowrite32(0x00000000, priv->hw_addr + 0x200);
iowrite32(priv->rxq_prod & 0xffff, priv->hw_addr + 0x204);
iowrite32(priv->rxq_region_addr & 0xffffffff, priv->hw_addr + 0x208);
iowrite32(priv->rxq_region_addr >> 32, priv->hw_addr + 0x20c);
iowrite32(0x00000001 | (priv->rxq_log_size << 16), priv->hw_addr + 0x200);
iowrite32(0x00000000, priv->hw_addr + 0x100);
iowrite32(priv->txq_prod & 0xffff, priv->hw_addr + 0x104);
iowrite32(priv->txq_region_addr & 0xffffffff, priv->hw_addr + 0x108);
iowrite32(priv->txq_region_addr >> 32, priv->hw_addr + 0x10c);
iowrite32(0x00000001 | (priv->txq_log_size << 16), priv->hw_addr + 0x100);
iowrite32(0x00000000, priv->hw_addr + 0x400);
iowrite32(priv->rxcq_region_addr & 0xffffffff, priv->hw_addr + 0x408);
iowrite32(priv->rxcq_region_addr >> 32, priv->hw_addr + 0x40c);
iowrite32(0x00000001 | (priv->rxcq_log_size << 16), priv->hw_addr + 0x400);
iowrite32(0x00000000, priv->hw_addr + 0x300);
iowrite32(priv->txcq_region_addr & 0xffffffff, priv->hw_addr + 0x308);
iowrite32(priv->txcq_region_addr >> 32, priv->hw_addr + 0x30c);
iowrite32(0x00000001 | (priv->txcq_log_size << 16), priv->hw_addr + 0x300);
netif_carrier_off(ndev);
ret = register_netdev(ndev);
if (ret) {
dev_err(dev, "netdev registration failed");
goto fail;
}
priv->registered = 1;
return ndev;
fail:
cndm_proto_destroy_netdev(ndev);
return ERR_PTR(ret);
}
void cndm_proto_destroy_netdev(struct net_device *ndev)
{
struct cndm_proto_priv *priv = netdev_priv(ndev);
struct device *dev = priv->dev;
iowrite32(0x00000000, priv->hw_addr + 0x200);
iowrite32(0x00000000, priv->hw_addr + 0x100);
iowrite32(0x00000000, priv->hw_addr + 0x400);
iowrite32(0x00000000, priv->hw_addr + 0x300);
if (priv->registered)
unregister_netdev(ndev);
if (priv->tx_info) {
cndm_proto_free_tx_buf(priv);
kvfree(priv->tx_info);
}
if (priv->rx_info) {
cndm_proto_free_rx_buf(priv);
kvfree(priv->rx_info);
}
if (priv->txq_region)
dma_free_coherent(dev, priv->txq_region_len, priv->txq_region, priv->txq_region_addr);
if (priv->rxq_region)
dma_free_coherent(dev, priv->rxq_region_len, priv->rxq_region, priv->rxq_region_addr);
if (priv->txcq_region)
dma_free_coherent(dev, priv->txcq_region_len, priv->txcq_region, priv->txcq_region_addr);
if (priv->rxcq_region)
dma_free_coherent(dev, priv->rxcq_region_len, priv->rxcq_region, priv->rxcq_region_addr);
free_netdev(ndev);
}

View File

@@ -0,0 +1,186 @@
// SPDX-License-Identifier: GPL
// Copyright (c) 2025 FPGA Ninja
#include "cndm_proto.h"
static void cndm_proto_free_rx_desc(struct cndm_proto_priv *priv, int index)
{
struct device *dev = priv->dev;
struct cndm_proto_rx_info *rx_info = &priv->rx_info[index];
netdev_dbg(priv->ndev, "Free RX desc index %d", index);
if (!rx_info->page)
return;
dma_unmap_page(dev, rx_info->dma_addr, rx_info->len, DMA_FROM_DEVICE);
rx_info->dma_addr = 0;
__free_pages(rx_info->page, 0);
rx_info->page = NULL;
}
int cndm_proto_free_rx_buf(struct cndm_proto_priv *priv)
{
u32 index;
int cnt = 0;
while (priv->rxq_prod != priv->rxq_cons) {
index = priv->rxq_cons & priv->rxq_mask;
cndm_proto_free_rx_desc(priv, index);
priv->rxq_cons++;
cnt++;
}
return cnt;
}
static int cndm_proto_prepare_rx_desc(struct cndm_proto_priv *priv, int index)
{
struct device *dev = priv->dev;
struct cndm_proto_rx_info *rx_info = &priv->rx_info[index];
struct cndm_proto_desc *rx_desc = (struct cndm_proto_desc *)(priv->rxq_region + index*16);
struct page *page;
u32 len = PAGE_SIZE;
dma_addr_t dma_addr;
netdev_dbg(priv->ndev, "Prepare RX desc index %d", index);
page = dev_alloc_pages(0);
if (unlikely(!page)) {
netdev_err(priv->ndev, "Failed to allocate page");
return -ENOMEM;
}
dma_addr = dma_map_page(dev, page, 0, len, DMA_FROM_DEVICE);
if (unlikely(dma_mapping_error(dev, dma_addr))) {
netdev_err(priv->ndev, "Mapping failed");
__free_pages(page, 0);
return -1;
}
rx_desc->len = cpu_to_le32(len);
rx_desc->addr = cpu_to_le64(dma_addr);
rx_info->page = page;
rx_info->len = len;
rx_info->dma_addr = dma_addr;
return 0;
}
int cndm_proto_refill_rx_buffers(struct cndm_proto_priv *priv)
{
u32 missing = 128 - (priv->rxq_prod - priv->rxq_cons); // TODO
int ret = 0;
if (missing < 8)
return 0;
for (; missing-- > 0;) {
ret = cndm_proto_prepare_rx_desc(priv, priv->rxq_prod & priv->rxq_mask);
if (ret)
break;
priv->rxq_prod++;
}
dma_wmb();
iowrite32(priv->rxq_prod & 0xffff, priv->hw_addr + 0x204);
return ret;
}
static int cndm_proto_process_rx_cq(struct net_device *ndev, int napi_budget)
{
struct cndm_proto_priv *priv = netdev_priv(ndev);
struct cndm_proto_cpl *cpl;
struct cndm_proto_rx_info *rx_info;
struct sk_buff *skb;
struct page *page;
int done = 0;
u32 len;
u32 cq_cons_ptr;
u32 cq_index;
u32 cons_ptr;
u32 index;
cq_cons_ptr = priv->rxcq_cons;
cons_ptr = priv->rxq_cons;
while (done < napi_budget) {
cq_index = cq_cons_ptr & priv->rxcq_mask;
cpl = (struct cndm_proto_cpl *)(priv->rxcq_region + cq_index * 16);
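/* an entry is fresh when its phase tag differs from the wrap parity of
 * the consumer pointer; stop at the first stale (equal) entry */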
if (!!(cpl->phase & 0x80) == !!(cq_cons_ptr & priv->rxcq_size))
break;
dma_rmb();
index = cons_ptr & priv->rxq_mask;
rx_info = &priv->rx_info[index];
page = rx_info->page;
len = min_t(u32, le32_to_cpu(cpl->len), rx_info->len);
netdev_dbg(priv->ndev, "Process RX cpl index %d", index);
if (!page) {
netdev_err(priv->ndev, "Null page at index %d", index);
break;
}
dma_unmap_page(priv->dev, rx_info->dma_addr, rx_info->len, DMA_FROM_DEVICE);
rx_info->dma_addr = 0;
rx_info->page = NULL;
if (len < ETH_HLEN) {
netdev_warn(priv->ndev, "Dropping short frame (len %d)", len);
__free_pages(page, 0); /* page already unmapped and detached from rx_info */
goto rx_drop;
}
skb = napi_get_frags(&priv->rx_napi);
if (!skb) {
netdev_err(priv->ndev, "Failed to allocate skb %d", index);
break;
}
__skb_fill_page_desc(skb, 0, page, 0, len);
skb_shinfo(skb)->nr_frags = 1;
skb->len = len;
skb->data_len = len;
skb->truesize = rx_info->len;
napi_gro_frags(&priv->rx_napi);
rx_drop:
done++;
cq_cons_ptr++;
cons_ptr++;
}
priv->rxcq_cons = cq_cons_ptr;
priv->rxq_cons = cons_ptr;
cndm_proto_refill_rx_buffers(priv);
return done;
}
int cndm_proto_poll_rx_cq(struct napi_struct *napi, int budget)
{
struct cndm_proto_priv *priv = container_of(napi, struct cndm_proto_priv, rx_napi);
int done;
done = cndm_proto_process_rx_cq(priv->ndev, budget);
if (done == budget)
return done;
napi_complete(napi);
// TODO re-enable interrupts
return done;
}

View File

@@ -0,0 +1,156 @@
// SPDX-License-Identifier: GPL
// Copyright (c) 2025 FPGA Ninja
#include "cndm_proto.h"
static void cndm_proto_free_tx_desc(struct cndm_proto_priv *priv, int index, int napi_budget)
{
struct device *dev = priv->dev;
struct cndm_proto_tx_info *tx_info = &priv->tx_info[index];
struct sk_buff *skb = tx_info->skb;
netdev_dbg(priv->ndev, "Free TX desc index %d", index);
dma_unmap_single(dev, tx_info->dma_addr, tx_info->len, DMA_TO_DEVICE);
tx_info->dma_addr = 0;
napi_consume_skb(skb, napi_budget);
tx_info->skb = NULL;
}
int cndm_proto_free_tx_buf(struct cndm_proto_priv *priv)
{
u32 index;
int cnt = 0;
while (priv->txq_prod != priv->txq_cons) {
index = priv->txq_cons & priv->txq_mask;
cndm_proto_free_tx_desc(priv, index, 0);
priv->txq_cons++;
cnt++;
}
return cnt;
}
static int cndm_proto_process_tx_cq(struct net_device *ndev, int napi_budget)
{
struct cndm_proto_priv *priv = netdev_priv(ndev);
struct cndm_proto_cpl *cpl;
int done = 0;
u32 cq_cons_ptr;
u32 cq_index;
u32 cons_ptr;
u32 index;
cq_cons_ptr = priv->txcq_cons;
cons_ptr = priv->txq_cons;
while (done < napi_budget) {
cq_index = cq_cons_ptr & priv->txcq_mask;
cpl = (struct cndm_proto_cpl *)(priv->txcq_region + cq_index * 16);
if (!!(cpl->phase & 0x80) == !!(cq_cons_ptr & priv->txcq_size))
break;
dma_rmb();
index = cons_ptr & priv->txq_mask;
cndm_proto_free_tx_desc(priv, index, napi_budget);
done++;
cq_cons_ptr++;
cons_ptr++;
}
priv->txcq_cons = cq_cons_ptr;
priv->txq_cons = cons_ptr;
if (netif_tx_queue_stopped(priv->tx_queue) && (done != 0 || priv->txq_prod == priv->txq_cons))
netif_tx_wake_queue(priv->tx_queue);
return done;
}
int cndm_proto_poll_tx_cq(struct napi_struct *napi, int budget)
{
struct cndm_proto_priv *priv = container_of(napi, struct cndm_proto_priv, tx_napi);
int done;
done = cndm_proto_process_tx_cq(priv->ndev, budget);
if (done == budget)
return done;
napi_complete(napi);
// TODO re-enable interrupts
return done;
}
netdev_tx_t cndm_proto_start_xmit(struct sk_buff *skb, struct net_device *ndev)
{
struct cndm_proto_priv *priv = netdev_priv(ndev);
struct device *dev = priv->dev;
u32 index;
u32 cons_ptr;
u32 len;
dma_addr_t dma_addr;
struct cndm_proto_desc *tx_desc;
struct cndm_proto_tx_info *tx_info;
netdev_dbg(ndev, "Got packet for TX");
// TODO workaround for MAC padding bug
if (skb_put_padto(skb, 60))
return NETDEV_TX_OK; /* skb_put_padto() already freed the skb on failure */
if (skb->len < ETH_HLEN) {
netdev_warn(ndev, "Dropping short frame");
goto tx_drop;
}
cons_ptr = READ_ONCE(priv->txq_cons);
index = priv->txq_prod & priv->txq_mask;
tx_desc = (struct cndm_proto_desc *)(priv->txq_region + index*16);
tx_info = &priv->tx_info[index];
len = skb_headlen(skb);
dma_addr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
if (unlikely(dma_mapping_error(dev, dma_addr))) {
netdev_err(ndev, "Mapping failed");
goto tx_drop;
}
tx_desc->len = cpu_to_le32(len);
tx_desc->addr = cpu_to_le64(dma_addr);
tx_info->skb = skb;
tx_info->len = len;
tx_info->dma_addr = dma_addr;
netdev_dbg(ndev, "Write desc index %d len %d", index, len);
priv->txq_prod++;
if (priv->txq_prod - cons_ptr >= 128) {
netdev_dbg(ndev, "TX ring full");
netif_tx_stop_queue(priv->tx_queue);
}
dma_wmb();
iowrite32(priv->txq_prod & 0xffff, priv->hw_addr + 0x104);
return NETDEV_TX_OK;
tx_drop:
dev_kfree_skb_any(skb);
return NETDEV_TX_OK;
}

View File

@@ -0,0 +1,14 @@
cndm_proto_core.sv
cndm_proto_port.sv
cndm_proto_rx.sv
cndm_proto_tx.sv
cndm_proto_desc_rd.sv
cndm_proto_cpl_wr.sv
../lib/taxi/src/dma/rtl/taxi_dma_client_axis_source.sv
../lib/taxi/src/dma/rtl/taxi_dma_client_axis_sink.sv
../lib/taxi/src/dma/rtl/taxi_dma_if_mux.f
../lib/taxi/src/dma/rtl/taxi_dma_psdpram.sv
../lib/taxi/src/axi/rtl/taxi_axil_interconnect_1s.f
../lib/taxi/src/axis/rtl/taxi_axis_async_fifo.f
../lib/taxi/src/axis/rtl/taxi_axis_arb_mux.f
../lib/taxi/src/axis/rtl/taxi_axis_demux.sv

View File

@@ -0,0 +1,299 @@
// SPDX-License-Identifier: CERN-OHL-S-2.0
/*
Copyright (c) 2025 FPGA Ninja, LLC
Authors:
- Alex Forencich
*/
`resetall
`timescale 1ns / 1ps
`default_nettype none
/*
* Corundum-proto core logic
*/
module cndm_proto_core #(
parameter PORTS = 2
)
(
input wire logic clk,
input wire logic rst,
/*
* Control register interface
*/
taxi_axil_if.wr_slv s_axil_wr,
taxi_axil_if.rd_slv s_axil_rd,
/*
* DMA
*/
taxi_dma_desc_if.req_src dma_rd_desc_req,
taxi_dma_desc_if.sts_snk dma_rd_desc_sts,
taxi_dma_desc_if.req_src dma_wr_desc_req,
taxi_dma_desc_if.sts_snk dma_wr_desc_sts,
taxi_dma_ram_if.wr_slv dma_ram_wr,
taxi_dma_ram_if.rd_slv dma_ram_rd,
output wire logic [PORTS-1:0] irq,
/*
* Ethernet
*/
input wire logic mac_tx_clk[PORTS],
input wire logic mac_tx_rst[PORTS],
taxi_axis_if.src mac_axis_tx[PORTS],
taxi_axis_if.snk mac_axis_tx_cpl[PORTS],
input wire logic mac_rx_clk[PORTS],
input wire logic mac_rx_rst[PORTS],
taxi_axis_if.snk mac_axis_rx[PORTS]
);
localparam CL_PORTS = $clog2(PORTS);
localparam AXIL_ADDR_W = s_axil_wr.ADDR_W;
localparam AXIL_DATA_W = s_axil_wr.DATA_W;
localparam RAM_SEGS = dma_ram_wr.SEGS;
localparam RAM_SEG_ADDR_W = dma_ram_wr.SEG_ADDR_W;
localparam RAM_SEG_DATA_W = dma_ram_wr.SEG_DATA_W;
localparam RAM_SEG_BE_W = dma_ram_wr.SEG_BE_W;
localparam RAM_SEL_W = dma_ram_wr.SEL_W;
localparam PORT_OFFSET = 1;
taxi_axil_if #(
.DATA_W(s_axil_wr.DATA_W),
.ADDR_W(16),
.STRB_W(s_axil_wr.STRB_W),
.AWUSER_EN(s_axil_wr.AWUSER_EN),
.AWUSER_W(s_axil_wr.AWUSER_W),
.WUSER_EN(s_axil_wr.WUSER_EN),
.WUSER_W(s_axil_wr.WUSER_W),
.BUSER_EN(s_axil_wr.BUSER_EN),
.BUSER_W(s_axil_wr.BUSER_W),
.ARUSER_EN(s_axil_wr.ARUSER_EN),
.ARUSER_W(s_axil_wr.ARUSER_W),
.RUSER_EN(s_axil_wr.RUSER_EN),
.RUSER_W(s_axil_wr.RUSER_W)
)
s_axil_ctrl[PORTS+PORT_OFFSET]();
taxi_axil_interconnect_1s #(
.M_COUNT($size(s_axil_ctrl)),
.ADDR_W(s_axil_wr.ADDR_W),
.M_REGIONS(1),
.M_BASE_ADDR('0),
.M_ADDR_W({$size(s_axil_ctrl){{1{32'd16}}}}),
.M_SECURE({$size(s_axil_ctrl){1'b0}})
)
port_intercon_inst (
.clk(clk),
.rst(rst),
/*
* AXI4-lite slave interface
*/
.s_axil_wr(s_axil_wr),
.s_axil_rd(s_axil_rd),
/*
* AXI4-lite master interfaces
*/
.m_axil_wr(s_axil_ctrl),
.m_axil_rd(s_axil_ctrl)
);
logic s_axil_awready_reg = 1'b0;
logic s_axil_wready_reg = 1'b0;
logic s_axil_bvalid_reg = 1'b0;
logic s_axil_arready_reg = 1'b0;
logic [AXIL_DATA_W-1:0] s_axil_rdata_reg = '0;
logic s_axil_rvalid_reg = 1'b0;
assign s_axil_ctrl[0].awready = s_axil_awready_reg;
assign s_axil_ctrl[0].wready = s_axil_wready_reg;
assign s_axil_ctrl[0].bresp = '0;
assign s_axil_ctrl[0].buser = '0;
assign s_axil_ctrl[0].bvalid = s_axil_bvalid_reg;
assign s_axil_ctrl[0].arready = s_axil_arready_reg;
assign s_axil_ctrl[0].rdata = s_axil_rdata_reg;
assign s_axil_ctrl[0].rresp = '0;
assign s_axil_ctrl[0].ruser = '0;
assign s_axil_ctrl[0].rvalid = s_axil_rvalid_reg;
always_ff @(posedge clk) begin
s_axil_awready_reg <= 1'b0;
s_axil_wready_reg <= 1'b0;
s_axil_bvalid_reg <= s_axil_bvalid_reg && !s_axil_ctrl[0].bready;
s_axil_arready_reg <= 1'b0;
s_axil_rvalid_reg <= s_axil_rvalid_reg && !s_axil_ctrl[0].rready;
if (s_axil_ctrl[0].awvalid && s_axil_ctrl[0].wvalid && !s_axil_bvalid_reg) begin
s_axil_awready_reg <= 1'b1;
s_axil_wready_reg <= 1'b1;
s_axil_bvalid_reg <= 1'b1;
case ({s_axil_ctrl[0].awaddr[15:2], 2'b00})
// 16'h0100: reg <= s_axil_ctrl[0].wdata;
default: begin end
endcase
end
if (s_axil_ctrl[0].arvalid && !s_axil_rvalid_reg) begin
s_axil_rdata_reg <= '0;
s_axil_arready_reg <= 1'b1;
s_axil_rvalid_reg <= 1'b1;
case ({s_axil_ctrl[0].araddr[15:2], 2'b00})
16'h0100: s_axil_rdata_reg <= PORTS; // port count
16'h0104: s_axil_rdata_reg <= 32'h00010000; // port offset
16'h0108: s_axil_rdata_reg <= 32'h00010000; // port stride
default: begin end
endcase
end
if (rst) begin
s_axil_awready_reg <= 1'b0;
s_axil_wready_reg <= 1'b0;
s_axil_bvalid_reg <= 1'b0;
s_axil_arready_reg <= 1'b0;
s_axil_rvalid_reg <= 1'b0;
end
end
taxi_dma_desc_if #(
.SRC_ADDR_W(dma_rd_desc_req.SRC_ADDR_W),
.SRC_SEL_EN(dma_rd_desc_req.SRC_SEL_EN),
.SRC_SEL_W(dma_rd_desc_req.SRC_SEL_W),
.SRC_ASID_EN(dma_rd_desc_req.SRC_ASID_EN),
.DST_ADDR_W(dma_rd_desc_req.DST_ADDR_W),
.DST_SEL_EN(dma_rd_desc_req.DST_SEL_EN),
.DST_SEL_W(dma_rd_desc_req.DST_SEL_W-CL_PORTS),
.DST_ASID_EN(dma_rd_desc_req.DST_ASID_EN),
.IMM_EN(dma_rd_desc_req.IMM_EN),
.LEN_W(dma_rd_desc_req.LEN_W),
.TAG_W(dma_rd_desc_req.TAG_W-CL_PORTS),
.ID_EN(dma_rd_desc_req.ID_EN),
.DEST_EN(dma_rd_desc_req.DEST_EN),
.USER_EN(dma_rd_desc_req.USER_EN)
) dma_rd_desc_int[PORTS]();
taxi_dma_desc_if #(
.SRC_ADDR_W(dma_wr_desc_req.SRC_ADDR_W),
.SRC_SEL_EN(dma_wr_desc_req.SRC_SEL_EN),
.SRC_SEL_W(dma_wr_desc_req.SRC_SEL_W-CL_PORTS),
.SRC_ASID_EN(dma_wr_desc_req.SRC_ASID_EN),
.DST_ADDR_W(dma_wr_desc_req.DST_ADDR_W),
.DST_SEL_EN(dma_wr_desc_req.DST_SEL_EN),
.DST_SEL_W(dma_wr_desc_req.DST_SEL_W),
.DST_ASID_EN(dma_wr_desc_req.DST_ASID_EN),
.IMM_EN(dma_wr_desc_req.IMM_EN),
.IMM_W(dma_wr_desc_req.IMM_W),
.LEN_W(dma_wr_desc_req.LEN_W),
.TAG_W(dma_wr_desc_req.TAG_W-CL_PORTS),
.ID_EN(dma_wr_desc_req.ID_EN),
.DEST_EN(dma_wr_desc_req.DEST_EN),
.USER_EN(dma_wr_desc_req.USER_EN)
) dma_wr_desc_int[PORTS]();
taxi_dma_ram_if #(
.SEGS(RAM_SEGS),
.SEG_ADDR_W(RAM_SEG_ADDR_W),
.SEG_DATA_W(RAM_SEG_DATA_W),
.SEG_BE_W(RAM_SEG_BE_W),
.SEL_W(RAM_SEL_W-CL_PORTS)
) dma_ram_int[PORTS]();
taxi_dma_if_mux #(
.PORTS(PORTS),
.ARB_ROUND_ROBIN(1),
.ARB_LSB_HIGH_PRIO(1)
)
dma_mux_inst (
.clk(clk),
.rst(rst),
/*
* DMA descriptors from clients
*/
.client_rd_req(dma_rd_desc_int),
.client_rd_sts(dma_rd_desc_int),
.client_wr_req(dma_wr_desc_int),
.client_wr_sts(dma_wr_desc_int),
/*
* DMA descriptors to DMA engines
*/
.dma_rd_req(dma_rd_desc_req),
.dma_rd_sts(dma_rd_desc_sts),
.dma_wr_req(dma_wr_desc_req),
.dma_wr_sts(dma_wr_desc_sts),
/*
* RAM interface (from DMA interface)
*/
.dma_ram_wr(dma_ram_wr),
.dma_ram_rd(dma_ram_rd),
/*
* RAM interface (towards RAM)
*/
.client_ram_wr(dma_ram_int),
.client_ram_rd(dma_ram_int)
);
for (genvar p = 0; p < PORTS; p = p + 1) begin : port
cndm_proto_port #(
.PORTS(PORTS)
)
port_inst (
.clk(clk),
.rst(rst),
/*
* Control register interface
*/
.s_axil_wr(s_axil_ctrl[PORT_OFFSET+p]),
.s_axil_rd(s_axil_ctrl[PORT_OFFSET+p]),
/*
* DMA
*/
.dma_rd_desc_req(dma_rd_desc_int[p]),
.dma_rd_desc_sts(dma_rd_desc_int[p]),
.dma_wr_desc_req(dma_wr_desc_int[p]),
.dma_wr_desc_sts(dma_wr_desc_int[p]),
.dma_ram_wr(dma_ram_int[p]),
.dma_ram_rd(dma_ram_int[p]),
.irq(irq[p]),
/*
* Ethernet
*/
.mac_tx_clk(mac_tx_clk[p]),
.mac_tx_rst(mac_tx_rst[p]),
.mac_axis_tx(mac_axis_tx[p]),
.mac_axis_tx_cpl(mac_axis_tx_cpl[p]),
.mac_rx_clk(mac_rx_clk[p]),
.mac_rx_rst(mac_rx_rst[p]),
.mac_axis_rx(mac_axis_rx[p])
);
end
endmodule
`resetall

View File

@@ -0,0 +1,227 @@
// SPDX-License-Identifier: CERN-OHL-S-2.0
/*
Copyright (c) 2025 FPGA Ninja, LLC
Authors:
- Alex Forencich
*/
`resetall
`timescale 1ns / 1ps
`default_nettype none
/*
* Corundum-proto completion write module
*/
module cndm_proto_cpl_wr
(
input wire logic clk,
input wire logic rst,
/*
* DMA
*/
taxi_dma_desc_if.req_src dma_wr_desc_req,
taxi_dma_desc_if.sts_snk dma_wr_desc_sts,
taxi_dma_ram_if.rd_slv dma_ram_rd,
input wire logic txcq_en,
input wire logic [3:0] txcq_size,
input wire logic [63:0] txcq_base_addr,
output wire logic [15:0] txcq_prod,
input wire logic rxcq_en,
input wire logic [3:0] rxcq_size,
input wire logic [63:0] rxcq_base_addr,
output wire logic [15:0] rxcq_prod,
taxi_axis_if.snk axis_cpl[2],
output wire logic irq
);
taxi_axis_if #(
.DATA_W(axis_cpl[0].DATA_W),
.KEEP_EN(axis_cpl[0].KEEP_EN),
.KEEP_W(axis_cpl[0].KEEP_W),
.STRB_EN(axis_cpl[0].STRB_EN),
.LAST_EN(axis_cpl[0].LAST_EN),
.ID_EN(1),
.ID_W(1),
.DEST_EN(axis_cpl[0].DEST_EN),
.DEST_W(axis_cpl[0].DEST_W),
.USER_EN(axis_cpl[0].USER_EN),
.USER_W(axis_cpl[0].USER_W)
) cpl_comb();
localparam [2:0]
STATE_IDLE = 0,
STATE_RX_CPL = 1,
STATE_WRITE_DATA = 2;
logic [2:0] state_reg = STATE_IDLE;
logic [15:0] txcq_prod_ptr_reg = '0;
logic [15:0] rxcq_prod_ptr_reg = '0;
logic phase_tag_reg = 1'b0;
logic irq_reg = 1'b0;
assign txcq_prod = txcq_prod_ptr_reg;
assign rxcq_prod = rxcq_prod_ptr_reg;
assign irq = irq_reg;
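// Completions from the TX and RX datapaths are arbitrated into a single
// stream (cpl_comb) and each is written to host memory as a 16-byte record;
// the top bit of the record carries the phase tag, the inverse of the
// producer pointer's wrap bit, so the driver can detect new entries without
// reading a producer pointer back over PCIe.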
always_ff @(posedge clk) begin
cpl_comb.tready <= 1'b0;
dma_wr_desc_req.req_src_sel <= '0;
dma_wr_desc_req.req_src_asid <= '0;
dma_wr_desc_req.req_dst_sel <= '0;
dma_wr_desc_req.req_dst_asid <= '0;
dma_wr_desc_req.req_imm <= '0;
dma_wr_desc_req.req_imm_en <= '0;
dma_wr_desc_req.req_len <= 16;
dma_wr_desc_req.req_tag <= '0;
dma_wr_desc_req.req_id <= '0;
dma_wr_desc_req.req_dest <= '0;
dma_wr_desc_req.req_user <= '0;
dma_wr_desc_req.req_valid <= dma_wr_desc_req.req_valid && !dma_wr_desc_req.req_ready;
if (!txcq_en) begin
txcq_prod_ptr_reg <= '0;
end
if (!rxcq_en) begin
rxcq_prod_ptr_reg <= '0;
end
irq_reg <= 1'b0;
case (state_reg)
STATE_IDLE: begin
dma_wr_desc_req.req_src_addr <= '0;
if (cpl_comb.tid == 0) begin
dma_wr_desc_req.req_dst_addr <= txcq_base_addr + 64'(16'(txcq_prod_ptr_reg & ({16{1'b1}} >> (16 - txcq_size))) * 16);
phase_tag_reg <= !txcq_prod_ptr_reg[txcq_size];
if (cpl_comb.tvalid && !cpl_comb.tready) begin
txcq_prod_ptr_reg <= txcq_prod_ptr_reg + 1;
if (txcq_en) begin
dma_wr_desc_req.req_valid <= 1'b1;
state_reg <= STATE_WRITE_DATA;
end else begin
state_reg <= STATE_IDLE;
end
end
end else begin
dma_wr_desc_req.req_dst_addr <= rxcq_base_addr + 64'(16'(rxcq_prod_ptr_reg & ({16{1'b1}} >> (16 - rxcq_size))) * 16);
phase_tag_reg <= !rxcq_prod_ptr_reg[rxcq_size];
if (cpl_comb.tvalid && !cpl_comb.tready) begin
rxcq_prod_ptr_reg <= rxcq_prod_ptr_reg + 1;
if (rxcq_en) begin
dma_wr_desc_req.req_valid <= 1'b1;
state_reg <= STATE_WRITE_DATA;
end else begin
state_reg <= STATE_IDLE;
end
end
end
end
STATE_WRITE_DATA: begin
if (dma_wr_desc_sts.sts_valid) begin
cpl_comb.tready <= 1'b1;
irq_reg <= 1'b1;
state_reg <= STATE_IDLE;
end
end
default: begin
state_reg <= STATE_IDLE;
end
endcase
if (rst) begin
state_reg <= STATE_IDLE;
txcq_prod_ptr_reg <= '0;
rxcq_prod_ptr_reg <= '0;
irq_reg <= 1'b0;
end
end
taxi_axis_arb_mux #(
.S_COUNT(2),
.UPDATE_TID(1),
.ARB_ROUND_ROBIN(1),
.ARB_LSB_HIGH_PRIO(1)
)
mux_inst (
.clk(clk),
.rst(rst),
/*
* AXI4-Stream input (sink)
*/
.s_axis(axis_cpl),
/*
* AXI4-Stream output (source)
*/
.m_axis(cpl_comb)
);
// extract parameters
localparam SEGS = dma_ram_rd.SEGS;
localparam SEG_ADDR_W = dma_ram_rd.SEG_ADDR_W;
localparam SEG_DATA_W = dma_ram_rd.SEG_DATA_W;
localparam SEG_BE_W = dma_ram_rd.SEG_BE_W;
if (SEGS*SEG_DATA_W < 128)
$fatal(0, "Total segmented interface width must be at least 128 (instance %m)");
wire [SEGS-1:0][SEG_DATA_W-1:0] ram_data = (SEG_DATA_W*SEGS)'({phase_tag_reg, cpl_comb.tdata[126:0]});
for (genvar n = 0; n < SEGS; n = n + 1) begin
logic [0:0] rd_resp_valid_pipe_reg = '0;
logic [SEG_DATA_W-1:0] rd_resp_data_pipe_reg[1];
initial begin
for (integer i = 0; i < 1; i = i + 1) begin
rd_resp_data_pipe_reg[i] = '0;
end
end
always_ff @(posedge clk) begin
if (dma_ram_rd.rd_resp_ready[n]) begin
rd_resp_valid_pipe_reg[0] <= 1'b0;
end
for (integer j = 0; j > 0; j = j - 1) begin
if (dma_ram_rd.rd_resp_ready[n] || (1'(~rd_resp_valid_pipe_reg) >> j) != 0) begin
rd_resp_valid_pipe_reg[j] <= rd_resp_valid_pipe_reg[j-1];
rd_resp_data_pipe_reg[j] <= rd_resp_data_pipe_reg[j-1];
rd_resp_valid_pipe_reg[j-1] <= 1'b0;
end
end
if (dma_ram_rd.rd_cmd_valid[n] && dma_ram_rd.rd_cmd_ready[n]) begin
rd_resp_valid_pipe_reg[0] <= 1'b1;
rd_resp_data_pipe_reg[0] <= ram_data[0];
end
if (rst) begin
rd_resp_valid_pipe_reg <= '0;
end
end
assign dma_ram_rd.rd_cmd_ready[n] = dma_ram_rd.rd_resp_ready[n] || &rd_resp_valid_pipe_reg == 0;
assign dma_ram_rd.rd_resp_valid[n] = rd_resp_valid_pipe_reg[0];
assign dma_ram_rd.rd_resp_data[n] = rd_resp_data_pipe_reg[0];
end
endmodule
`resetall

View File

@@ -0,0 +1,266 @@
// SPDX-License-Identifier: CERN-OHL-S-2.0
/*
Copyright (c) 2025 FPGA Ninja, LLC
Authors:
- Alex Forencich
*/
`resetall
`timescale 1ns / 1ps
`default_nettype none
/*
* Corundum-proto descriptor read module
*/
module cndm_proto_desc_rd
(
input wire logic clk,
input wire logic rst,
/*
* DMA
*/
taxi_dma_desc_if.req_src dma_rd_desc_req,
taxi_dma_desc_if.sts_snk dma_rd_desc_sts,
taxi_dma_ram_if.wr_slv dma_ram_wr,
input wire logic txq_en,
input wire logic [3:0] txq_size,
input wire logic [63:0] txq_base_addr,
input wire logic [15:0] txq_prod,
output wire logic [15:0] txq_cons,
input wire logic rxq_en,
input wire logic [3:0] rxq_size,
input wire logic [63:0] rxq_base_addr,
input wire logic [15:0] rxq_prod,
output wire logic [15:0] rxq_cons,
input wire logic [1:0] desc_req,
taxi_axis_if.src axis_desc[2]
);
localparam RAM_ADDR_W = 16;
taxi_dma_desc_if #(
.SRC_ADDR_W(RAM_ADDR_W),
.SRC_SEL_EN(1'b0),
.SRC_ASID_EN(1'b0),
.DST_ADDR_W(RAM_ADDR_W),
.DST_SEL_EN(1'b0),
.DST_ASID_EN(1'b0),
.IMM_EN(1'b0),
.LEN_W(5),
.TAG_W(1),
.ID_EN(0),
.DEST_EN(1),
.DEST_W(1),
.USER_EN(1),
.USER_W(1)
) dma_desc();
localparam [2:0]
STATE_IDLE = 0,
STATE_READ_DESC = 1,
STATE_READ_DATA = 2,
STATE_TX_DESC = 3;
logic [2:0] state_reg = STATE_IDLE;
logic [1:0] desc_req_reg = '0;
logic [15:0] txq_cons_ptr_reg = '0;
logic [15:0] rxq_cons_ptr_reg = '0;
assign txq_cons = txq_cons_ptr_reg;
assign rxq_cons = rxq_cons_ptr_reg;
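// On a descriptor request, one 16-byte descriptor is DMA-read from host
// memory at base + (cons & mask) * 16 into the local RAM and streamed out on
// axis_desc (tdest selects TX vs RX); if the queue is empty or disabled, an
// empty descriptor is emitted with tuser set instead.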
always_ff @(posedge clk) begin
// axis_desc.tready <= 1'b0;
dma_rd_desc_req.req_src_sel <= '0;
dma_rd_desc_req.req_src_asid <= '0;
dma_rd_desc_req.req_dst_sel <= '0;
dma_rd_desc_req.req_dst_asid <= '0;
dma_rd_desc_req.req_imm <= '0;
dma_rd_desc_req.req_imm_en <= '0;
dma_rd_desc_req.req_len <= 16;
dma_rd_desc_req.req_tag <= '0;
dma_rd_desc_req.req_id <= '0;
dma_rd_desc_req.req_dest <= '0;
dma_rd_desc_req.req_user <= '0;
dma_rd_desc_req.req_valid <= dma_rd_desc_req.req_valid && !dma_rd_desc_req.req_ready;
dma_desc.req_src_sel <= '0;
dma_desc.req_src_asid <= '0;
dma_desc.req_dst_addr <= '0;
dma_desc.req_dst_sel <= '0;
dma_desc.req_dst_asid <= '0;
dma_desc.req_imm <= '0;
dma_desc.req_imm_en <= '0;
dma_desc.req_len <= 16;
dma_desc.req_tag <= '0;
dma_desc.req_id <= '0;
dma_desc.req_user <= '0;
dma_desc.req_valid <= dma_desc.req_valid && !dma_desc.req_ready;
desc_req_reg <= desc_req_reg | desc_req;
if (!txq_en) begin
txq_cons_ptr_reg <= '0;
end
if (!rxq_en) begin
rxq_cons_ptr_reg <= '0;
end
case (state_reg)
STATE_IDLE: begin
if (desc_req_reg[1]) begin
dma_rd_desc_req.req_src_addr <= rxq_base_addr + 64'(16'(rxq_cons_ptr_reg & ({16{1'b1}} >> (16 - rxq_size))) * 16);
dma_desc.req_dest <= 1'b1;
desc_req_reg[1] <= 1'b0;
if (rxq_cons_ptr_reg == rxq_prod || !rxq_en) begin
dma_desc.req_user <= 1'b1;
dma_desc.req_valid <= 1'b1;
state_reg <= STATE_TX_DESC;
end else begin
dma_desc.req_user <= 1'b0;
dma_rd_desc_req.req_valid <= 1'b1;
rxq_cons_ptr_reg <= rxq_cons_ptr_reg + 1;
state_reg <= STATE_READ_DESC;
end
end else if (desc_req_reg[0]) begin
dma_rd_desc_req.req_src_addr <= txq_base_addr + 64'(16'(txq_cons_ptr_reg & ({16{1'b1}} >> (16 - txq_size))) * 16);
dma_desc.req_dest <= 1'b0;
desc_req_reg[0] <= 1'b0;
if (txq_cons_ptr_reg == txq_prod || !txq_en) begin
dma_desc.req_user <= 1'b1;
dma_desc.req_valid <= 1'b1;
state_reg <= STATE_TX_DESC;
end else begin
dma_desc.req_user <= 1'b0;
dma_rd_desc_req.req_valid <= 1'b1;
txq_cons_ptr_reg <= txq_cons_ptr_reg + 1;
state_reg <= STATE_READ_DESC;
end
end
end
STATE_READ_DESC: begin
if (dma_rd_desc_sts.sts_valid) begin
dma_desc.req_valid <= 1'b1;
state_reg <= STATE_TX_DESC;
end
end
STATE_TX_DESC: begin
if (dma_desc.sts_valid) begin
state_reg <= STATE_IDLE;
end
end
default: begin
state_reg <= STATE_IDLE;
end
endcase
if (rst) begin
state_reg <= STATE_IDLE;
end
end
taxi_dma_ram_if #(
.SEGS(dma_ram_wr.SEGS),
.SEG_ADDR_W(dma_ram_wr.SEG_ADDR_W),
.SEG_DATA_W(dma_ram_wr.SEG_DATA_W),
.SEG_BE_W(dma_ram_wr.SEG_BE_W)
) dma_ram_rd();
taxi_dma_psdpram #(
.SIZE(1024),
.PIPELINE(2)
)
ram_inst (
.clk(clk),
.rst(rst),
/*
* Write port
*/
.dma_ram_wr(dma_ram_wr),
/*
* Read port
*/
.dma_ram_rd(dma_ram_rd)
);
taxi_axis_if #(
.DATA_W(axis_desc[0].DATA_W),
.KEEP_EN(axis_desc[0].KEEP_EN),
.KEEP_W(axis_desc[0].KEEP_W),
.LAST_EN(axis_desc[0].LAST_EN),
.ID_EN(axis_desc[0].ID_EN),
.ID_W(axis_desc[0].ID_W),
.DEST_EN(1),
.DEST_W(1),
.USER_EN(axis_desc[0].USER_EN),
.USER_W(axis_desc[0].USER_W)
) m_axis_rd_data();
taxi_dma_client_axis_source
dma_inst (
.clk(clk),
.rst(rst),
/*
* DMA descriptor
*/
.desc_req(dma_desc),
.desc_sts(dma_desc),
/*
* AXI stream read data output
*/
.m_axis_rd_data(m_axis_rd_data),
/*
* RAM interface
*/
.dma_ram_rd(dma_ram_rd),
/*
* Configuration
*/
.enable(1'b1)
);
taxi_axis_demux #(
.M_COUNT(2),
.TDEST_ROUTE(1)
)
demux_inst (
.clk(clk),
.rst(rst),
/*
* AXI4-Stream input (sink)
*/
.s_axis(m_axis_rd_data),
/*
* AXI4-Stream output (source)
*/
.m_axis(axis_desc),
/*
* Control
*/
.enable(1'b1),
.drop(1'b0),
.select('0)
);
endmodule
`resetall

View File

@@ -0,0 +1,5 @@
cndm_proto_pcie_us.sv
cndm_proto_core.f
../lib/taxi/src/pcie/rtl/taxi_pcie_us_axil_master.sv
../lib/taxi/src/pcie/rtl/taxi_pcie_us_msi.sv
../lib/taxi/src/dma/rtl/taxi_dma_if_pcie_us.f

View File

@@ -0,0 +1,465 @@
// SPDX-License-Identifier: CERN-OHL-S-2.0
/*
Copyright (c) 2025 FPGA Ninja, LLC
Authors:
- Alex Forencich
*/
`resetall
`timescale 1ns / 1ps
`default_nettype none
/*
* Corundum-proto core logic for UltraScale PCIe
*/
module cndm_proto_pcie_us #(
// simulation (set to avoid vendor primitives)
parameter logic SIM = 1'b0,
// vendor ("GENERIC", "XILINX", "ALTERA")
parameter string VENDOR = "XILINX",
// device family
parameter string FAMILY = "kintexuplus",
parameter PORTS = 2,
parameter RQ_SEQ_NUM_W = 6,
parameter BAR0_APERTURE = 24
)
(
/*
* PCIe
*/
input wire logic pcie_clk,
input wire logic pcie_rst,
taxi_axis_if.snk s_axis_pcie_cq,
taxi_axis_if.src m_axis_pcie_cc,
taxi_axis_if.src m_axis_pcie_rq,
taxi_axis_if.snk s_axis_pcie_rc,
input wire [RQ_SEQ_NUM_W-1:0] pcie_rq_seq_num0,
input wire pcie_rq_seq_num_vld0,
input wire [RQ_SEQ_NUM_W-1:0] pcie_rq_seq_num1,
input wire pcie_rq_seq_num_vld1,
input wire [2:0] cfg_max_payload,
input wire [2:0] cfg_max_read_req,
input wire [3:0] cfg_rcb_status,
output wire [9:0] cfg_mgmt_addr,
output wire [7:0] cfg_mgmt_function_number,
output wire cfg_mgmt_write,
output wire [31:0] cfg_mgmt_write_data,
output wire [3:0] cfg_mgmt_byte_enable,
output wire cfg_mgmt_read,
input wire [31:0] cfg_mgmt_read_data,
input wire cfg_mgmt_read_write_done,
input wire [7:0] cfg_fc_ph,
input wire [11:0] cfg_fc_pd,
input wire [7:0] cfg_fc_nph,
input wire [11:0] cfg_fc_npd,
input wire [7:0] cfg_fc_cplh,
input wire [11:0] cfg_fc_cpld,
output wire [2:0] cfg_fc_sel,
input wire [3:0] cfg_interrupt_msi_enable,
input wire [11:0] cfg_interrupt_msi_mmenable,
input wire cfg_interrupt_msi_mask_update,
input wire [31:0] cfg_interrupt_msi_data,
output wire [1:0] cfg_interrupt_msi_select,
output wire [31:0] cfg_interrupt_msi_int,
output wire [31:0] cfg_interrupt_msi_pending_status,
output wire cfg_interrupt_msi_pending_status_data_enable,
output wire [1:0] cfg_interrupt_msi_pending_status_function_num,
input wire cfg_interrupt_msi_sent,
input wire cfg_interrupt_msi_fail,
output wire [2:0] cfg_interrupt_msi_attr,
output wire cfg_interrupt_msi_tph_present,
output wire [1:0] cfg_interrupt_msi_tph_type,
output wire [7:0] cfg_interrupt_msi_tph_st_tag,
output wire [7:0] cfg_interrupt_msi_function_number,
/*
* Ethernet
*/
input wire logic mac_tx_clk[PORTS],
input wire logic mac_tx_rst[PORTS],
taxi_axis_if.src mac_axis_tx[PORTS],
taxi_axis_if.snk mac_axis_tx_cpl[PORTS],
input wire logic mac_rx_clk[PORTS],
input wire logic mac_rx_rst[PORTS],
taxi_axis_if.snk mac_axis_rx[PORTS]
);
localparam CL_PORTS = $clog2(PORTS);
localparam AXIL_DATA_W = 32;
localparam AXIL_ADDR_W = BAR0_APERTURE;
taxi_axil_if #(
.DATA_W(AXIL_DATA_W),
.ADDR_W(AXIL_ADDR_W),
.AWUSER_EN(1'b0),
.WUSER_EN(1'b0),
.BUSER_EN(1'b0),
.ARUSER_EN(1'b0),
.RUSER_EN(1'b0)
) axil_ctrl_bar();
taxi_pcie_us_axil_master
pcie_axil_master_inst (
.clk(pcie_clk),
.rst(pcie_rst),
/*
* UltraScale PCIe interface
*/
.s_axis_cq(s_axis_pcie_cq),
.m_axis_cc(m_axis_pcie_cc),
/*
* AXI Lite Master output
*/
.m_axil_wr(axil_ctrl_bar),
.m_axil_rd(axil_ctrl_bar),
/*
* Configuration
*/
.completer_id('0),
.completer_id_en(1'b0),
/*
* Status
*/
.stat_err_cor(),
.stat_err_uncor()
);
localparam AXIS_PCIE_DATA_W = m_axis_pcie_rq.DATA_W;
localparam PCIE_ADDR_W = 64;
// TODO
localparam logic RQ_SEQ_NUM_EN = 1'b1;
localparam RAM_SEL_W = 2+CL_PORTS;
localparam RAM_ADDR_W = 16;
localparam RAM_SEGS = 2; // AXIS_PCIE_DATA_W > 256 ? AXIS_PCIE_DATA_W / 128 : 2
localparam PCIE_TAG_CNT = 64; // AXIS_PCIE_RQ_USER_W == 60 ? 64 : 256
localparam logic IMM_EN = 1'b0;
localparam IMM_W = 32;
localparam LEN_W = 20;
localparam TAG_W = 8;
localparam RD_OP_TBL_SIZE = PCIE_TAG_CNT;
localparam RD_TX_LIMIT = 2**(RQ_SEQ_NUM_W-1);
localparam logic RD_TX_FC_EN = 1'b1;
localparam RD_CPLH_FC_LIMIT = 512;
localparam RD_CPLD_FC_LIMIT = RD_CPLH_FC_LIMIT*4;
localparam WR_OP_TBL_SIZE = 2**(RQ_SEQ_NUM_W-1);
localparam WR_TX_LIMIT = 2**(RQ_SEQ_NUM_W-1);
localparam logic WR_TX_FC_EN = 1'b1;
localparam RAM_DATA_W = AXIS_PCIE_DATA_W*2;
localparam RAM_SEG_DATA_W = RAM_DATA_W / RAM_SEGS;
localparam RAM_SEG_BE_W = RAM_SEG_DATA_W / 8;
localparam RAM_SEG_ADDR_W = RAM_ADDR_W - $clog2(RAM_SEGS*RAM_SEG_BE_W);
logic [RQ_SEQ_NUM_W-1:0] s_axis_rq_seq_num_0;
logic s_axis_rq_seq_num_valid_0;
logic [RQ_SEQ_NUM_W-1:0] s_axis_rq_seq_num_1;
logic s_axis_rq_seq_num_valid_1;
logic [7:0] pcie_tx_fc_nph_av;
logic [7:0] pcie_tx_fc_ph_av;
logic [11:0] pcie_tx_fc_pd_av;
assign cfg_fc_sel = 3'b100;
taxi_dma_desc_if #(
.SRC_ADDR_W(PCIE_ADDR_W),
.SRC_SEL_EN(1'b0),
.SRC_ASID_EN(1'b0),
.DST_ADDR_W(RAM_ADDR_W),
.DST_SEL_EN(1'b1),
.DST_SEL_W(RAM_SEL_W),
.DST_ASID_EN(1'b0),
.IMM_EN(1'b0),
.LEN_W(LEN_W),
.TAG_W(TAG_W),
.ID_EN(1'b0),
.DEST_EN(1'b0),
.USER_EN(1'b0)
) dma_rd_desc();
taxi_dma_desc_if #(
.SRC_ADDR_W(RAM_ADDR_W),
.SRC_SEL_EN(1'b1),
.SRC_SEL_W(RAM_SEL_W),
.SRC_ASID_EN(1'b0),
.DST_ADDR_W(PCIE_ADDR_W),
.DST_SEL_EN(1'b0),
.DST_ASID_EN(1'b0),
.IMM_EN(IMM_EN),
.IMM_W(IMM_W),
.LEN_W(LEN_W),
.TAG_W(TAG_W),
.ID_EN(1'b0),
.DEST_EN(1'b0),
.USER_EN(1'b0)
) dma_wr_desc();
taxi_dma_ram_if #(
.SEGS(RAM_SEGS),
.SEG_ADDR_W(RAM_SEG_ADDR_W),
.SEG_DATA_W(RAM_SEG_DATA_W),
.SEG_BE_W(RAM_SEG_BE_W),
.SEL_W(RAM_SEL_W)
) dma_ram();
logic stat_rd_busy;
logic stat_wr_busy;
logic stat_err_cor;
logic stat_err_uncor;
logic [$clog2(RD_OP_TBL_SIZE)-1:0] stat_rd_op_start_tag;
logic stat_rd_op_start_valid;
logic [$clog2(RD_OP_TBL_SIZE)-1:0] stat_rd_op_finish_tag;
logic [3:0] stat_rd_op_finish_status;
logic stat_rd_op_finish_valid;
logic [$clog2(PCIE_TAG_CNT)-1:0] stat_rd_req_start_tag;
logic [12:0] stat_rd_req_start_len;
logic stat_rd_req_start_valid;
logic [$clog2(PCIE_TAG_CNT)-1:0] stat_rd_req_finish_tag;
logic [3:0] stat_rd_req_finish_status;
logic stat_rd_req_finish_valid;
logic stat_rd_req_timeout;
logic stat_rd_op_tbl_full;
logic stat_rd_no_tags;
logic stat_rd_tx_limit;
logic stat_rd_tx_stall;
logic [$clog2(WR_OP_TBL_SIZE)-1:0] stat_wr_op_start_tag;
logic stat_wr_op_start_valid;
logic [$clog2(WR_OP_TBL_SIZE)-1:0] stat_wr_op_finish_tag;
logic [3:0] stat_wr_op_finish_status;
logic stat_wr_op_finish_valid;
logic [$clog2(WR_OP_TBL_SIZE)-1:0] stat_wr_req_start_tag;
logic [12:0] stat_wr_req_start_len;
logic stat_wr_req_start_valid;
logic [$clog2(WR_OP_TBL_SIZE)-1:0] stat_wr_req_finish_tag;
logic [3:0] stat_wr_req_finish_status;
logic stat_wr_req_finish_valid;
logic stat_wr_op_tbl_full;
logic stat_wr_tx_limit;
logic stat_wr_tx_stall;
// register to break timing path from PCIe HIP 500 MHz clock domain
logic [RQ_SEQ_NUM_W-1:0] pcie_rq_seq_num0_reg = '0;
logic pcie_rq_seq_num_vld0_reg = 1'b0;
logic [RQ_SEQ_NUM_W-1:0] pcie_rq_seq_num1_reg = '0;
logic pcie_rq_seq_num_vld1_reg = 1'b0;
always_ff @(posedge pcie_clk) begin
pcie_rq_seq_num0_reg <= pcie_rq_seq_num0;
pcie_rq_seq_num_vld0_reg <= pcie_rq_seq_num_vld0;
pcie_rq_seq_num1_reg <= pcie_rq_seq_num1;
pcie_rq_seq_num_vld1_reg <= pcie_rq_seq_num_vld1;
if (pcie_rst) begin
pcie_rq_seq_num_vld0_reg <= 1'b0;
pcie_rq_seq_num_vld1_reg <= 1'b0;
end
end
taxi_dma_if_pcie_us #(
.RQ_SEQ_NUM_W(RQ_SEQ_NUM_W),
.RQ_SEQ_NUM_EN(RQ_SEQ_NUM_EN),
.PCIE_TAG_CNT(PCIE_TAG_CNT),
.RD_OP_TBL_SIZE(RD_OP_TBL_SIZE),
.RD_TX_LIMIT(RD_TX_LIMIT),
.RD_TX_FC_EN(RD_TX_FC_EN),
.RD_CPLH_FC_LIMIT(RD_CPLH_FC_LIMIT),
.RD_CPLD_FC_LIMIT(RD_CPLD_FC_LIMIT),
.WR_OP_TBL_SIZE(WR_OP_TBL_SIZE),
.WR_TX_LIMIT(WR_TX_LIMIT),
.WR_TX_FC_EN(WR_TX_FC_EN)
)
dma_if_inst (
.clk(pcie_clk),
.rst(pcie_rst),
/*
* UltraScale PCIe interface
*/
.m_axis_rq(m_axis_pcie_rq),
.s_axis_rc(s_axis_pcie_rc),
/*
* Transmit sequence number input
*/
.s_axis_rq_seq_num_0(pcie_rq_seq_num0_reg),
.s_axis_rq_seq_num_valid_0(pcie_rq_seq_num_vld0_reg),
.s_axis_rq_seq_num_1(pcie_rq_seq_num1_reg),
.s_axis_rq_seq_num_valid_1(pcie_rq_seq_num_vld1_reg),
/*
* Transmit flow control
*/
.pcie_tx_fc_nph_av(cfg_fc_nph),
.pcie_tx_fc_ph_av(cfg_fc_ph),
.pcie_tx_fc_pd_av(cfg_fc_pd),
/*
* Read descriptor
*/
.rd_desc_req(dma_rd_desc),
.rd_desc_sts(dma_rd_desc),
/*
* Write descriptor
*/
.wr_desc_req(dma_wr_desc),
.wr_desc_sts(dma_wr_desc),
/*
* RAM interface
*/
.dma_ram_wr(dma_ram),
.dma_ram_rd(dma_ram),
/*
* Configuration
*/
.read_enable(1'b1),
.write_enable(1'b1),
.ext_tag_en(1'b0), // TODO
.rcb_128b(cfg_rcb_status[0]),
.requester_id('0),
.requester_id_en(1'b0),
.max_rd_req_size(cfg_max_read_req),
.max_payload_size(cfg_max_payload),
/*
* Status
*/
.stat_rd_busy(stat_rd_busy),
.stat_wr_busy(stat_wr_busy),
.stat_err_cor(stat_err_cor),
.stat_err_uncor(stat_err_uncor),
/*
* Statistics
*/
.stat_rd_op_start_tag(stat_rd_op_start_tag),
.stat_rd_op_start_valid(stat_rd_op_start_valid),
.stat_rd_op_finish_tag(stat_rd_op_finish_tag),
.stat_rd_op_finish_status(stat_rd_op_finish_status),
.stat_rd_op_finish_valid(stat_rd_op_finish_valid),
.stat_rd_req_start_tag(stat_rd_req_start_tag),
.stat_rd_req_start_len(stat_rd_req_start_len),
.stat_rd_req_start_valid(stat_rd_req_start_valid),
.stat_rd_req_finish_tag(stat_rd_req_finish_tag),
.stat_rd_req_finish_status(stat_rd_req_finish_status),
.stat_rd_req_finish_valid(stat_rd_req_finish_valid),
.stat_rd_req_timeout(stat_rd_req_timeout),
.stat_rd_op_tbl_full(stat_rd_op_tbl_full),
.stat_rd_no_tags(stat_rd_no_tags),
.stat_rd_tx_limit(stat_rd_tx_limit),
.stat_rd_tx_stall(stat_rd_tx_stall),
.stat_wr_op_start_tag(stat_wr_op_start_tag),
.stat_wr_op_start_valid(stat_wr_op_start_valid),
.stat_wr_op_finish_tag(stat_wr_op_finish_tag),
.stat_wr_op_finish_status(stat_wr_op_finish_status),
.stat_wr_op_finish_valid(stat_wr_op_finish_valid),
.stat_wr_req_start_tag(stat_wr_req_start_tag),
.stat_wr_req_start_len(stat_wr_req_start_len),
.stat_wr_req_start_valid(stat_wr_req_start_valid),
.stat_wr_req_finish_tag(stat_wr_req_finish_tag),
.stat_wr_req_finish_status(stat_wr_req_finish_status),
.stat_wr_req_finish_valid(stat_wr_req_finish_valid),
.stat_wr_op_tbl_full(stat_wr_op_tbl_full),
.stat_wr_tx_limit(stat_wr_tx_limit),
.stat_wr_tx_stall(stat_wr_tx_stall)
);
wire [PORTS-1:0] irq;
wire [31:0] msi_irq = 32'(irq);
taxi_pcie_us_msi #(
.MSI_CNT(32)
)
msi_inst (
.clk(pcie_clk),
.rst(pcie_rst),
/*
* Interrupt request inputs
*/
.msi_irq(msi_irq),
/*
* Interface to UltraScale PCIe IP core
*/
/* verilator lint_off WIDTHEXPAND */
.cfg_interrupt_msi_enable(cfg_interrupt_msi_enable),
.cfg_interrupt_msi_vf_enable(),
.cfg_interrupt_msi_mmenable(cfg_interrupt_msi_mmenable),
.cfg_interrupt_msi_mask_update(cfg_interrupt_msi_mask_update),
.cfg_interrupt_msi_data(cfg_interrupt_msi_data),
.cfg_interrupt_msi_select(cfg_interrupt_msi_select),
.cfg_interrupt_msi_int(cfg_interrupt_msi_int),
.cfg_interrupt_msi_pending_status(cfg_interrupt_msi_pending_status),
.cfg_interrupt_msi_pending_status_data_enable(cfg_interrupt_msi_pending_status_data_enable),
.cfg_interrupt_msi_pending_status_function_num(cfg_interrupt_msi_pending_status_function_num),
.cfg_interrupt_msi_sent(cfg_interrupt_msi_sent),
.cfg_interrupt_msi_fail(cfg_interrupt_msi_fail),
.cfg_interrupt_msi_attr(cfg_interrupt_msi_attr),
.cfg_interrupt_msi_tph_present(cfg_interrupt_msi_tph_present),
.cfg_interrupt_msi_tph_type(cfg_interrupt_msi_tph_type),
.cfg_interrupt_msi_tph_st_tag(cfg_interrupt_msi_tph_st_tag),
.cfg_interrupt_msi_function_number(cfg_interrupt_msi_function_number)
/* verilator lint_on WIDTHEXPAND */
);
cndm_proto_core #(
.PORTS(PORTS)
)
core_inst (
.clk(pcie_clk),
.rst(pcie_rst),
/*
* Control register interface
*/
.s_axil_wr(axil_ctrl_bar),
.s_axil_rd(axil_ctrl_bar),
/*
* DMA
*/
.dma_rd_desc_req(dma_rd_desc),
.dma_rd_desc_sts(dma_rd_desc),
.dma_wr_desc_req(dma_wr_desc),
.dma_wr_desc_sts(dma_wr_desc),
.dma_ram_wr(dma_ram),
.dma_ram_rd(dma_ram),
.irq(irq),
/*
* Ethernet
*/
.mac_tx_clk(mac_tx_clk),
.mac_tx_rst(mac_tx_rst),
.mac_axis_tx(mac_axis_tx),
.mac_axis_tx_cpl(mac_axis_tx_cpl),
.mac_rx_clk(mac_rx_clk),
.mac_rx_rst(mac_rx_rst),
.mac_axis_rx(mac_axis_rx)
);
endmodule
`resetall

View File

@@ -0,0 +1,544 @@
// SPDX-License-Identifier: CERN-OHL-S-2.0
/*
Copyright (c) 2025 FPGA Ninja, LLC
Authors:
- Alex Forencich
*/
`resetall
`timescale 1ns / 1ps
`default_nettype none
/*
* Corundum-proto port logic
*/
module cndm_proto_port #(
parameter PORTS = 2
)
(
input wire logic clk,
input wire logic rst,
/*
* Control register interface
*/
taxi_axil_if.wr_slv s_axil_wr,
taxi_axil_if.rd_slv s_axil_rd,
/*
* DMA
*/
taxi_dma_desc_if.req_src dma_rd_desc_req,
taxi_dma_desc_if.sts_snk dma_rd_desc_sts,
taxi_dma_desc_if.req_src dma_wr_desc_req,
taxi_dma_desc_if.sts_snk dma_wr_desc_sts,
taxi_dma_ram_if.wr_slv dma_ram_wr,
taxi_dma_ram_if.rd_slv dma_ram_rd,
output wire logic irq,
/*
* Ethernet
*/
input wire logic mac_tx_clk,
input wire logic mac_tx_rst,
taxi_axis_if.src mac_axis_tx,
taxi_axis_if.snk mac_axis_tx_cpl,
input wire logic mac_rx_clk,
input wire logic mac_rx_rst,
taxi_axis_if.snk mac_axis_rx
);
localparam AXIL_ADDR_W = s_axil_wr.ADDR_W;
localparam AXIL_DATA_W = s_axil_wr.DATA_W;
localparam RAM_SEGS = dma_ram_wr.SEGS;
localparam RAM_SEG_ADDR_W = dma_ram_wr.SEG_ADDR_W;
localparam RAM_SEG_DATA_W = dma_ram_wr.SEG_DATA_W;
localparam RAM_SEG_BE_W = dma_ram_wr.SEG_BE_W;
localparam RAM_SEL_W = dma_ram_wr.SEL_W;
logic txq_en_reg = '0;
logic [3:0] txq_size_reg = '0;
logic [63:0] txq_base_addr_reg = '0;
logic [15:0] txq_prod_reg = '0;
wire [15:0] txq_cons;
logic rxq_en_reg = '0;
logic [3:0] rxq_size_reg = '0;
logic [63:0] rxq_base_addr_reg = '0;
logic [15:0] rxq_prod_reg = '0;
wire [15:0] rxq_cons;
logic txcq_en_reg = '0;
logic [3:0] txcq_size_reg = '0;
logic [63:0] txcq_base_addr_reg = '0;
wire [15:0] txcq_prod;
logic rxcq_en_reg = '0;
logic [3:0] rxcq_size_reg = '0;
logic [63:0] rxcq_base_addr_reg = '0;
wire [15:0] rxcq_prod;
logic s_axil_awready_reg = 1'b0;
logic s_axil_wready_reg = 1'b0;
logic s_axil_bvalid_reg = 1'b0;
logic s_axil_arready_reg = 1'b0;
logic [AXIL_DATA_W-1:0] s_axil_rdata_reg = '0;
logic s_axil_rvalid_reg = 1'b0;
assign s_axil_wr.awready = s_axil_awready_reg;
assign s_axil_wr.wready = s_axil_wready_reg;
assign s_axil_wr.bresp = '0;
assign s_axil_wr.buser = '0;
assign s_axil_wr.bvalid = s_axil_bvalid_reg;
assign s_axil_rd.arready = s_axil_arready_reg;
assign s_axil_rd.rdata = s_axil_rdata_reg;
assign s_axil_rd.rresp = '0;
assign s_axil_rd.ruser = '0;
assign s_axil_rd.rvalid = s_axil_rvalid_reg;
always_ff @(posedge clk) begin
s_axil_awready_reg <= 1'b0;
s_axil_wready_reg <= 1'b0;
s_axil_bvalid_reg <= s_axil_bvalid_reg && !s_axil_wr.bready;
s_axil_arready_reg <= 1'b0;
s_axil_rvalid_reg <= s_axil_rvalid_reg && !s_axil_rd.rready;
if (s_axil_wr.awvalid && s_axil_wr.wvalid && !s_axil_bvalid_reg) begin
s_axil_awready_reg <= 1'b1;
s_axil_wready_reg <= 1'b1;
s_axil_bvalid_reg <= 1'b1;
case ({s_axil_wr.awaddr[15:2], 2'b00})
16'h0100: begin
txq_en_reg <= s_axil_wr.wdata[0];
txq_size_reg <= s_axil_wr.wdata[19:16];
end
16'h0104: txq_prod_reg <= s_axil_wr.wdata[15:0];
16'h0108: txq_base_addr_reg[31:0] <= s_axil_wr.wdata;
16'h010c: txq_base_addr_reg[63:32] <= s_axil_wr.wdata;
16'h0200: begin
rxq_en_reg <= s_axil_wr.wdata[0];
rxq_size_reg <= s_axil_wr.wdata[19:16];
end
16'h0204: rxq_prod_reg <= s_axil_wr.wdata[15:0];
16'h0208: rxq_base_addr_reg[31:0] <= s_axil_wr.wdata;
16'h020c: rxq_base_addr_reg[63:32] <= s_axil_wr.wdata;
16'h0300: begin
txcq_en_reg <= s_axil_wr.wdata[0];
txcq_size_reg <= s_axil_wr.wdata[19:16];
end
16'h0308: txcq_base_addr_reg[31:0] <= s_axil_wr.wdata;
16'h030c: txcq_base_addr_reg[63:32] <= s_axil_wr.wdata;
16'h0400: begin
rxcq_en_reg <= s_axil_wr.wdata[0];
rxcq_size_reg <= s_axil_wr.wdata[19:16];
end
16'h0408: rxcq_base_addr_reg[31:0] <= s_axil_wr.wdata;
16'h040c: rxcq_base_addr_reg[63:32] <= s_axil_wr.wdata;
default: begin end
endcase
end
if (s_axil_rd.arvalid && !s_axil_rvalid_reg) begin
s_axil_rdata_reg <= '0;
s_axil_arready_reg <= 1'b1;
s_axil_rvalid_reg <= 1'b1;
case ({s_axil_rd.araddr[15:2], 2'b00})
16'h0100: begin
s_axil_rdata_reg[0] <= txq_en_reg;
s_axil_rdata_reg[19:16] <= txq_size_reg;
end
16'h0104: begin
s_axil_rdata_reg[15:0] <= txq_prod_reg;
s_axil_rdata_reg[31:16] <= txq_cons;
end
16'h0108: s_axil_rdata_reg <= txq_base_addr_reg[31:0];
16'h010c: s_axil_rdata_reg <= txq_base_addr_reg[63:32];
16'h0200: begin
s_axil_rdata_reg[0] <= rxq_en_reg;
s_axil_rdata_reg[19:16] <= rxq_size_reg;
end
16'h0204: begin
s_axil_rdata_reg[15:0] <= rxq_prod_reg;
s_axil_rdata_reg[31:16] <= rxq_cons;
end
16'h0208: s_axil_rdata_reg <= rxq_base_addr_reg[31:0];
16'h020c: s_axil_rdata_reg <= rxq_base_addr_reg[63:32];
16'h0300: begin
s_axil_rdata_reg[0] <= txcq_en_reg;
s_axil_rdata_reg[19:16] <= txcq_size_reg;
end
16'h0304: s_axil_rdata_reg[15:0] <= txcq_prod;
16'h0308: s_axil_rdata_reg <= txcq_base_addr_reg[31:0];
16'h030c: s_axil_rdata_reg <= txcq_base_addr_reg[63:32];
16'h0400: begin
s_axil_rdata_reg[0] <= rxcq_en_reg;
s_axil_rdata_reg[19:16] <= rxcq_size_reg;
end
16'h0404: s_axil_rdata_reg[15:0] <= rxcq_prod;
16'h0408: s_axil_rdata_reg <= rxcq_base_addr_reg[31:0];
16'h040c: s_axil_rdata_reg <= rxcq_base_addr_reg[63:32];
default: begin end
endcase
end
if (rst) begin
s_axil_awready_reg <= 1'b0;
s_axil_wready_reg <= 1'b0;
s_axil_bvalid_reg <= 1'b0;
s_axil_arready_reg <= 1'b0;
s_axil_rvalid_reg <= 1'b0;
end
end
taxi_dma_desc_if #(
.SRC_ADDR_W(dma_rd_desc_req.SRC_ADDR_W),
.SRC_SEL_EN(dma_rd_desc_req.SRC_SEL_EN),
.SRC_SEL_W(dma_rd_desc_req.SRC_SEL_W),
.SRC_ASID_EN(dma_rd_desc_req.SRC_ASID_EN),
.DST_ADDR_W(dma_rd_desc_req.DST_ADDR_W),
.DST_SEL_EN(dma_rd_desc_req.DST_SEL_EN),
.DST_SEL_W(dma_rd_desc_req.DST_SEL_W-1),
.DST_ASID_EN(dma_rd_desc_req.DST_ASID_EN),
.IMM_EN(dma_rd_desc_req.IMM_EN),
.LEN_W(dma_rd_desc_req.LEN_W),
.TAG_W(dma_rd_desc_req.TAG_W-1),
.ID_EN(dma_rd_desc_req.ID_EN),
.DEST_EN(dma_rd_desc_req.DEST_EN),
.USER_EN(dma_rd_desc_req.USER_EN)
) dma_rd_desc_int[2]();
taxi_dma_ram_if #(
.SEGS(RAM_SEGS),
.SEG_ADDR_W(RAM_SEG_ADDR_W),
.SEG_DATA_W(RAM_SEG_DATA_W),
.SEG_BE_W(RAM_SEG_BE_W),
.SEL_W(RAM_SEL_W-1)
) dma_ram_wr_int[2]();
taxi_dma_if_mux_rd #(
.PORTS(2),
.ARB_ROUND_ROBIN(1),
.ARB_LSB_HIGH_PRIO(1)
)
rd_dma_mux_inst (
.clk(clk),
.rst(rst),
/*
* DMA descriptors from clients
*/
.client_req(dma_rd_desc_int),
.client_sts(dma_rd_desc_int),
/*
* DMA descriptors to DMA engines
*/
.dma_req(dma_rd_desc_req),
.dma_sts(dma_rd_desc_sts),
/*
* RAM interface (from DMA interface)
*/
.dma_ram_wr(dma_ram_wr),
/*
* RAM interface (towards RAM)
*/
.client_ram_wr(dma_ram_wr_int)
);
taxi_dma_desc_if #(
.SRC_ADDR_W(dma_wr_desc_req.SRC_ADDR_W),
.SRC_SEL_EN(dma_wr_desc_req.SRC_SEL_EN),
.SRC_SEL_W(dma_wr_desc_req.SRC_SEL_W-1),
.SRC_ASID_EN(dma_wr_desc_req.SRC_ASID_EN),
.DST_ADDR_W(dma_wr_desc_req.DST_ADDR_W),
.DST_SEL_EN(dma_wr_desc_req.DST_SEL_EN),
.DST_SEL_W(dma_wr_desc_req.DST_SEL_W),
.DST_ASID_EN(dma_wr_desc_req.DST_ASID_EN),
.IMM_EN(dma_wr_desc_req.IMM_EN),
.IMM_W(dma_wr_desc_req.IMM_W),
.LEN_W(dma_wr_desc_req.LEN_W),
.TAG_W(dma_wr_desc_req.TAG_W-1),
.ID_EN(dma_wr_desc_req.ID_EN),
.DEST_EN(dma_wr_desc_req.DEST_EN),
.USER_EN(dma_wr_desc_req.USER_EN)
) dma_wr_desc_int[2]();
taxi_dma_ram_if #(
.SEGS(RAM_SEGS),
.SEG_ADDR_W(RAM_SEG_ADDR_W),
.SEG_DATA_W(RAM_SEG_DATA_W),
.SEG_BE_W(RAM_SEG_BE_W),
.SEL_W(RAM_SEL_W-1)
) dma_ram_rd_int[2]();
taxi_dma_if_mux_wr #(
.PORTS(2),
.ARB_ROUND_ROBIN(1),
.ARB_LSB_HIGH_PRIO(1)
)
wr_dma_mux_inst (
.clk(clk),
.rst(rst),
/*
* DMA descriptors from clients
*/
.client_req(dma_wr_desc_int),
.client_sts(dma_wr_desc_int),
/*
* DMA descriptors to DMA engines
*/
.dma_req(dma_wr_desc_req),
.dma_sts(dma_wr_desc_sts),
/*
* RAM interface (from DMA interface)
*/
.dma_ram_rd(dma_ram_rd),
/*
* RAM interface (towards RAM)
*/
.client_ram_rd(dma_ram_rd_int)
);
wire [1:0] desc_req;
taxi_axis_if #(
.DATA_W(16*8),
.KEEP_EN(1),
.LAST_EN(1),
.ID_EN(0),
.DEST_EN(1), // TODO
.USER_EN(1),
.USER_W(1)
) axis_desc[2]();
taxi_axis_if #(
.DATA_W(16*8),
.KEEP_EN(1),
.LAST_EN(1),
.ID_EN(1), // TODO
.DEST_EN(0),
.USER_EN(0)
) axis_cpl[2]();
cndm_proto_desc_rd
desc_rd_inst (
.clk(clk),
.rst(rst),
/*
* DMA
*/
.dma_rd_desc_req(dma_rd_desc_int[0]),
.dma_rd_desc_sts(dma_rd_desc_int[0]),
.dma_ram_wr(dma_ram_wr_int[0]),
.txq_en(txq_en_reg),
.txq_size(txq_size_reg),
.txq_base_addr(txq_base_addr_reg),
.txq_prod(txq_prod_reg),
.txq_cons(txq_cons),
.rxq_en(rxq_en_reg),
.rxq_size(rxq_size_reg),
.rxq_base_addr(rxq_base_addr_reg),
.rxq_prod(rxq_prod_reg),
.rxq_cons(rxq_cons),
.desc_req(desc_req),
.axis_desc(axis_desc)
);
cndm_proto_cpl_wr
cpl_wr_inst (
.clk(clk),
.rst(rst),
/*
* DMA
*/
.dma_wr_desc_req(dma_wr_desc_int[0]),
.dma_wr_desc_sts(dma_wr_desc_int[0]),
.dma_ram_rd(dma_ram_rd_int[0]),
.txcq_en(txcq_en_reg),
.txcq_size(txcq_size_reg),
.txcq_base_addr(txcq_base_addr_reg),
.txcq_prod(txcq_prod),
.rxcq_en(rxcq_en_reg),
.rxcq_size(rxcq_size_reg),
.rxcq_base_addr(rxcq_base_addr_reg),
.rxcq_prod(rxcq_prod),
.axis_cpl(axis_cpl),
.irq(irq)
);
taxi_axis_if #(
.DATA_W(mac_axis_tx.DATA_W),
.USER_EN(1),
.USER_W(1)
) mac_tx_int();
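// Asynchronous frame FIFO crosses the TX stream from the core clock domain to
// the MAC TX clock domain, dropping bad and oversized frames.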
taxi_axis_async_fifo #(
.DEPTH(16384),
.RAM_PIPELINE(2),
.FRAME_FIFO(1),
.USER_BAD_FRAME_VALUE(1'b1),
.USER_BAD_FRAME_MASK(1'b1),
.DROP_OVERSIZE_FRAME(1),
.DROP_BAD_FRAME(1),
.DROP_WHEN_FULL(1)
)
tx_fifo (
/*
* AXI4-Stream input (sink)
*/
.s_clk(clk),
.s_rst(rst),
.s_axis(mac_tx_int),
/*
* AXI4-Stream output (source)
*/
.m_clk(mac_tx_clk),
.m_rst(mac_tx_rst),
.m_axis(mac_axis_tx),
/*
* Pause
*/
.s_pause_req(1'b0),
.s_pause_ack(),
.m_pause_req(1'b0),
.m_pause_ack(),
/*
* Status
*/
.s_status_depth(),
.s_status_depth_commit(),
.s_status_overflow(),
.s_status_bad_frame(),
.s_status_good_frame(),
.m_status_depth(),
.m_status_depth_commit(),
.m_status_overflow(),
.m_status_bad_frame(),
.m_status_good_frame()
);
cndm_proto_tx
tx_inst (
.clk(clk),
.rst(rst),
/*
* DMA
*/
.dma_rd_desc_req(dma_rd_desc_int[1]),
.dma_rd_desc_sts(dma_rd_desc_int[1]),
.dma_ram_wr(dma_ram_wr_int[1]),
.desc_req(desc_req[0]),
.axis_desc(axis_desc[0]),
.tx_data(mac_tx_int),
.axis_cpl(axis_cpl[0])
);
taxi_axis_if #(
.DATA_W(mac_axis_rx.DATA_W),
.USER_EN(1),
.USER_W(1)
) mac_rx_int();
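// Matching asynchronous frame FIFO crosses the RX stream from the MAC RX clock
// domain into the core clock domain.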
taxi_axis_async_fifo #(
.DEPTH(16384),
.RAM_PIPELINE(2),
.FRAME_FIFO(1),
.USER_BAD_FRAME_VALUE(1'b1),
.USER_BAD_FRAME_MASK(1'b1),
.DROP_OVERSIZE_FRAME(1),
.DROP_BAD_FRAME(1),
.DROP_WHEN_FULL(1)
)
rx_fifo (
/*
* AXI4-Stream input (sink)
*/
.s_clk(mac_rx_clk),
.s_rst(mac_rx_rst),
.s_axis(mac_axis_rx),
/*
* AXI4-Stream output (source)
*/
.m_clk(clk),
.m_rst(rst),
.m_axis(mac_rx_int),
/*
* Pause
*/
.s_pause_req(1'b0),
.s_pause_ack(),
.m_pause_req(1'b0),
.m_pause_ack(),
/*
* Status
*/
.s_status_depth(),
.s_status_depth_commit(),
.s_status_overflow(),
.s_status_bad_frame(),
.s_status_good_frame(),
.m_status_depth(),
.m_status_depth_commit(),
.m_status_overflow(),
.m_status_bad_frame(),
.m_status_good_frame()
);
cndm_proto_rx
rx_inst (
.clk(clk),
.rst(rst),
/*
* DMA
*/
.dma_wr_desc_req(dma_wr_desc_int[1]),
.dma_wr_desc_sts(dma_wr_desc_int[1]),
.dma_ram_rd(dma_ram_rd_int[1]),
.rx_data(mac_rx_int),
.desc_req(desc_req[1]),
.axis_desc(axis_desc[1]),
.axis_cpl(axis_cpl[1])
);
endmodule
`resetall

View File

@@ -0,0 +1,211 @@
// SPDX-License-Identifier: CERN-OHL-S-2.0
/*
Copyright (c) 2025 FPGA Ninja, LLC
Authors:
- Alex Forencich
*/
`resetall
`timescale 1ns / 1ps
`default_nettype none
/*
* Corundum-proto receive datapath
*/
module cndm_proto_rx
(
input wire logic clk,
input wire logic rst,
/*
* DMA
*/
taxi_dma_desc_if.req_src dma_wr_desc_req,
taxi_dma_desc_if.sts_snk dma_wr_desc_sts,
taxi_dma_ram_if.rd_slv dma_ram_rd,
taxi_axis_if.snk rx_data,
output wire logic desc_req,
taxi_axis_if.snk axis_desc,
taxi_axis_if.src axis_cpl
);
localparam RAM_ADDR_W = 16;
taxi_dma_desc_if #(
.SRC_ADDR_W(RAM_ADDR_W),
.SRC_SEL_EN(1'b0),
.SRC_ASID_EN(1'b0),
.DST_ADDR_W(RAM_ADDR_W),
.DST_SEL_EN(1'b0),
.DST_ASID_EN(1'b0),
.IMM_EN(1'b0),
.LEN_W(16),
.TAG_W(1),
.ID_EN(0),
.DEST_EN(0),
.USER_EN(1),
.USER_W(1)
) dma_desc();
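// Receive flow: capture an incoming frame into the local segmented RAM via the
// DMA client, request and parse an RX descriptor from the host ring, then DMA
// the frame into the host buffer and emit a completion record.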
localparam [2:0]
STATE_IDLE = 0,
STATE_RX_DATA = 1,
STATE_READ_DESC = 2,
STATE_WRITE_DATA = 3;
logic [2:0] state_reg = STATE_IDLE;
logic desc_req_reg = 1'b0;
assign desc_req = desc_req_reg;
always_ff @(posedge clk) begin
desc_req_reg <= 1'b0;
axis_desc.tready <= 1'b0;
dma_wr_desc_req.req_src_sel <= '0;
dma_wr_desc_req.req_src_asid <= '0;
dma_wr_desc_req.req_dst_sel <= '0;
dma_wr_desc_req.req_dst_asid <= '0;
dma_wr_desc_req.req_imm <= '0;
dma_wr_desc_req.req_imm_en <= '0;
dma_wr_desc_req.req_tag <= '0;
dma_wr_desc_req.req_id <= '0;
dma_wr_desc_req.req_dest <= '0;
dma_wr_desc_req.req_user <= '0;
dma_wr_desc_req.req_valid <= dma_wr_desc_req.req_valid && !dma_wr_desc_req.req_ready;
dma_desc.req_src_addr <= '0;
dma_desc.req_src_sel <= '0;
dma_desc.req_src_asid <= '0;
dma_desc.req_dst_addr <= '0;
dma_desc.req_dst_sel <= '0;
dma_desc.req_dst_asid <= '0;
dma_desc.req_imm <= '0;
dma_desc.req_imm_en <= '0;
dma_desc.req_len <= 4096;
dma_desc.req_tag <= '0;
dma_desc.req_id <= '0;
dma_desc.req_dest <= '0;
dma_desc.req_user <= '0;
dma_desc.req_valid <= dma_desc.req_valid && !dma_desc.req_ready;
axis_cpl.tkeep <= '0;
axis_cpl.tid <= '0;
axis_cpl.tdest <= '0;
axis_cpl.tuser <= '0;
axis_cpl.tlast <= 1'b1;
axis_cpl.tvalid <= axis_cpl.tvalid && !axis_cpl.tready;
case (state_reg)
STATE_IDLE: begin
dma_desc.req_valid <= 1'b1;
state_reg <= STATE_RX_DATA;
end
STATE_RX_DATA: begin
dma_wr_desc_req.req_len <= 20'(dma_desc.sts_len);
axis_cpl.tdata[47:32] <= 16'(dma_desc.sts_len);
if (dma_desc.sts_valid) begin
desc_req_reg <= 1'b1;
state_reg <= STATE_READ_DESC;
end
end
STATE_READ_DESC: begin
axis_desc.tready <= 1'b1;
dma_wr_desc_req.req_src_addr <= '0;
dma_wr_desc_req.req_dst_addr <= axis_desc.tdata[127:64];
if (axis_desc.tvalid && axis_desc.tready) begin
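                // clamp the DMA write length to the host buffer size from the descriptor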
if (dma_wr_desc_req.req_len > 20'(axis_desc.tdata[47:32])) begin
dma_wr_desc_req.req_len <= 20'(axis_desc.tdata[47:32]);
end
if (axis_desc.tuser) begin
// failed to read desc
state_reg <= STATE_IDLE;
end else begin
dma_wr_desc_req.req_valid <= 1'b1;
state_reg <= STATE_WRITE_DATA;
end
end
end
STATE_WRITE_DATA: begin
if (dma_wr_desc_sts.sts_valid) begin
axis_cpl.tvalid <= 1'b1;
state_reg <= STATE_IDLE;
end
end
default: begin
state_reg <= STATE_IDLE;
end
endcase
if (rst) begin
state_reg <= STATE_IDLE;
end
end
taxi_dma_ram_if #(
.SEGS(dma_ram_rd.SEGS),
.SEG_ADDR_W(dma_ram_rd.SEG_ADDR_W),
.SEG_DATA_W(dma_ram_rd.SEG_DATA_W),
.SEG_BE_W(dma_ram_rd.SEG_BE_W)
) dma_ram_wr();
taxi_dma_psdpram #(
.SIZE(4096),
.PIPELINE(2)
)
ram_inst (
.clk(clk),
.rst(rst),
/*
* Write port
*/
.dma_ram_wr(dma_ram_wr),
/*
* Read port
*/
.dma_ram_rd(dma_ram_rd)
);
taxi_dma_client_axis_sink
dma_inst (
.clk(clk),
.rst(rst),
/*
* DMA descriptor
*/
.desc_req(dma_desc),
.desc_sts(dma_desc),
/*
* AXI stream write data input
*/
.s_axis_wr_data(rx_data),
/*
* RAM interface
*/
.dma_ram_wr(dma_ram_wr),
/*
* Configuration
*/
    .enable(1'b1),
    .abort(1'b0)
);
endmodule
`resetall

View File

@@ -0,0 +1,205 @@
// SPDX-License-Identifier: CERN-OHL-S-2.0
/*
Copyright (c) 2025 FPGA Ninja, LLC
Authors:
- Alex Forencich
*/
`resetall
`timescale 1ns / 1ps
`default_nettype none
/*
 * Corundum-proto transmit datapath
*/
module cndm_proto_tx
(
input wire logic clk,
input wire logic rst,
/*
* DMA
*/
taxi_dma_desc_if.req_src dma_rd_desc_req,
taxi_dma_desc_if.sts_snk dma_rd_desc_sts,
taxi_dma_ram_if.wr_slv dma_ram_wr,
output wire logic desc_req,
taxi_axis_if.snk axis_desc,
taxi_axis_if.src tx_data,
taxi_axis_if.src axis_cpl
);
localparam RAM_ADDR_W = 16;
taxi_dma_desc_if #(
.SRC_ADDR_W(RAM_ADDR_W),
.SRC_SEL_EN(1'b0),
.SRC_ASID_EN(1'b0),
.DST_ADDR_W(RAM_ADDR_W),
.DST_SEL_EN(1'b0),
.DST_ASID_EN(1'b0),
.IMM_EN(1'b0),
.LEN_W(16),
.TAG_W(1),
.ID_EN(0),
.DEST_EN(0),
.USER_EN(1),
.USER_W(1)
) dma_desc();
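// Transmit flow: request a TX descriptor from the host ring, DMA the packet
// from host memory into the local segmented RAM, then stream it out through
// the DMA client and emit a completion record.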
localparam [2:0]
STATE_IDLE = 0,
STATE_READ_DESC = 1,
STATE_READ_DATA = 2,
STATE_TX_DATA = 3;
logic [2:0] state_reg = STATE_IDLE;
logic desc_req_reg = 1'b0;
assign desc_req = desc_req_reg;
always_ff @(posedge clk) begin
desc_req_reg <= 1'b0;
axis_desc.tready <= 1'b0;
dma_rd_desc_req.req_src_sel <= '0;
dma_rd_desc_req.req_src_asid <= '0;
dma_rd_desc_req.req_dst_sel <= '0;
dma_rd_desc_req.req_dst_asid <= '0;
dma_rd_desc_req.req_imm <= '0;
dma_rd_desc_req.req_imm_en <= '0;
dma_rd_desc_req.req_tag <= '0;
dma_rd_desc_req.req_id <= '0;
dma_rd_desc_req.req_dest <= '0;
dma_rd_desc_req.req_user <= '0;
dma_rd_desc_req.req_valid <= dma_rd_desc_req.req_valid && !dma_rd_desc_req.req_ready;
dma_desc.req_src_sel <= '0;
dma_desc.req_src_asid <= '0;
dma_desc.req_dst_addr <= '0;
dma_desc.req_dst_sel <= '0;
dma_desc.req_dst_asid <= '0;
dma_desc.req_imm <= '0;
dma_desc.req_imm_en <= '0;
dma_desc.req_tag <= '0;
dma_desc.req_id <= '0;
dma_desc.req_dest <= '0;
dma_desc.req_user <= '0;
dma_desc.req_valid <= dma_desc.req_valid && !dma_desc.req_ready;
axis_cpl.tkeep <= '0;
axis_cpl.tid <= '0;
axis_cpl.tdest <= '0;
axis_cpl.tuser <= '0;
axis_cpl.tlast <= 1'b1;
axis_cpl.tvalid <= axis_cpl.tvalid && !axis_cpl.tready;
case (state_reg)
STATE_IDLE: begin
desc_req_reg <= 1'b1;
state_reg <= STATE_READ_DESC;
end
STATE_READ_DESC: begin
axis_desc.tready <= 1'b1;
dma_rd_desc_req.req_src_addr <= axis_desc.tdata[127:64];
dma_rd_desc_req.req_dst_addr <= '0;
dma_rd_desc_req.req_len <= 20'(axis_desc.tdata[47:32]);
dma_desc.req_src_addr <= '0;
dma_desc.req_len <= axis_desc.tdata[47:32];
if (axis_desc.tvalid && axis_desc.tready) begin
if (axis_desc.tuser) begin
// failed to read desc
state_reg <= STATE_IDLE;
end else begin
dma_rd_desc_req.req_valid <= 1'b1;
state_reg <= STATE_READ_DATA;
end
end
end
STATE_READ_DATA: begin
if (dma_rd_desc_sts.sts_valid) begin
dma_desc.req_valid <= 1'b1;
state_reg <= STATE_TX_DATA;
end
end
STATE_TX_DATA: begin
if (dma_desc.sts_valid) begin
axis_cpl.tvalid <= 1'b1;
state_reg <= STATE_IDLE;
end
end
default: begin
state_reg <= STATE_IDLE;
end
endcase
if (rst) begin
state_reg <= STATE_IDLE;
end
end
taxi_dma_ram_if #(
.SEGS(dma_ram_wr.SEGS),
.SEG_ADDR_W(dma_ram_wr.SEG_ADDR_W),
.SEG_DATA_W(dma_ram_wr.SEG_DATA_W),
.SEG_BE_W(dma_ram_wr.SEG_BE_W)
) dma_ram_rd();
taxi_dma_psdpram #(
.SIZE(4096),
.PIPELINE(2)
)
ram_inst (
.clk(clk),
.rst(rst),
/*
* Write port
*/
.dma_ram_wr(dma_ram_wr),
/*
* Read port
*/
.dma_ram_rd(dma_ram_rd)
);
taxi_dma_client_axis_source
dma_inst (
.clk(clk),
.rst(rst),
/*
* DMA descriptor
*/
.desc_req(dma_desc),
.desc_sts(dma_desc),
/*
* AXI stream read data output
*/
.m_axis_rd_data(tx_data),
/*
* RAM interface
*/
.dma_ram_rd(dma_ram_rd),
/*
* Configuration
*/
.enable(1'b1)
);
endmodule
`resetall

View File

@@ -0,0 +1,276 @@
# SPDX-License-Identifier: CERN-OHL-S-2.0
"""
Copyright (c) 2025 FPGA Ninja, LLC
Authors:
- Alex Forencich
"""
import logging
import struct
from collections import deque
from cocotb.queue import Queue
class Port:
def __init__(self, driver, index, hw_regs):
self.driver = driver
self.log = driver.log
self.index = index
self.hw_regs = hw_regs
self.rxq_log_size = (256).bit_length()-1
self.rxq_size = 2**self.rxq_log_size
self.rxq_mask = self.rxq_size-1
self.rxq = None
self.rxq_prod = 0
self.rxq_cons = 0
self.rx_info = [None] * self.rxq_size
self.rxcq_log_size = (256).bit_length()-1
self.rxcq_size = 2**self.rxcq_log_size
self.rxcq_mask = self.rxcq_size-1
self.rxcq = None
self.rxcq_prod = 0
self.rxcq_cons = 0
self.txq_log_size = (256).bit_length()-1
self.txq_size = 2**self.txq_log_size
self.txq_mask = self.txq_size-1
self.txq = None
self.txq_prod = 0
self.txq_cons = 0
self.tx_info = [None] * self.txq_size
self.txcq_log_size = (256).bit_length()-1
self.txcq_size = 2**self.txcq_log_size
self.txcq_mask = self.txcq_size-1
self.txcq = None
self.txcq_prod = 0
self.txcq_cons = 0
self.rx_queue = Queue()
async def init(self):
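        # Allocate each descriptor/completion ring in host memory, then program
        # the port registers: disable the queue, write the base address, and
        # re-enable it with the log2 ring size in bits 19:16 of the control dword.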
self.rxq = self.driver.pool.alloc_region(self.rxq_size*16)
addr = self.rxq.get_absolute_address(0)
await self.hw_regs.write_dword(0x0200, 0x00000000)
await self.hw_regs.write_dword(0x0204, 0x00000000)
await self.hw_regs.write_dword(0x0208, addr & 0xffffffff)
await self.hw_regs.write_dword(0x020c, addr >> 32)
await self.hw_regs.write_dword(0x0200, 0x00000001 | (self.rxq_log_size << 16))
self.rxcq = self.driver.pool.alloc_region(self.rxcq_size*16)
addr = self.rxcq.get_absolute_address(0)
await self.hw_regs.write_dword(0x0400, 0x00000000)
await self.hw_regs.write_dword(0x0408, addr & 0xffffffff)
await self.hw_regs.write_dword(0x040c, addr >> 32)
await self.hw_regs.write_dword(0x0400, 0x00000001 | (self.rxcq_log_size << 16))
self.txq = self.driver.pool.alloc_region(self.txq_size*16)
addr = self.txq.get_absolute_address(0)
await self.hw_regs.write_dword(0x0100, 0x00000000)
await self.hw_regs.write_dword(0x0104, 0x00000000)
await self.hw_regs.write_dword(0x0108, addr & 0xffffffff)
await self.hw_regs.write_dword(0x010c, addr >> 32)
await self.hw_regs.write_dword(0x0100, 0x00000001 | (self.txq_log_size << 16))
self.txcq = self.driver.pool.alloc_region(self.txcq_size*16)
addr = self.txcq.get_absolute_address(0)
await self.hw_regs.write_dword(0x0300, 0x00000000)
await self.hw_regs.write_dword(0x0308, addr & 0xffffffff)
await self.hw_regs.write_dword(0x030c, addr >> 32)
await self.hw_regs.write_dword(0x0300, 0x00000001 | (self.txcq_log_size << 16))
# wait for writes to complete
await self.hw_regs.read_dword(0)
await self.refill_rx_buffers()
async def start_xmit(self, data):
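        # Copy the payload into a host buffer (with a little headroom), fill in
        # the next free TX descriptor, and ring the doorbell by writing the new
        # producer pointer to the TXQ pointer register.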
headroom = 10
tx_buf = self.driver.alloc_pkt()
await tx_buf.write(headroom, data)
index = self.txq_prod & self.txq_mask
ptr = tx_buf.get_absolute_address(0)
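        # 16-byte descriptor: 4 reserved bytes, 32-bit length, 64-bit DMA address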
struct.pack_into('<xxxxLQ', self.txq.mem, 16*index, len(data), ptr+headroom)
self.tx_info[index] = tx_buf
self.txq_prod += 1
await self.hw_regs.write_dword(0x0104, self.txq_prod & 0xffff)
async def recv(self):
return await self.rx_queue.get()
async def recv_nowait(self):
return self.rx_queue.get_nowait()
def free_tx_desc(self, index):
pkt = self.tx_info[index]
self.driver.free_pkt(pkt)
self.tx_info[index] = None
def free_tx_buf(self):
while self.txq_cons != self.txq_prod:
index = self.txq_cons & self.txq_mask
self.free_tx_desc(index)
self.txq_cons += 1
async def process_tx_cq(self):
cq_cons_ptr = self.txcq_cons
cons_ptr = self.txq_cons
while True:
cq_index = cq_cons_ptr & self.txcq_mask
index = cons_ptr & self.txq_mask
cpl_data = struct.unpack_from("<LLLL", self.txcq.mem, cq_index*16)
self.log.info("TX CQ index %d data %s", cq_index, cpl_data)
if bool(cpl_data[-1] & 0x80000000) == bool(cq_cons_ptr & self.txcq_size):
self.log.info("CQ empty")
break
pkt = self.tx_info[index]
self.free_tx_desc(index)
cq_cons_ptr += 1
cons_ptr += 1
self.txcq_cons = cq_cons_ptr
self.txq_cons = cons_ptr
def free_rx_desc(self, index):
pkt = self.rx_info[index]
self.driver.free_pkt(pkt)
self.rx_info[index] = None
def free_rx_buf(self):
while self.rxq_cons != self.rxq_prod:
index = self.rxq_cons & self.rxq_mask
self.free_rx_desc(index)
self.rxq_cons += 1
def prepare_rx_desc(self, index):
pkt = self.driver.alloc_pkt()
self.rx_info[index] = pkt
length = pkt.size
ptr = pkt.get_absolute_address(0)
struct.pack_into('<xxxxLQ', self.rxq.mem, 16*index, length, ptr)
async def refill_rx_buffers(self):
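        # Post an RX buffer into every free slot of the ring, then publish the
        # new producer pointer; skip the update entirely when fewer than 8 slots
        # are free.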
missing = self.rxq_size - (self.rxq_prod - self.rxq_cons)
if missing < 8:
return
for k in range(missing):
self.prepare_rx_desc(self.rxq_prod & self.rxq_mask)
self.rxq_prod += 1
await self.hw_regs.write_dword(0x0204, self.rxq_prod & 0xffff)
async def process_rx_cq(self):
cq_cons_ptr = self.rxcq_cons
cons_ptr = self.rxq_cons
while True:
cq_index = cq_cons_ptr & self.rxcq_mask
index = cons_ptr & self.rxq_mask
cpl_data = struct.unpack_from("<LLLL", self.rxcq.mem, cq_index*16)
self.log.info("RX CQ index %d data %s", cq_index, cpl_data)
if bool(cpl_data[-1] & 0x80000000) == bool(cq_cons_ptr & self.rxcq_size):
self.log.info("CQ empty")
break
pkt = self.rx_info[index]
length = cpl_data[1]
data = pkt[:length]
self.log.info("Packet: %s", data)
self.rx_queue.put_nowait(data)
self.free_rx_desc(index)
cq_cons_ptr += 1
cons_ptr += 1
self.rxcq_cons = cq_cons_ptr
self.rxq_cons = cons_ptr
await self.refill_rx_buffers()
async def interrupt_handler(self):
self.log.info("Interrupt")
await self.process_rx_cq()
await self.process_tx_cq()
class Driver:
def __init__(self):
self.log = logging.getLogger("cocotb.cndm_proto")
self.dev = None
self.pool = None
self.hw_regs = None
self.ports = []
self.free_packets = deque()
self.allocated_packets = []
async def init_pcie_dev(self, dev):
self.dev = dev
self.pool = dev.rc.mem_pool
await dev.enable_device()
await dev.set_master()
await dev.alloc_irq_vectors(32, 32)
self.hw_regs = dev.bar_window[0]
await self.init_common()
async def init_common(self):
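        # Read the port count and the per-port register window geometry from the
        # global registers, then create and initialize a Port (and hook up an
        # interrupt handler) for each port.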
self.port_count = await self.hw_regs.read_dword(0x0100)
self.port_offset = await self.hw_regs.read_dword(0x0104)
self.port_stride = await self.hw_regs.read_dword(0x0108)
self.log.info("Port count: %d", self.port_count)
self.log.info("Port offset: 0x%x", self.port_offset)
self.log.info("Port stride: 0x%x", self.port_stride)
for k in range(self.port_count):
port = Port(self, k, self.hw_regs.create_window(self.port_offset + self.port_stride*k))
await port.init()
self.dev.request_irq(k, port.interrupt_handler)
self.ports.append(port)
def alloc_pkt(self):
if self.free_packets:
return self.free_packets.popleft()
pkt = self.pool.alloc_region(4096)
self.allocated_packets.append(pkt)
return pkt
def free_pkt(self, pkt):
assert pkt is not None
assert pkt in self.allocated_packets
self.free_packets.append(pkt)

View File

@@ -0,0 +1,59 @@
# SPDX-License-Identifier: CERN-OHL-S-2.0
#
# Copyright (c) 2020-2025 FPGA Ninja, LLC
#
# Authors:
# - Alex Forencich
TOPLEVEL_LANG = verilog
SIM ?= verilator
WAVES ?= 0
COCOTB_HDL_TIMEUNIT = 1ns
COCOTB_HDL_TIMEPRECISION = 1ps
RTL_DIR = ../../rtl
LIB_DIR = ../../lib
TAXI_SRC_DIR = $(LIB_DIR)/taxi/src
DUT = cndm_proto_pcie_us
COCOTB_TEST_MODULES = test_$(DUT)
COCOTB_TOPLEVEL = test_$(DUT)
MODULE = $(COCOTB_TEST_MODULES)
TOPLEVEL = $(COCOTB_TOPLEVEL)
VERILOG_SOURCES += $(COCOTB_TOPLEVEL).sv
VERILOG_SOURCES += $(RTL_DIR)/$(DUT).f
VERILOG_SOURCES += $(TAXI_SRC_DIR)/axis/rtl/taxi_axis_async_fifo.f
VERILOG_SOURCES += $(TAXI_SRC_DIR)/sync/rtl/taxi_sync_reset.sv
VERILOG_SOURCES += $(TAXI_SRC_DIR)/sync/rtl/taxi_sync_signal.sv
# handle file list files
process_f_file = $(call process_f_files,$(addprefix $(dir $1),$(shell cat $1)))
process_f_files = $(foreach f,$1,$(if $(filter %.f,$f),$(call process_f_file,$f),$f))
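# de-duplicate the source list by base file name, keeping the last occurrence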
uniq_base = $(if $1,$(call uniq_base,$(foreach f,$1,$(if $(filter-out $(notdir $(lastword $1)),$(notdir $f)),$f,))) $(lastword $1))
VERILOG_SOURCES := $(call uniq_base,$(call process_f_files,$(VERILOG_SOURCES)))
# module parameters
export PARAM_SIM := "1'b1"
export PARAM_VENDOR := "\"XILINX\""
export PARAM_FAMILY := "\"virtexuplus\""
export PARAM_PORTS := 2
export PARAM_MAC_DATA_W := 32
export PARAM_AXIS_PCIE_DATA_W := 256
export PARAM_BAR0_APERTURE := 24
ifeq ($(SIM), icarus)
PLUSARGS += -fst
COMPILE_ARGS += $(foreach v,$(filter PARAM_%,$(.VARIABLES)),-P $(COCOTB_TOPLEVEL).$(subst PARAM_,,$(v))=$($(v)))
else ifeq ($(SIM), verilator)
COMPILE_ARGS += $(foreach v,$(filter PARAM_%,$(.VARIABLES)),-G$(subst PARAM_,,$(v))=$($(v)))
ifeq ($(WAVES), 1)
COMPILE_ARGS += --trace-fst
VERILATOR_TRACE = 1
endif
endif
include $(shell cocotb-config --makefiles)/Makefile.sim

View File

@@ -0,0 +1 @@
../cndm_proto.py

View File

@@ -0,0 +1,473 @@
#!/usr/bin/env python
# SPDX-License-Identifier: CERN-OHL-S-2.0
"""
Copyright (c) 2020-2025 FPGA Ninja, LLC
Authors:
- Alex Forencich
"""
import logging
import os
import sys
import pytest
import cocotb_test.simulator
import cocotb
from cocotb.clock import Clock
from cocotb.triggers import RisingEdge, FallingEdge, Timer
from cocotbext.axi import AxiStreamBus
from cocotbext.eth import EthMac
from cocotbext.pcie.core import RootComplex
from cocotbext.pcie.xilinx.us import UltraScalePlusPcieDevice
try:
import cndm_proto
except ImportError:
# attempt import from current directory
sys.path.insert(0, os.path.join(os.path.dirname(__file__)))
try:
import cndm_proto
finally:
del sys.path[0]
class TB:
def __init__(self, dut):
self.dut = dut
self.log = logging.getLogger("cocotb.tb")
self.log.setLevel(logging.DEBUG)
# PCIe
self.rc = RootComplex()
self.rc.max_payload_size = 0x1 # 256 bytes
self.rc.max_read_request_size = 0x2 # 512 bytes
self.dev = UltraScalePlusPcieDevice(
# configuration options
pcie_generation=3,
pcie_link_width=8,
user_clk_frequency=250e6,
alignment="dword",
cq_straddle=False,
cc_straddle=False,
rq_straddle=False,
rc_straddle=False,
rc_4tlp_straddle=False,
pf_count=1,
max_payload_size=1024,
enable_client_tag=True,
enable_extended_tag=True,
enable_parity=False,
enable_rx_msg_interface=False,
enable_sriov=False,
enable_extended_configuration=False,
pf0_msi_enable=True,
pf0_msi_count=32,
pf1_msi_enable=False,
pf1_msi_count=1,
pf2_msi_enable=False,
pf2_msi_count=1,
pf3_msi_enable=False,
pf3_msi_count=1,
pf0_msix_enable=False,
pf0_msix_table_size=31,
pf0_msix_table_bir=4,
pf0_msix_table_offset=0x00000000,
pf0_msix_pba_bir=4,
pf0_msix_pba_offset=0x00008000,
pf1_msix_enable=False,
pf1_msix_table_size=0,
pf1_msix_table_bir=0,
pf1_msix_table_offset=0x00000000,
pf1_msix_pba_bir=0,
pf1_msix_pba_offset=0x00000000,
pf2_msix_enable=False,
pf2_msix_table_size=0,
pf2_msix_table_bir=0,
pf2_msix_table_offset=0x00000000,
pf2_msix_pba_bir=0,
pf2_msix_pba_offset=0x00000000,
pf3_msix_enable=False,
pf3_msix_table_size=0,
pf3_msix_table_bir=0,
pf3_msix_table_offset=0x00000000,
pf3_msix_pba_bir=0,
pf3_msix_pba_offset=0x00000000,
# signals
# Clock and Reset Interface
user_clk=dut.pcie_clk,
user_reset=dut.pcie_rst,
# user_lnk_up
# sys_clk
# sys_clk_gt
# sys_reset
# phy_rdy_out
# Requester reQuest Interface
rq_bus=AxiStreamBus.from_entity(dut.m_axis_pcie_rq),
pcie_rq_seq_num0=dut.pcie_rq_seq_num0,
pcie_rq_seq_num_vld0=dut.pcie_rq_seq_num_vld0,
pcie_rq_seq_num1=dut.pcie_rq_seq_num1,
pcie_rq_seq_num_vld1=dut.pcie_rq_seq_num_vld1,
# pcie_rq_tag0
# pcie_rq_tag1
# pcie_rq_tag_av
# pcie_rq_tag_vld0
# pcie_rq_tag_vld1
# Requester Completion Interface
rc_bus=AxiStreamBus.from_entity(dut.s_axis_pcie_rc),
# Completer reQuest Interface
cq_bus=AxiStreamBus.from_entity(dut.s_axis_pcie_cq),
# pcie_cq_np_req
# pcie_cq_np_req_count
# Completer Completion Interface
cc_bus=AxiStreamBus.from_entity(dut.m_axis_pcie_cc),
# Transmit Flow Control Interface
# pcie_tfc_nph_av=dut.pcie_tfc_nph_av,
# pcie_tfc_npd_av=dut.pcie_tfc_npd_av,
# Configuration Management Interface
# cfg_mgmt_addr=dut.cfg_mgmt_addr,
# cfg_mgmt_function_number=dut.cfg_mgmt_function_number,
# cfg_mgmt_write=dut.cfg_mgmt_write,
# cfg_mgmt_write_data=dut.cfg_mgmt_write_data,
# cfg_mgmt_byte_enable=dut.cfg_mgmt_byte_enable,
# cfg_mgmt_read=dut.cfg_mgmt_read,
# cfg_mgmt_read_data=dut.cfg_mgmt_read_data,
# cfg_mgmt_read_write_done=dut.cfg_mgmt_read_write_done,
# cfg_mgmt_debug_access
# Configuration Status Interface
# cfg_phy_link_down
# cfg_phy_link_status
# cfg_negotiated_width
# cfg_current_speed
# cfg_max_payload=dut.cfg_max_payload,
# cfg_max_read_req=dut.cfg_max_read_req,
# cfg_function_status
# cfg_vf_status
# cfg_function_power_state
# cfg_vf_power_state
# cfg_link_power_state
# cfg_err_cor_out
# cfg_err_nonfatal_out
# cfg_err_fatal_out
# cfg_local_error_out
# cfg_local_error_valid
# cfg_rx_pm_state
# cfg_tx_pm_state
# cfg_ltssm_state
# cfg_rcb_status=dut.cfg_rcb_status,
# cfg_obff_enable
# cfg_pl_status_change
# cfg_tph_requester_enable
# cfg_tph_st_mode
# cfg_vf_tph_requester_enable
# cfg_vf_tph_st_mode
# Configuration Received Message Interface
# cfg_msg_received
# cfg_msg_received_data
# cfg_msg_received_type
# Configuration Transmit Message Interface
# cfg_msg_transmit
# cfg_msg_transmit_type
# cfg_msg_transmit_data
# cfg_msg_transmit_done
# Configuration Flow Control Interface
cfg_fc_ph=dut.cfg_fc_ph,
cfg_fc_pd=dut.cfg_fc_pd,
cfg_fc_nph=dut.cfg_fc_nph,
cfg_fc_npd=dut.cfg_fc_npd,
cfg_fc_cplh=dut.cfg_fc_cplh,
cfg_fc_cpld=dut.cfg_fc_cpld,
cfg_fc_sel=dut.cfg_fc_sel,
# Configuration Control Interface
# cfg_hot_reset_in
# cfg_hot_reset_out
# cfg_config_space_enable
# cfg_dsn
# cfg_bus_number
# cfg_ds_port_number
# cfg_ds_bus_number
# cfg_ds_device_number
# cfg_ds_function_number
# cfg_power_state_change_ack
# cfg_power_state_change_interrupt
# cfg_err_cor_in=dut.status_error_cor,
# cfg_err_uncor_in=dut.status_error_uncor,
# cfg_flr_in_process
# cfg_flr_done
# cfg_vf_flr_in_process
# cfg_vf_flr_func_num
# cfg_vf_flr_done
# cfg_pm_aspm_l1_entry_reject
# cfg_pm_aspm_tx_l0s_entry_disable
# cfg_req_pm_transition_l23_ready
# cfg_link_training_enable
# Configuration Interrupt Controller Interface
# cfg_interrupt_int
# cfg_interrupt_sent
# cfg_interrupt_pending
cfg_interrupt_msi_enable=dut.cfg_interrupt_msi_enable,
cfg_interrupt_msi_mmenable=dut.cfg_interrupt_msi_mmenable,
cfg_interrupt_msi_mask_update=dut.cfg_interrupt_msi_mask_update,
cfg_interrupt_msi_data=dut.cfg_interrupt_msi_data,
cfg_interrupt_msi_select=dut.cfg_interrupt_msi_select,
cfg_interrupt_msi_int=dut.cfg_interrupt_msi_int,
cfg_interrupt_msi_pending_status=dut.cfg_interrupt_msi_pending_status,
cfg_interrupt_msi_pending_status_data_enable=dut.cfg_interrupt_msi_pending_status_data_enable,
cfg_interrupt_msi_pending_status_function_num=dut.cfg_interrupt_msi_pending_status_function_num,
cfg_interrupt_msi_sent=dut.cfg_interrupt_msi_sent,
cfg_interrupt_msi_fail=dut.cfg_interrupt_msi_fail,
# cfg_interrupt_msix_enable=dut.cfg_interrupt_msix_enable,
# cfg_interrupt_msix_mask=dut.cfg_interrupt_msix_mask,
# cfg_interrupt_msix_vf_enable=dut.cfg_interrupt_msix_vf_enable,
# cfg_interrupt_msix_vf_mask=dut.cfg_interrupt_msix_vf_mask,
# cfg_interrupt_msix_address=dut.cfg_interrupt_msix_address,
# cfg_interrupt_msix_data=dut.cfg_interrupt_msix_data,
# cfg_interrupt_msix_int=dut.cfg_interrupt_msix_int,
# cfg_interrupt_msix_vec_pending=dut.cfg_interrupt_msix_vec_pending,
# cfg_interrupt_msix_vec_pending_status=dut.cfg_interrupt_msix_vec_pending_status,
# cfg_interrupt_msix_sent=dut.cfg_interrupt_msix_sent,
# cfg_interrupt_msix_fail=dut.cfg_interrupt_msix_fail,
cfg_interrupt_msi_attr=dut.cfg_interrupt_msi_attr,
cfg_interrupt_msi_tph_present=dut.cfg_interrupt_msi_tph_present,
cfg_interrupt_msi_tph_type=dut.cfg_interrupt_msi_tph_type,
cfg_interrupt_msi_tph_st_tag=dut.cfg_interrupt_msi_tph_st_tag,
cfg_interrupt_msi_function_number=dut.cfg_interrupt_msi_function_number,
# Configuration Extend Interface
# cfg_ext_read_received
# cfg_ext_write_received
# cfg_ext_register_number
# cfg_ext_function_number
# cfg_ext_write_data
# cfg_ext_write_byte_enable
# cfg_ext_read_data
# cfg_ext_read_data_valid
)
# self.dev.log.setLevel(logging.DEBUG)
self.rc.make_port().connect(self.dev)
self.dev.functions[0].configure_bar(0, 2**int(dut.uut.axil_ctrl_bar.ADDR_W))
# Ethernet
self.port_mac = []
eth_clock_period = 3.2
eth_speed = 10e9
for k in range(len(dut.mac_axis_tx)):
cocotb.start_soon(Clock(dut.mac_tx_clk[k], eth_clock_period, units="ns").start())
cocotb.start_soon(Clock(dut.mac_rx_clk[k], eth_clock_period, units="ns").start())
dut.mac_tx_rst[k].setimmediatevalue(0)
dut.mac_rx_rst[k].setimmediatevalue(0)
mac = EthMac(
tx_clk=dut.mac_tx_clk[k],
tx_rst=dut.mac_tx_rst[k],
tx_bus=AxiStreamBus.from_entity(dut.mac_axis_tx[k]),
rx_clk=dut.mac_rx_clk[k],
rx_rst=dut.mac_rx_rst[k],
rx_bus=AxiStreamBus.from_entity(dut.mac_axis_rx[k]),
ifg=12, speed=eth_speed
)
self.port_mac.append(mac)
self.loopback_enable = False
cocotb.start_soon(self._run_loopback())
async def init(self):
for mac in self.port_mac:
mac.rx.reset.setimmediatevalue(0)
mac.tx.reset.setimmediatevalue(0)
await FallingEdge(self.dut.pcie_rst)
await Timer(100, 'ns')
for k in range(10):
await RisingEdge(self.dut.pcie_clk)
for mac in self.port_mac:
mac.rx.reset.value = 1
mac.tx.reset.value = 1
for k in range(10):
await RisingEdge(self.dut.pcie_clk)
for mac in self.port_mac:
mac.rx.reset.value = 0
mac.tx.reset.value = 0
for k in range(10):
await RisingEdge(self.dut.pcie_clk)
await self.rc.enumerate()
async def _run_loopback(self):
while True:
await RisingEdge(self.dut.pcie_clk)
if self.loopback_enable:
for mac in self.port_mac:
while not mac.tx.empty():
await mac.rx.send(await mac.tx.recv())
@cocotb.test()
async def run_test(dut):
tb = TB(dut)
await tb.init()
tb.log.info("Init driver model")
driver = cndm_proto.Driver()
await driver.init_pcie_dev(tb.rc.find_device(tb.dev.functions[0].pcie_id))
tb.log.info("Init complete")
tb.log.info("Send and receive single packet on each port")
for k in range(len(driver.ports)):
data = f"Corundum rocks on port {k}!".encode('ascii')
await driver.ports[k].start_xmit(data)
pkt = await tb.port_mac[k].tx.recv()
tb.log.info("Got TX packet: %s", pkt)
assert bytes(pkt) == data
await tb.port_mac[k].rx.send(pkt)
pkt = await driver.ports[k].recv()
tb.log.info("Got RX packet: %s", pkt)
assert bytes(pkt) == data
tb.log.info("Multiple small packets")
count = 64
pkts = [bytearray([(x+k) % 256 for x in range(60)]) for k in range(count)]
tb.loopback_enable = True
for p in pkts:
await driver.ports[0].start_xmit(p)
for k in range(count):
pkt = await driver.ports[0].recv()
tb.log.info("Got RX packet: %s", pkt)
assert bytes(pkt) == pkts[k]
tb.loopback_enable = False
tb.log.info("Multiple large packets")
count = 64
pkts = [bytearray([(x+k) % 256 for x in range(1514)]) for k in range(count)]
tb.loopback_enable = True
for p in pkts:
await driver.ports[0].start_xmit(p)
for k in range(count):
pkt = await driver.ports[0].recv()
tb.log.info("Got RX packet: %s", pkt)
assert bytes(pkt) == pkts[k]
tb.loopback_enable = False
await RisingEdge(dut.pcie_clk)
await RisingEdge(dut.pcie_clk)
# cocotb-test
tests_dir = os.path.abspath(os.path.dirname(__file__))
rtl_dir = os.path.abspath(os.path.join(tests_dir, '..', '..', 'rtl'))
lib_dir = os.path.abspath(os.path.join(tests_dir, '..', '..', 'lib'))
taxi_src_dir = os.path.abspath(os.path.join(lib_dir, 'taxi', 'src'))
def process_f_files(files):
    lst = {}
    for f in files:
        if f[-2:].lower() == '.f':
            # expand .f file lists recursively, resolving entries relative to the .f file
            with open(f, 'r') as fp:
                entries = fp.read().split()
            for src in process_f_files([os.path.join(os.path.dirname(f), x) for x in entries]):
                lst[os.path.basename(src)] = src
        else:
            lst[os.path.basename(f)] = f
    # de-duplicate by base file name, keeping the last occurrence of each
    return list(lst.values())
@pytest.mark.parametrize("mac_data_w", [32, 64])
def test_cndm_proto_pcie_us(request, mac_data_w):
dut = "cndm_proto_pcie_us"
module = os.path.splitext(os.path.basename(__file__))[0]
toplevel = module
verilog_sources = [
os.path.join(tests_dir, f"{toplevel}.sv"),
os.path.join(rtl_dir, f"{dut}.f"),
os.path.join(taxi_src_dir, "axis", "rtl", "taxi_axis_async_fifo.f"),
os.path.join(taxi_src_dir, "sync", "rtl", "taxi_sync_reset.sv"),
os.path.join(taxi_src_dir, "sync", "rtl", "taxi_sync_signal.sv"),
]
verilog_sources = process_f_files(verilog_sources)
parameters = {}
parameters['SIM'] = "1'b1"
parameters['VENDOR'] = "\"XILINX\""
parameters['FAMILY'] = "\"virtexuplus\""
parameters['PORTS'] = 2
parameters['MAC_DATA_W'] = mac_data_w
parameters['AXIS_PCIE_DATA_W'] = 256
parameters['BAR0_APERTURE'] = 24
extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()}
sim_build = os.path.join(tests_dir, "sim_build",
request.node.name.replace('[', '-').replace(']', ''))
cocotb_test.simulator.run(
simulator="verilator",
python_search=[tests_dir],
verilog_sources=verilog_sources,
toplevel=toplevel,
module=module,
parameters=parameters,
sim_build=sim_build,
extra_env=extra_env,
)

View File

@@ -0,0 +1,228 @@
// SPDX-License-Identifier: CERN-OHL-S-2.0
/*
Copyright (c) 2025 FPGA Ninja, LLC
Authors:
- Alex Forencich
*/
`resetall
`timescale 1ns / 1ps
`default_nettype none
/*
 * Corundum-proto PCIe UltraScale+ testbench
*/
module test_cndm_proto_pcie_us #
(
/* verilator lint_off WIDTHTRUNC */
parameter logic SIM = 1'b0,
parameter string VENDOR = "XILINX",
parameter string FAMILY = "virtexuplus",
parameter PORTS = 2,
parameter MAC_DATA_W = 32,
parameter AXIS_PCIE_DATA_W = 256,
parameter AXIS_PCIE_RC_USER_W = AXIS_PCIE_DATA_W < 512 ? 75 : 161,
parameter AXIS_PCIE_RQ_USER_W = AXIS_PCIE_DATA_W < 512 ? 62 : 137,
parameter AXIS_PCIE_CQ_USER_W = AXIS_PCIE_DATA_W < 512 ? 85 : 183,
parameter AXIS_PCIE_CC_USER_W = AXIS_PCIE_DATA_W < 512 ? 33 : 81,
parameter BAR0_APERTURE = 24
/* verilator lint_on WIDTHTRUNC */
)
();
localparam AXIS_PCIE_KEEP_W = (AXIS_PCIE_DATA_W/32);
localparam RQ_SEQ_NUM_W = AXIS_PCIE_RQ_USER_W == 60 ? 4 : 6;
logic sfp_mgt_refclk_p;
logic sfp_mgt_refclk_n;
logic sfp_mgt_refclk_out;
logic [1:0] sfp_npres;
logic [1:0] sfp_tx_fault;
logic [1:0] sfp_los;
logic pcie_clk;
logic pcie_rst;
taxi_axis_if #(
.DATA_W(AXIS_PCIE_DATA_W),
.KEEP_EN(1),
.KEEP_W(AXIS_PCIE_KEEP_W),
.USER_EN(1),
.USER_W(AXIS_PCIE_CQ_USER_W)
) s_axis_pcie_cq();
taxi_axis_if #(
.DATA_W(AXIS_PCIE_DATA_W),
.KEEP_EN(1),
.KEEP_W(AXIS_PCIE_KEEP_W),
.USER_EN(1),
.USER_W(AXIS_PCIE_CC_USER_W)
) m_axis_pcie_cc();
taxi_axis_if #(
.DATA_W(AXIS_PCIE_DATA_W),
.KEEP_EN(1),
.KEEP_W(AXIS_PCIE_KEEP_W),
.USER_EN(1),
.USER_W(AXIS_PCIE_RQ_USER_W)
) m_axis_pcie_rq();
taxi_axis_if #(
.DATA_W(AXIS_PCIE_DATA_W),
.KEEP_EN(1),
.KEEP_W(AXIS_PCIE_KEEP_W),
.USER_EN(1),
.USER_W(AXIS_PCIE_RC_USER_W)
) s_axis_pcie_rc();
logic [RQ_SEQ_NUM_W-1:0] pcie_rq_seq_num0;
logic pcie_rq_seq_num_vld0;
logic [RQ_SEQ_NUM_W-1:0] pcie_rq_seq_num1;
logic pcie_rq_seq_num_vld1;
logic [2:0] cfg_max_payload;
logic [2:0] cfg_max_read_req;
logic [3:0] cfg_rcb_status;
logic [9:0] cfg_mgmt_addr;
logic [7:0] cfg_mgmt_function_number;
logic cfg_mgmt_write;
logic [31:0] cfg_mgmt_write_data;
logic [3:0] cfg_mgmt_byte_enable;
logic cfg_mgmt_read;
logic [31:0] cfg_mgmt_read_data;
logic cfg_mgmt_read_write_done;
logic [7:0] cfg_fc_ph;
logic [11:0] cfg_fc_pd;
logic [7:0] cfg_fc_nph;
logic [11:0] cfg_fc_npd;
logic [7:0] cfg_fc_cplh;
logic [11:0] cfg_fc_cpld;
logic [2:0] cfg_fc_sel;
logic [3:0] cfg_interrupt_msi_enable;
logic [11:0] cfg_interrupt_msi_mmenable;
logic cfg_interrupt_msi_mask_update;
logic [31:0] cfg_interrupt_msi_data;
logic [1:0] cfg_interrupt_msi_select;
logic [31:0] cfg_interrupt_msi_int;
logic [31:0] cfg_interrupt_msi_pending_status;
logic cfg_interrupt_msi_pending_status_data_enable;
logic [1:0] cfg_interrupt_msi_pending_status_function_num;
logic cfg_interrupt_msi_sent;
logic cfg_interrupt_msi_fail;
logic [2:0] cfg_interrupt_msi_attr;
logic cfg_interrupt_msi_tph_present;
logic [1:0] cfg_interrupt_msi_tph_type;
logic [7:0] cfg_interrupt_msi_tph_st_tag;
logic [7:0] cfg_interrupt_msi_function_number;
logic mac_tx_clk[PORTS];
logic mac_tx_rst[PORTS];
taxi_axis_if #(
.DATA_W(MAC_DATA_W),
.ID_W(8),
.USER_EN(1),
.USER_W(1)
) mac_axis_tx[PORTS]();
logic mac_rx_clk[PORTS];
logic mac_rx_rst[PORTS];
taxi_axis_if #(
.DATA_W(96),
.KEEP_W(1),
.ID_W(8)
) mac_axis_tx_cpl[PORTS]();
taxi_axis_if #(
.DATA_W(MAC_DATA_W),
.ID_W(8),
.USER_EN(1),
.USER_W(1)
) mac_axis_rx[PORTS]();
cndm_proto_pcie_us #(
.SIM(SIM),
.VENDOR(VENDOR),
.FAMILY(FAMILY),
.PORTS(PORTS),
.RQ_SEQ_NUM_W(RQ_SEQ_NUM_W),
.BAR0_APERTURE(BAR0_APERTURE)
)
uut (
/*
* PCIe
*/
.pcie_clk(pcie_clk),
.pcie_rst(pcie_rst),
.s_axis_pcie_cq(s_axis_pcie_cq),
.m_axis_pcie_cc(m_axis_pcie_cc),
.m_axis_pcie_rq(m_axis_pcie_rq),
.s_axis_pcie_rc(s_axis_pcie_rc),
.pcie_rq_seq_num0(pcie_rq_seq_num0),
.pcie_rq_seq_num_vld0(pcie_rq_seq_num_vld0),
.pcie_rq_seq_num1(pcie_rq_seq_num1),
.pcie_rq_seq_num_vld1(pcie_rq_seq_num_vld1),
.cfg_max_payload(cfg_max_payload),
.cfg_max_read_req(cfg_max_read_req),
.cfg_rcb_status(cfg_rcb_status),
.cfg_mgmt_addr(cfg_mgmt_addr),
.cfg_mgmt_function_number(cfg_mgmt_function_number),
.cfg_mgmt_write(cfg_mgmt_write),
.cfg_mgmt_write_data(cfg_mgmt_write_data),
.cfg_mgmt_byte_enable(cfg_mgmt_byte_enable),
.cfg_mgmt_read(cfg_mgmt_read),
.cfg_mgmt_read_data(cfg_mgmt_read_data),
.cfg_mgmt_read_write_done(cfg_mgmt_read_write_done),
.cfg_fc_ph(cfg_fc_ph),
.cfg_fc_pd(cfg_fc_pd),
.cfg_fc_nph(cfg_fc_nph),
.cfg_fc_npd(cfg_fc_npd),
.cfg_fc_cplh(cfg_fc_cplh),
.cfg_fc_cpld(cfg_fc_cpld),
.cfg_fc_sel(cfg_fc_sel),
.cfg_interrupt_msi_enable(cfg_interrupt_msi_enable),
.cfg_interrupt_msi_mmenable(cfg_interrupt_msi_mmenable),
.cfg_interrupt_msi_mask_update(cfg_interrupt_msi_mask_update),
.cfg_interrupt_msi_data(cfg_interrupt_msi_data),
.cfg_interrupt_msi_select(cfg_interrupt_msi_select),
.cfg_interrupt_msi_int(cfg_interrupt_msi_int),
.cfg_interrupt_msi_pending_status(cfg_interrupt_msi_pending_status),
.cfg_interrupt_msi_pending_status_data_enable(cfg_interrupt_msi_pending_status_data_enable),
.cfg_interrupt_msi_pending_status_function_num(cfg_interrupt_msi_pending_status_function_num),
.cfg_interrupt_msi_sent(cfg_interrupt_msi_sent),
.cfg_interrupt_msi_fail(cfg_interrupt_msi_fail),
.cfg_interrupt_msi_attr(cfg_interrupt_msi_attr),
.cfg_interrupt_msi_tph_present(cfg_interrupt_msi_tph_present),
.cfg_interrupt_msi_tph_type(cfg_interrupt_msi_tph_type),
.cfg_interrupt_msi_tph_st_tag(cfg_interrupt_msi_tph_st_tag),
.cfg_interrupt_msi_function_number(cfg_interrupt_msi_function_number),
/*
* Ethernet: SFP+
*/
.mac_tx_clk(mac_tx_clk),
.mac_tx_rst(mac_tx_rst),
.mac_axis_tx(mac_axis_tx),
.mac_axis_tx_cpl(mac_axis_tx_cpl),
.mac_rx_clk(mac_rx_clk),
.mac_rx_rst(mac_rx_rst),
.mac_axis_rx(mac_axis_rx)
);
endmodule
`resetall