Compare commits

...

7 Commits

Author SHA1 Message Date
Alex Forencich
962950a1e3 cndm: Use event queues in driver
Signed-off-by: Alex Forencich <alex@alexforencich.com>
2026-03-10 13:06:40 -07:00
Alex Forencich
d7eb1b21a2 cndm: Use event queues in driver model
Signed-off-by: Alex Forencich <alex@alexforencich.com>
2026-03-10 00:58:52 -07:00
Alex Forencich
c7279a1ea2 cndm: Add support for event queues
Signed-off-by: Alex Forencich <alex@alexforencich.com>
2026-03-10 00:57:20 -07:00
Alex Forencich
ed61857bc3 cndm: Move interrupt handling out of CQ
Signed-off-by: Alex Forencich <alex@alexforencich.com>
2026-03-09 22:23:56 -07:00
Alex Forencich
2ea3c204de cndm: Rework queue notification mechanism to eliminate ream race
Signed-off-by: Alex Forencich <alex@alexforencich.com>
2026-03-09 21:41:36 -07:00
Alex Forencich
c37d967de9 cndm: Add consumer pointer and arm bit to completion queue
Signed-off-by: Alex Forencich <alex@alexforencich.com>
2026-03-09 14:50:07 -07:00
Alex Forencich
e514869d70 cndm: Clean up resource handling in datapath manager
Signed-off-by: Alex Forencich <alex@alexforencich.com>
2026-03-09 00:56:27 -07:00
15 changed files with 873 additions and 185 deletions

View File

@@ -12,6 +12,7 @@ cndm-y += cndm_dev.o
cndm-y += cndm_netdev.o
cndm-y += cndm_ethtool.o
cndm-y += cndm_ptp.o
cndm-y += cndm_eq.o
cndm-y += cndm_cq.o
cndm-y += cndm_sq.o
cndm-y += cndm_rq.o

View File

@@ -173,6 +173,9 @@ struct cndm_cq {
int enabled;
struct cndm_ring *src_ring;
struct cndm_eq *eq;
struct cndm_irq *irq;
struct notifier_block irq_nb;
void (*handler)(struct cndm_cq *cq);
@@ -180,6 +183,35 @@ struct cndm_cq {
u8 __iomem *db_addr;
};
// Event queue (EQ) state.
// A CQ opened with an EQ uses eq->eqn as its notification target (cmd.qn2)
// and is attached to the EQ through cndm_eq_attach_cq().
struct cndm_eq {
// ring bookkeeping
// NOTE(review): presumably mirrors the same-named fields of struct cndm_cq
// (consumer pointer, power-of-two size and mask, entry stride) - confirm in cndm_eq.c
u32 cons_ptr;
u32 size;
u32 size_mask;
u32 stride;
// event buffer
// NOTE(review): presumably a DMA-coherent allocation of buf_size bytes with
// buf_dma_addr as its device-visible address, as for CQs - confirm in cndm_eq.c
size_t buf_size;
u8 *buf;
dma_addr_t buf_dma_addr;
// owning device / driver context
struct device *dev;
struct cndm_dev *cdev;
struct cndm_priv *priv;
// queue number; read by cndm_open_cq() to select this EQ as the CQ's target
int eqn;
int enabled;
// interrupt supplied to cndm_open_eq() and the notifier block used with it
// NOTE(review): registration on irq->nh is assumed by analogy with CQs - confirm
struct cndm_irq *irq;
struct notifier_block irq_nb;
// optional callback taking this EQ; the caller is not visible in this view
void (*handler)(struct cndm_eq *eq);
// table of CQs associated with this EQ
// NOTE(review): presumably keyed by CQ number and updated by
// cndm_eq_attach_cq()/cndm_eq_detach_cq() under table_lock - confirm
spinlock_t table_lock;
struct radix_tree_root cq_table;
// doorbell location
// NOTE(review): presumably written by cndm_eq_write_cons_ptr{,_arm}() - confirm
u32 db_offset;
u8 __iomem *db_addr;
};
struct cndm_priv {
struct device *dev;
struct net_device *ndev;
@@ -190,14 +222,13 @@ struct cndm_priv {
void __iomem *hw_addr;
struct cndm_irq *irq;
struct notifier_block irq_nb;
struct hwtstamp_config hwts_config;
int rxq_count;
int txq_count;
struct cndm_eq *eq;
struct cndm_ring *txq;
struct cndm_ring *rxq;
};
@@ -230,17 +261,32 @@ ktime_t cndm_read_cpl_ts(struct cndm_ring *ring, const struct cndm_cpl *cpl);
void cndm_register_phc(struct cndm_dev *cdev);
void cndm_unregister_phc(struct cndm_dev *cdev);
// cndm_eq.c
struct cndm_eq *cndm_create_eq(struct cndm_priv *priv);
void cndm_destroy_eq(struct cndm_eq *eq);
int cndm_open_eq(struct cndm_eq *eq, struct cndm_irq *irq, int size);
void cndm_close_eq(struct cndm_eq *eq);
int cndm_eq_attach_cq(struct cndm_eq *eq, struct cndm_cq *cq);
void cndm_eq_detach_cq(struct cndm_eq *eq, struct cndm_cq *cq);
void cndm_eq_write_cons_ptr(const struct cndm_eq *eq);
void cndm_eq_write_cons_ptr_arm(const struct cndm_eq *eq);
// cndm_cq.c
struct cndm_cq *cndm_create_cq(struct cndm_priv *priv);
void cndm_destroy_cq(struct cndm_cq *cq);
int cndm_open_cq(struct cndm_cq *cq, int irqn, int size);
int cndm_open_cq(struct cndm_cq *cq, struct cndm_eq *eq, struct cndm_irq *irq, int size);
void cndm_close_cq(struct cndm_cq *cq);
void cndm_cq_write_cons_ptr(const struct cndm_cq *cq);
void cndm_cq_write_cons_ptr_arm(const struct cndm_cq *cq);
// cndm_sq.c
struct cndm_ring *cndm_create_sq(struct cndm_priv *priv);
void cndm_destroy_sq(struct cndm_ring *sq);
int cndm_open_sq(struct cndm_ring *sq, struct cndm_priv *priv, struct cndm_cq *cq, int size);
void cndm_close_sq(struct cndm_ring *sq);
bool cndm_is_sq_ring_empty(const struct cndm_ring *sq);
bool cndm_is_sq_ring_full(const struct cndm_ring *sq);
void cndm_sq_write_prod_ptr(const struct cndm_ring *sq);
int cndm_free_tx_buf(struct cndm_ring *sq);
int cndm_poll_tx_cq(struct napi_struct *napi, int budget);
int cndm_start_xmit(struct sk_buff *skb, struct net_device *ndev);
@@ -250,6 +296,9 @@ struct cndm_ring *cndm_create_rq(struct cndm_priv *priv);
void cndm_destroy_rq(struct cndm_ring *rq);
int cndm_open_rq(struct cndm_ring *rq, struct cndm_priv *priv, struct cndm_cq *cq, int size);
void cndm_close_rq(struct cndm_ring *rq);
bool cndm_is_rq_ring_empty(const struct cndm_ring *rq);
bool cndm_is_rq_ring_full(const struct cndm_ring *rq);
void cndm_rq_write_prod_ptr(const struct cndm_ring *rq);
int cndm_free_rx_buf(struct cndm_ring *rq);
int cndm_refill_rx_buffers(struct cndm_ring *rq);
int cndm_poll_rx_cq(struct napi_struct *napi, int budget);

View File

@@ -10,6 +10,8 @@ Authors:
#include "cndm.h"
static int cndm_cq_int(struct notifier_block *nb, unsigned long action, void *data);
struct cndm_cq *cndm_create_cq(struct cndm_priv *priv)
{
struct cndm_cq *cq;
@@ -25,6 +27,8 @@ struct cndm_cq *cndm_create_cq(struct cndm_priv *priv)
cq->cqn = -1;
cq->enabled = 0;
cq->irq_nb.notifier_call = cndm_cq_int;
cq->cons_ptr = 0;
cq->db_offset = 0;
@@ -40,14 +44,15 @@ void cndm_destroy_cq(struct cndm_cq *cq)
kfree(cq);
}
int cndm_open_cq(struct cndm_cq *cq, int irqn, int size)
int cndm_open_cq(struct cndm_cq *cq, struct cndm_eq *eq, struct cndm_irq *irq, int size)
{
u32 dqn;
int ret = 0;
struct cndm_cmd_queue cmd;
struct cndm_cmd_queue rsp;
if (cq->enabled || cq->buf)
if (cq->enabled || cq->buf || (!irq && !eq))
return -EINVAL;
cq->size = roundup_pow_of_two(size);
@@ -59,6 +64,19 @@ int cndm_open_cq(struct cndm_cq *cq, int irqn, int size)
if (!cq->buf)
return -ENOMEM;
// can use either EQ or IRQ, but prefer EQ if both are specified
if (eq) {
cq->eq = eq;
dqn = eq->eqn;
} else if (irq) {
ret = atomic_notifier_chain_register(&irq->nh, &cq->irq_nb);
if (ret)
goto fail;
cq->irq = irq;
dqn = irq->index | 0x80000000;
}
cq->cons_ptr = 0;
// clear all phase tag bits
@@ -68,7 +86,7 @@ int cndm_open_cq(struct cndm_cq *cq, int irqn, int size)
cmd.flags = 0x00000000;
cmd.port = cq->priv->ndev->dev_port;
cmd.qn = 0;
cmd.qn2 = irqn; // TODO
cmd.qn2 = dqn;
cmd.pd = 0;
cmd.size = ilog2(cq->size);
cmd.dboffs = 0;
@@ -87,8 +105,14 @@ int cndm_open_cq(struct cndm_cq *cq, int irqn, int size)
cq->db_offset = rsp.dboffs;
cq->db_addr = cq->cdev->hw_addr + rsp.dboffs;
if (eq) {
cndm_eq_attach_cq(eq, cq);
}
cq->enabled = 1;
cndm_cq_write_cons_ptr_arm(cq);
netdev_dbg(cq->priv->ndev, "Opened CQ %d", cq->cqn);
return 0;
@@ -119,9 +143,39 @@ void cndm_close_cq(struct cndm_cq *cq)
cq->db_addr = NULL;
}
if (cq->eq) {
cndm_eq_detach_cq(cq->eq, cq);
cq->eq = NULL;
}
if (cq->irq) {
atomic_notifier_chain_unregister(&cq->irq->nh, &cq->irq_nb);
cq->irq = NULL;
}
if (cq->buf) {
dma_free_coherent(cq->dev, cq->buf_size, cq->buf, cq->buf_dma_addr);
cq->buf = NULL;
cq->buf_dma_addr = 0;
}
}
// Publish the CQ consumer pointer to the device: writes the low 16 bits of
// cq->cons_ptr to the CQ doorbell address with all upper bits clear.
void cndm_cq_write_cons_ptr(const struct cndm_cq *cq)
{
	iowrite32(0xffff & cq->cons_ptr, cq->db_addr);
}
// Publish the CQ consumer pointer and set bit 31 in the same doorbell write.
// NOTE(review): bit 31 appears to be the arm request - the queue state logic
// sets its arm flag when bit 31 of a consumer pointer write is set; confirm
// against the hardware register description.
void cndm_cq_write_cons_ptr_arm(const struct cndm_cq *cq)
{
	iowrite32(0x80000000 | (cq->cons_ptr & 0xffff), cq->db_addr);
}
// IRQ notifier callback for a CQ registered directly on an interrupt's
// notifier chain. Recovers the CQ from its embedded notifier block and runs
// the CQ handler if one has been installed. action and data are not used.
// Always returns NOTIFY_DONE.
static int cndm_cq_int(struct notifier_block *nb, unsigned long action, void *data)
{
	struct cndm_cq *cq;

	cq = container_of(nb, struct cndm_cq, irq_nb);

	if (likely(cq->handler))
		cq->handler(cq);

	return NOTIFY_DONE;
}

View File

@@ -202,14 +202,10 @@ struct cndm_cpl {
};
struct cndm_event {
__le16 rsvd0;
__le16 type;
__le16 source;
__le32 rsvd0;
__le32 source;
__le32 rsvd1;
__le32 rsvd2;
__le32 rsvd3;
__le32 rsvd4;
__le32 rsvd5;
__le32 phase;
};

View File

@@ -38,7 +38,7 @@ static int cndm_open(struct net_device *ndev)
goto fail;
}
ret = cndm_open_cq(cq, 0, 256);
ret = cndm_open_cq(cq, priv->eq, &priv->cdev->irq[0], 256);
if (ret) {
cndm_destroy_cq(cq);
goto fail;
@@ -75,7 +75,7 @@ static int cndm_open(struct net_device *ndev)
goto fail;
}
ret = cndm_open_cq(cq, 0, 256);
ret = cndm_open_cq(cq, priv->eq, &priv->cdev->irq[0], 256);
if (ret) {
cndm_destroy_cq(cq);
goto fail;
@@ -278,20 +278,6 @@ static const struct net_device_ops cndm_netdev_ops = {
#endif
};
// Per-netdev interrupt notifier callback. Logs the interrupt and, while the
// port is up, schedules NAPI polling on both the TX and RX completion queues.
// action and data are not used. Always returns NOTIFY_DONE.
static int cndm_netdev_irq(struct notifier_block *nb, unsigned long action, void *data)
{
	struct cndm_priv *priv;

	priv = container_of(nb, struct cndm_priv, irq_nb);

	netdev_dbg(priv->ndev, "Interrupt");

	// nothing to poll while the port is down
	if (!priv->port_up)
		return NOTIFY_DONE;

	napi_schedule_irqoff(&priv->txq->cq->napi);
	napi_schedule_irqoff(&priv->rxq->cq->napi);

	return NOTIFY_DONE;
}
struct net_device *cndm_create_netdev(struct cndm_dev *cdev, int port)
{
struct device *dev = cdev->dev;
@@ -340,6 +326,19 @@ struct net_device *cndm_create_netdev(struct cndm_dev *cdev, int port)
ndev->min_mtu = ETH_MIN_MTU;
ndev->max_mtu = 1500;
priv->eq = cndm_create_eq(priv);
if (IS_ERR_OR_NULL(priv->eq)) {
ret = PTR_ERR(priv->eq);
priv->eq = NULL;
goto fail;
}
ret = cndm_open_eq(priv->eq, &priv->cdev->irq[0], 256);
if (ret) {
cndm_destroy_eq(priv->eq);
goto fail;
}
netif_carrier_off(ndev);
ret = register_netdev(ndev);
@@ -350,14 +349,6 @@ struct net_device *cndm_create_netdev(struct cndm_dev *cdev, int port)
priv->registered = 1;
priv->irq_nb.notifier_call = cndm_netdev_irq;
priv->irq = &cdev->irq[port % cdev->irq_count];
ret = atomic_notifier_chain_register(&priv->irq->nh, &priv->irq_nb);
if (ret) {
priv->irq = NULL;
goto fail;
}
return ndev;
fail:
@@ -372,10 +363,8 @@ void cndm_destroy_netdev(struct net_device *ndev)
if (priv->registered)
unregister_netdev(ndev);
if (priv->irq)
atomic_notifier_chain_unregister(&priv->irq->nh, &priv->irq_nb);
priv->irq = NULL;
if (priv->eq)
cndm_destroy_eq(priv->eq);
free_netdev(ndev);
}

View File

@@ -10,6 +10,8 @@ Authors:
#include "cndm.h"
static void cndm_rq_cq_handler(struct cndm_cq *cq);
struct cndm_ring *cndm_create_rq(struct cndm_priv *priv)
{
struct cndm_ring *rq;
@@ -69,7 +71,7 @@ int cndm_open_rq(struct cndm_ring *rq, struct cndm_priv *priv, struct cndm_cq *c
rq->priv = priv;
rq->cq = cq;
cq->src_ring = rq;
// cq->handler = cndm_rx_irq;
cq->handler = cndm_rq_cq_handler;
rq->prod_ptr = 0;
rq->cons_ptr = 0;
@@ -163,6 +165,21 @@ void cndm_close_rq(struct cndm_ring *rq)
rq->priv = NULL;
}
// Returns true when the RQ holds no outstanding entries, i.e. the consumer
// pointer has caught up with the producer pointer.
bool cndm_is_rq_ring_empty(const struct cndm_ring *rq)
{
	return rq->cons_ptr == rq->prod_ptr;
}
// Returns true when the number of outstanding RQ entries (producer pointer
// minus consumer pointer) has reached the ring size.
bool cndm_is_rq_ring_full(const struct cndm_ring *rq)
{
	return rq->size <= (rq->prod_ptr - rq->cons_ptr);
}
// Publish the RQ producer pointer to the device: writes the low 16 bits of
// rq->prod_ptr to the RQ doorbell address.
void cndm_rq_write_prod_ptr(const struct cndm_ring *rq)
{
	iowrite32(0xffff & rq->prod_ptr, rq->db_addr);
}
static void cndm_free_rx_desc(struct cndm_ring *rq, int index)
{
struct cndm_priv *priv = rq->priv;
@@ -185,7 +202,7 @@ int cndm_free_rx_buf(struct cndm_ring *rq)
u32 index;
int cnt = 0;
while (rq->prod_ptr != rq->cons_ptr) {
while (!cndm_is_rq_ring_empty(rq)) {
index = rq->cons_ptr & rq->size_mask;
cndm_free_rx_desc(rq, index);
rq->cons_ptr++;
@@ -247,7 +264,7 @@ int cndm_refill_rx_buffers(struct cndm_ring *rq)
}
dma_wmb();
iowrite32(rq->prod_ptr & 0xffff, rq->db_addr);
cndm_rq_write_prod_ptr(rq);
return ret;
}
@@ -332,10 +349,16 @@ rx_drop:
rq->cons_ptr = cons_ptr;
cndm_refill_rx_buffers(rq);
cndm_cq_write_cons_ptr(cq);
return done;
}
// CQ notification handler installed by cndm_open_rq(): schedules the CQ's
// NAPI context so receive completions are processed by the NAPI poll routine.
static void cndm_rq_cq_handler(struct cndm_cq *cq)
{
	struct napi_struct *napi = &cq->napi;

	napi_schedule_irqoff(napi);
}
int cndm_poll_rx_cq(struct napi_struct *napi, int budget)
{
struct cndm_cq *cq = container_of(napi, struct cndm_cq, napi);
@@ -348,7 +371,7 @@ int cndm_poll_rx_cq(struct napi_struct *napi, int budget)
napi_complete(napi);
// TODO re-enable interrupts
cndm_cq_write_cons_ptr_arm(cq);
return done;
}

View File

@@ -10,6 +10,8 @@ Authors:
#include "cndm.h"
static void cndm_sq_cq_handler(struct cndm_cq *cq);
struct cndm_ring *cndm_create_sq(struct cndm_priv *priv)
{
struct cndm_ring *sq;
@@ -69,7 +71,7 @@ int cndm_open_sq(struct cndm_ring *sq, struct cndm_priv *priv, struct cndm_cq *c
sq->priv = priv;
sq->cq = cq;
cq->src_ring = sq;
// cq->handler = cndm_tx_irq;
cq->handler = cndm_sq_cq_handler;
sq->prod_ptr = 0;
sq->cons_ptr = 0;
@@ -152,6 +154,21 @@ void cndm_close_sq(struct cndm_ring *sq)
sq->priv = NULL;
}
// Returns true when the SQ holds no outstanding entries, i.e. the consumer
// pointer has caught up with the producer pointer.
bool cndm_is_sq_ring_empty(const struct cndm_ring *sq)
{
	return sq->cons_ptr == sq->prod_ptr;
}
// Returns true when the number of outstanding SQ entries (producer pointer
// minus consumer pointer) has reached the ring size.
bool cndm_is_sq_ring_full(const struct cndm_ring *sq)
{
	return sq->size <= (sq->prod_ptr - sq->cons_ptr);
}
// Publish the SQ producer pointer to the device: writes the low 16 bits of
// sq->prod_ptr to the SQ doorbell address.
void cndm_sq_write_prod_ptr(const struct cndm_ring *sq)
{
	iowrite32(0xffff & sq->prod_ptr, sq->db_addr);
}
static void cndm_free_tx_desc(struct cndm_ring *sq, int index, int napi_budget)
{
struct cndm_priv *priv = sq->priv;
@@ -173,7 +190,7 @@ int cndm_free_tx_buf(struct cndm_ring *sq)
u32 index;
int cnt = 0;
while (sq->prod_ptr != sq->cons_ptr) {
while (!cndm_is_sq_ring_empty(sq)) {
index = sq->cons_ptr & sq->size_mask;
cndm_free_tx_desc(sq, index, 0);
sq->cons_ptr++;
@@ -229,12 +246,19 @@ static int cndm_process_tx_cq(struct cndm_cq *cq, int napi_budget)
cq->cons_ptr = cq_cons_ptr;
sq->cons_ptr = cons_ptr;
cndm_cq_write_cons_ptr(cq);
if (netif_tx_queue_stopped(sq->tx_queue) && (done != 0 || sq->prod_ptr == sq->cons_ptr))
netif_tx_wake_queue(sq->tx_queue);
return done;
}
// CQ notification handler installed by cndm_open_sq(): schedules the CQ's
// NAPI context so transmit completions are processed by the NAPI poll routine.
static void cndm_sq_cq_handler(struct cndm_cq *cq)
{
	struct napi_struct *napi = &cq->napi;

	napi_schedule_irqoff(napi);
}
int cndm_poll_tx_cq(struct napi_struct *napi, int budget)
{
struct cndm_cq *cq = container_of(napi, struct cndm_cq, napi);
@@ -247,7 +271,7 @@ int cndm_poll_tx_cq(struct napi_struct *napi, int budget)
napi_complete(napi);
// TODO re-enable interrupts
cndm_cq_write_cons_ptr_arm(cq);
return done;
}
@@ -315,7 +339,7 @@ int cndm_start_xmit(struct sk_buff *skb, struct net_device *ndev)
}
dma_wmb();
iowrite32(sq->prod_ptr & 0xffff, sq->db_addr);
cndm_sq_write_prod_ptr(sq);
return NETDEV_TX_OK;

View File

@@ -335,8 +335,10 @@ cndm_micro_dp_mgr #(
.SYS_CLK_PER_NS_DEN(SYS_CLK_PER_NS_DEN),
// Queue configuration
.WQN_W(WQN_W),
.EQN_W(CQN_W),
.CQN_W(CQN_W),
.SQN_W(WQN_W),
.RQN_W(WQN_W),
// PTP configuration
.PTP_EN(PTP_TS_EN),

View File

@@ -335,8 +335,10 @@ cndm_micro_dp_mgr #(
.SYS_CLK_PER_NS_DEN(SYS_CLK_PER_NS_DEN),
// Queue configuration
.WQN_W(WQN_W),
.EQN_W(CQN_W),
.CQN_W(CQN_W),
.SQN_W(WQN_W),
.RQN_W(WQN_W),
// PTP configuration
.PTP_EN(PTP_TS_EN),

View File

@@ -16,7 +16,10 @@ Authors:
* Corundum-micro completion write module
*/
module cndm_micro_cpl_wr #(
parameter CQN_W = 5
parameter CQN_W = 5,
parameter logic IS_CQ = 1'b1,
parameter logic IS_EQ = 1'b0,
parameter logic CQ_IRQ = IS_CQ
)
(
input wire logic clk,
@@ -45,28 +48,54 @@ module cndm_micro_cpl_wr #(
*/
taxi_axis_if.src m_axis_irq,
taxi_axis_if.snk s_axis_cpl
/*
* Completion input
*/
taxi_axis_if.snk s_axis_cpl,
/*
* Event input
*/
taxi_axis_if.snk s_axis_event,
/*
* Event output
*/
taxi_axis_if.src m_axis_event
);
localparam DMA_ADDR_W = dma_wr_desc_req.DST_ADDR_W;
localparam IRQN_W = m_axis_irq.DATA_W;
// Queue type codes driven on the queue manager's req_qtype input
// (default enum numbering: EQ=0, CQ=1, SQ=2, RQ=3).
// This module issues QTYPE_EQ for event writes and QTYPE_CQ for completion writes.
// An identical enumeration is declared in cndm_micro_queue_state; keep the two in sync.
typedef enum logic [2:0] {
QTYPE_EQ,
QTYPE_CQ,
QTYPE_SQ,
QTYPE_RQ
} qtype_t;
logic [CQN_W-1:0] cq_req_cqn_reg = '0;
logic [2:0] cq_req_qtype_reg = '0;
logic cq_req_valid_reg = 1'b0;
logic cq_req_ready;
logic [IRQN_W-1:0] cq_rsp_irqn;
logic [DMA_ADDR_W-1:0] cq_rsp_addr;
logic cq_rsp_phase_tag;
logic cq_rsp_error;
logic cq_rsp_valid;
logic cq_rsp_ready_reg = 1'b0;
logic [CQN_W-1:0] notify_req_qn_reg = '0;
logic notify_req_valid_reg = 1'b0;
logic notify_req_ready;
cndm_micro_queue_state #(
.QN_W(CQN_W),
.DQN_W(IRQN_W),
.IS_CQ(1),
.QTYPE_EN(0),
.IS_CQ(IS_CQ || !IS_EQ),
.IS_EQ(IS_EQ),
.CQ_IRQ(CQ_IRQ),
.QTYPE_EN(IS_CQ && IS_EQ),
.QE_SIZE(16),
.DMA_ADDR_W(DMA_ADDR_W)
)
@@ -89,16 +118,33 @@ cq_mgr_inst (
* Queue management interface
*/
.req_qn(cq_req_cqn_reg),
.req_qtype('0),
.req_qtype(cq_req_qtype_reg),
.req_valid(cq_req_valid_reg),
.req_ready(cq_req_ready),
.rsp_qn(),
.rsp_dqn(cq_rsp_irqn),
.rsp_dqn(),
.rsp_addr(cq_rsp_addr),
.rsp_phase_tag(cq_rsp_phase_tag),
.rsp_error(cq_rsp_error),
.rsp_valid(cq_rsp_valid),
.rsp_ready(cq_rsp_ready_reg)
.rsp_ready(cq_rsp_ready_reg),
/*
* Notification interface
*/
.notify_req_qn(notify_req_qn_reg),
.notify_req_valid(notify_req_valid_reg),
.notify_req_ready(notify_req_ready),
/*
* Interrupts
*/
.m_axis_irq(m_axis_irq),
/*
* Event output
*/
.m_axis_event(m_axis_event)
);
typedef enum logic [1:0] {
@@ -111,20 +157,11 @@ state_t state_reg = STATE_IDLE;
logic phase_tag_reg = 1'b0;
logic [IRQN_W-1:0] m_axis_irq_irqn_reg = '0;
logic m_axis_irq_tvalid_reg = 1'b0;
assign m_axis_irq.tdata = m_axis_irq_irqn_reg;
assign m_axis_irq.tkeep = '1;
assign m_axis_irq.tstrb = m_axis_irq.tkeep;
assign m_axis_irq.tvalid = m_axis_irq_tvalid_reg;
assign m_axis_irq.tlast = 1'b1;
assign m_axis_irq.tid = '0;
assign m_axis_irq.tdest = '0;
assign m_axis_irq.tuser = '0;
logic [127:0] data_reg = '0;
always_ff @(posedge clk) begin
s_axis_cpl.tready <= 1'b0;
s_axis_event.tready <= 1'b0;
dma_wr_desc_req.req_src_sel <= '0;
dma_wr_desc_req.req_src_asid <= '0;
@@ -142,16 +179,27 @@ always_ff @(posedge clk) begin
cq_req_valid_reg <= cq_req_valid_reg && !cq_req_ready;
cq_rsp_ready_reg <= 1'b0;
m_axis_irq_tvalid_reg <= m_axis_irq_tvalid_reg && !m_axis_irq.tready;
notify_req_valid_reg <= notify_req_valid_reg && !notify_req_ready;
case (state_reg)
STATE_IDLE: begin
dma_wr_desc_req.req_src_addr <= '0;
cq_req_cqn_reg <= s_axis_cpl.tdest;
if (s_axis_cpl.tvalid && !s_axis_cpl.tready) begin
if (IS_EQ && s_axis_event.tvalid && !s_axis_event.tready && (!notify_req_valid_reg || notify_req_ready)) begin
data_reg <= s_axis_event.tdata;
cq_req_cqn_reg <= s_axis_event.tdest;
cq_req_qtype_reg <= QTYPE_EQ;
cq_req_valid_reg <= 1'b1;
notify_req_qn_reg <= s_axis_event.tdest;
s_axis_event.tready <= 1'b1;
state_reg <= STATE_QUERY_CQ;
end else if ((IS_CQ || !IS_EQ) && s_axis_cpl.tvalid && !s_axis_cpl.tready && (!notify_req_valid_reg || notify_req_ready)) begin
data_reg <= s_axis_cpl.tdata;
cq_req_cqn_reg <= s_axis_cpl.tdest;
cq_req_qtype_reg <= QTYPE_CQ;
cq_req_valid_reg <= 1'b1;
notify_req_qn_reg <= s_axis_cpl.tdest;
s_axis_cpl.tready <= 1'b1;
state_reg <= STATE_QUERY_CQ;
end else begin
state_reg <= STATE_IDLE;
@@ -164,13 +212,11 @@ always_ff @(posedge clk) begin
if (cq_rsp_valid && cq_rsp_ready_reg) begin
cq_rsp_ready_reg <= 1'b0;
m_axis_irq_irqn_reg <= cq_rsp_irqn;
dma_wr_desc_req.req_dst_addr <= cq_rsp_addr;
phase_tag_reg <= cq_rsp_phase_tag;
if (cq_rsp_error) begin
// drop completion
s_axis_cpl.tready <= 1'b1;
state_reg <= STATE_IDLE;
end else begin
dma_wr_desc_req.req_valid <= 1'b1;
@@ -180,8 +226,7 @@ always_ff @(posedge clk) begin
end
STATE_WRITE_DATA: begin
if (dma_wr_desc_sts.sts_valid) begin
s_axis_cpl.tready <= 1'b1;
m_axis_irq_tvalid_reg <= 1'b1;
notify_req_valid_reg <= 1'b1;
state_reg <= STATE_IDLE;
end
end
@@ -194,7 +239,6 @@ always_ff @(posedge clk) begin
state_reg <= STATE_IDLE;
cq_req_valid_reg <= 1'b0;
cq_rsp_ready_reg <= 1'b0;
m_axis_irq_tvalid_reg <= 1'b0;
end
end
@@ -207,7 +251,7 @@ localparam SEG_BE_W = dma_ram_rd.SEG_BE_W;
if (SEGS*SEG_DATA_W < 128)
$fatal(0, "Total segmented interface width must be at least 128 (instance %m)");
wire [SEGS-1:0][SEG_DATA_W-1:0] ram_data = (SEG_DATA_W*SEGS)'({phase_tag_reg, s_axis_cpl.tdata[126:0]});
wire [SEGS-1:0][SEG_DATA_W-1:0] ram_data = (SEG_DATA_W*SEGS)'({phase_tag_reg, data_reg[126:0]});
for (genvar n = 0; n < SEGS; n = n + 1) begin

View File

@@ -66,10 +66,15 @@ logic wq_rsp_error;
logic wq_rsp_valid;
logic wq_rsp_ready_reg = 1'b0;
taxi_axis_if axis_irq_stub();
taxi_axis_if axis_event_stub();
cndm_micro_queue_state #(
.QN_W(WQN_W),
.DQN_W(CQN_W),
.IS_CQ(0),
.IS_EQ(0),
.CQ_IRQ(0),
.QTYPE_EN(1),
.QE_SIZE(16),
.DMA_ADDR_W(DMA_ADDR_W)
@@ -102,7 +107,24 @@ wq_mgr_inst (
.rsp_phase_tag(),
.rsp_error(wq_rsp_error),
.rsp_valid(wq_rsp_valid),
.rsp_ready(wq_rsp_ready_reg)
.rsp_ready(wq_rsp_ready_reg),
/*
* Notification interface
*/
.notify_req_qn('0),
.notify_req_valid(1'b0),
.notify_req_ready(),
/*
* Interrupts
*/
.m_axis_irq(axis_irq_stub),
/*
* Event output
*/
.m_axis_event(axis_event_stub)
);
taxi_dma_desc_if #(

View File

@@ -33,8 +33,15 @@ module cndm_micro_dp_mgr #
parameter SYS_CLK_PER_NS_DEN = 1,
// Queue configuration
parameter WQN_W = 5,
parameter CQN_W = WQN_W,
parameter EQN_W = 5,
parameter CQN_W = 5,
parameter SQN_W = 5,
parameter RQN_W = 5,
parameter EQE_VER = 1,
parameter CQE_VER = 1,
parameter SQE_VER = 1,
parameter RQE_VER = 1,
parameter LOG_MAX_WQ_SZ = 15,
// PTP configuration
parameter logic PTP_EN = 1'b1,
@@ -47,8 +54,10 @@ module cndm_micro_dp_mgr #
parameter PORT_BASE_ADDR_HOST = 0,
parameter PORT_STRIDE = 'h10000,
parameter WQ_REG_STRIDE = 32,
parameter QM_OFFSET = 'h0000,
parameter EQM_OFFSET = 'h4000,
parameter CQM_OFFSET = 'h4000,
parameter SQM_OFFSET = 'h0000,
parameter RQM_OFFSET = 'h0000,
parameter PORT_CTRL_OFFSET = 'h8000
)
(
@@ -166,6 +175,13 @@ wire [31:0] cmd_ram_rd_data = cmd_ram[cmd_ram_rd_addr];
// ID ROM
localparam ID_PAGES = 3;
localparam ID_AW = $clog2((ID_PAGES+1)*8);
// detect sharing between EQ/CQ and SQ/RQ (same queue manager)
localparam EQ_POOL = 1;
localparam CQ_POOL = EQ_POOL + (EQM_OFFSET == CQM_OFFSET ? 0 : 1);
localparam SQ_POOL = CQ_POOL + 1;
localparam RQ_POOL = SQ_POOL + (SQM_OFFSET == RQM_OFFSET ? 0 : 1);
logic [31:0] id_rom[(ID_PAGES+1)*8] = '{
// Common
0, // 0: status
@@ -213,28 +229,28 @@ logic [31:0] id_rom[(ID_PAGES+1)*8] = '{
0, // 23
// Page 2: Resources
{ // 24
8'd0, // [31:24] EQE_VER
8'd0, // [23:16] EQ_POOL
8'd0, // [15:8] LOG_MAX_EQ_SZ
8'd0 // [7:0] LOG_MAX_EQ
8'(EQE_VER), // [31:24] EQE_VER
8'(EQ_POOL), // [23:16] EQ_POOL
8'(LOG_MAX_WQ_SZ), // [15:8] LOG_MAX_EQ_SZ
8'(EQN_W) // [7:0] LOG_MAX_EQ
},
{ // 25
8'd1, // [31:24] CQE_VER
8'd0, // [23:16] CQ_POOL
8'd15, // [15:8] LOG_MAX_CQ_SZ
8'(CQE_VER), // [31:24] CQE_VER
8'(CQ_POOL), // [23:16] CQ_POOL
8'(LOG_MAX_WQ_SZ), // [15:8] LOG_MAX_CQ_SZ
8'(CQN_W) // [7:0] LOG_MAX_CQ
},
{ // 26
8'd1, // [31:24] SQE_VER
8'd1, // [23:16] SQ_POOL
8'd15, // [15:8] LOG_MAX_SQ_SZ
8'(WQN_W) // [7:0] LOG_MAX_SQ
8'(SQE_VER), // [31:24] SQE_VER
8'(SQ_POOL), // [23:16] SQ_POOL
8'(LOG_MAX_WQ_SZ), // [15:8] LOG_MAX_SQ_SZ
8'(SQN_W) // [7:0] LOG_MAX_SQ
},
{ // 27
8'd1, // [31:24] RQE_VER
8'd1, // [23:16] RQ_POOL
8'd15, // [15:8] LOG_MAX_RQ_SZ
8'(WQN_W) // [7:0] LOG_MAX_RQ
8'(RQE_VER), // [31:24] RQE_VER
8'(RQ_POOL), // [23:16] RQ_POOL
8'(LOG_MAX_WQ_SZ), // [15:8] LOG_MAX_RQ_SZ
8'(RQN_W) // [7:0] LOG_MAX_RQ
},
0, // 28
0, // 29
@@ -373,20 +389,22 @@ always_comb begin
// determine block base address
case (opcode_reg)
// // EQ
// CMD_OP_CREATE_EQ:
// begin
// dw3_next = 0;
// dp_ptr_next = DP_APB_ADDR_W'({dw2_reg, 16'd0} | 'h8000) + DP_APB_ADDR_W'(PORT_BASE_ADDR_DP);
// host_ptr_next = 32'({dw2_reg, 16'd0} | 'h8000) + PORT_BASE_ADDR_HOST;
// end
// CMD_OP_MODIFY_EQ,
// CMD_OP_QUERY_EQ,
// CMD_OP_DESTROY_EQ:
// begin
// dp_ptr_next = DP_APB_ADDR_W'({dw2_reg, 16'd0} | 'h8000) + DP_APB_ADDR_W'(PORT_BASE_ADDR_DP);
// host_ptr_next = 32'({dw2_reg, 16'd0} | 'h8000) + PORT_BASE_ADDR_HOST;
// end
// EQ
CMD_OP_CREATE_EQ:
begin
cnt_next = 2**EQN_W-1;
qtype_next = QTYPE_EQ;
dp_ptr_next = DP_APB_ADDR_W'((dw2_reg[15:0] * PORT_STRIDE) + EQM_OFFSET + PORT_BASE_ADDR_DP);
host_ptr_next = (dw2_reg[15:0] * PORT_STRIDE) + EQM_OFFSET + PORT_BASE_ADDR_HOST;
end
CMD_OP_MODIFY_EQ,
CMD_OP_QUERY_EQ,
CMD_OP_DESTROY_EQ:
begin
qtype_next = QTYPE_EQ;
dp_ptr_next = DP_APB_ADDR_W'((dw2_reg[15:0] * PORT_STRIDE) + EQM_OFFSET + (dw3_reg[15:0] * WQ_REG_STRIDE) + PORT_BASE_ADDR_DP);
host_ptr_next = (dw2_reg[15:0] * PORT_STRIDE) + EQM_OFFSET + (dw3_reg[15:0] * WQ_REG_STRIDE) + PORT_BASE_ADDR_HOST;
end
// CQ
CMD_OP_CREATE_CQ:
begin
@@ -406,34 +424,34 @@ always_comb begin
// SQ
CMD_OP_CREATE_SQ:
begin
cnt_next = 2**WQN_W-1;
cnt_next = 2**SQN_W-1;
qtype_next = QTYPE_SQ;
dp_ptr_next = DP_APB_ADDR_W'((dw2_reg[15:0] * PORT_STRIDE) + QM_OFFSET + PORT_BASE_ADDR_DP);
host_ptr_next = (dw2_reg[15:0] * PORT_STRIDE) + QM_OFFSET + PORT_BASE_ADDR_HOST;
dp_ptr_next = DP_APB_ADDR_W'((dw2_reg[15:0] * PORT_STRIDE) + SQM_OFFSET + PORT_BASE_ADDR_DP);
host_ptr_next = (dw2_reg[15:0] * PORT_STRIDE) + SQM_OFFSET + PORT_BASE_ADDR_HOST;
end
CMD_OP_MODIFY_SQ,
CMD_OP_QUERY_SQ,
CMD_OP_DESTROY_SQ:
begin
qtype_next = QTYPE_SQ;
dp_ptr_next = DP_APB_ADDR_W'((dw2_reg[15:0] * PORT_STRIDE) + QM_OFFSET + (dw3_reg[15:0] * WQ_REG_STRIDE) + PORT_BASE_ADDR_DP);
host_ptr_next = (dw2_reg[15:0] * PORT_STRIDE) + QM_OFFSET + (dw3_reg[15:0] * WQ_REG_STRIDE) + PORT_BASE_ADDR_HOST;
dp_ptr_next = DP_APB_ADDR_W'((dw2_reg[15:0] * PORT_STRIDE) + SQM_OFFSET + (dw3_reg[15:0] * WQ_REG_STRIDE) + PORT_BASE_ADDR_DP);
host_ptr_next = (dw2_reg[15:0] * PORT_STRIDE) + SQM_OFFSET + (dw3_reg[15:0] * WQ_REG_STRIDE) + PORT_BASE_ADDR_HOST;
end
// RQ
CMD_OP_CREATE_RQ:
begin
cnt_next = 2**WQN_W-1;
cnt_next = 2**RQN_W-1;
qtype_next = QTYPE_RQ;
dp_ptr_next = DP_APB_ADDR_W'((dw2_reg[15:0] * PORT_STRIDE) + QM_OFFSET + PORT_BASE_ADDR_DP);
host_ptr_next = (dw2_reg[15:0] * PORT_STRIDE) + QM_OFFSET + PORT_BASE_ADDR_HOST;
dp_ptr_next = DP_APB_ADDR_W'((dw2_reg[15:0] * PORT_STRIDE) + RQM_OFFSET + PORT_BASE_ADDR_DP);
host_ptr_next = (dw2_reg[15:0] * PORT_STRIDE) + RQM_OFFSET + PORT_BASE_ADDR_HOST;
end
CMD_OP_MODIFY_RQ,
CMD_OP_QUERY_RQ,
CMD_OP_DESTROY_RQ:
begin
qtype_next = QTYPE_RQ;
dp_ptr_next = DP_APB_ADDR_W'((dw2_reg[15:0] * PORT_STRIDE) + QM_OFFSET + (dw3_reg[15:0] * WQ_REG_STRIDE) + PORT_BASE_ADDR_DP);
host_ptr_next = (dw2_reg[15:0] * PORT_STRIDE) + QM_OFFSET + (dw3_reg[15:0] * WQ_REG_STRIDE) + PORT_BASE_ADDR_HOST;
dp_ptr_next = DP_APB_ADDR_W'((dw2_reg[15:0] * PORT_STRIDE) + RQM_OFFSET + (dw3_reg[15:0] * WQ_REG_STRIDE) + PORT_BASE_ADDR_DP);
host_ptr_next = (dw2_reg[15:0] * PORT_STRIDE) + RQM_OFFSET + (dw3_reg[15:0] * WQ_REG_STRIDE) + PORT_BASE_ADDR_HOST;
end
default: begin end
endcase
@@ -676,7 +694,11 @@ always_comb begin
// reset queue 2
// store doorbell offset
cmd_ram_wr_data = host_ptr_reg + 'h0008;
if (qtype_reg == QTYPE_SQ || qtype_reg == QTYPE_RQ) begin
cmd_ram_wr_data = host_ptr_reg + 'h0008;
end else begin
cmd_ram_wr_data = host_ptr_reg + 'h000C;
end
cmd_ram_wr_addr = 7;
cmd_ram_wr_en = 1'b1;

View File

@@ -483,8 +483,21 @@ cpl_mux_inst (
.m_axis(axis_cpl)
);
taxi_axis_if #(
.DATA_W(16*8),
.KEEP_EN(1),
.LAST_EN(1),
.ID_EN(0),
.DEST_EN(1),
.DEST_W(CQN_W),
.USER_EN(0)
) axis_event();
cndm_micro_cpl_wr #(
.CQN_W(CQN_W)
.CQN_W(CQN_W),
.IS_CQ(1),
.IS_EQ(1),
.CQ_IRQ(1)
)
cpl_wr_inst (
.clk(clk),
@@ -513,7 +526,20 @@ cpl_wr_inst (
*/
.m_axis_irq(m_axis_irq),
.s_axis_cpl(axis_cpl)
/*
* Completion input
*/
.s_axis_cpl(axis_cpl),
/*
* Event input
*/
.s_axis_event(axis_event),
/*
* Event output
*/
.m_axis_event(axis_event)
);
// TX path

View File

@@ -19,39 +19,58 @@ module cndm_micro_queue_state #(
parameter QN_W = 5,
parameter DQN_W = 5,
parameter logic IS_CQ = 1'b0,
parameter logic QTYPE_EN = !IS_CQ,
parameter logic IS_EQ = 1'b0,
parameter logic CQ_IRQ = IS_CQ,
parameter logic QTYPE_EN = 1'b1,
parameter QE_SIZE = 16,
parameter DMA_ADDR_W = 64
)
(
input wire logic clk,
input wire logic rst,
input wire logic clk,
input wire logic rst,
/*
* Control register interface
*/
taxi_axil_if.wr_slv s_axil_ctrl_wr,
taxi_axil_if.rd_slv s_axil_ctrl_rd,
taxi_axil_if.wr_slv s_axil_ctrl_wr,
taxi_axil_if.rd_slv s_axil_ctrl_rd,
/*
* Datapath control register interface
*/
taxi_apb_if.slv s_apb_dp_ctrl,
taxi_apb_if.slv s_apb_dp_ctrl,
/*
* Queue management interface
*/
input wire logic [QN_W-1:0] req_qn,
input wire logic [2:0] req_qtype,
input wire logic req_valid,
output wire logic req_ready,
output wire logic [QN_W-1:0] rsp_qn,
output wire logic [DQN_W-1:0] rsp_dqn,
output wire logic [DMA_ADDR_W-1:0] rsp_addr,
output wire logic rsp_phase_tag,
output wire logic rsp_error,
output wire logic rsp_valid,
input wire logic rsp_ready
input wire logic [QN_W-1:0] req_qn,
input wire logic [2:0] req_qtype,
input wire logic req_valid,
output wire logic req_ready,
output wire logic [QN_W-1:0] rsp_qn,
output wire logic [DQN_W-1:0] rsp_dqn,
output wire logic [DMA_ADDR_W-1:0] rsp_addr,
output wire logic rsp_phase_tag,
output wire logic rsp_error,
output wire logic rsp_valid,
input wire logic rsp_ready,
/*
* Notification interface
*/
input wire logic [QN_W-1:0] notify_req_qn,
input wire logic notify_req_valid,
output wire logic notify_req_ready,
/*
* Interrupts
*/
taxi_axis_if.src m_axis_irq,
/*
* Event output
*/
taxi_axis_if.src m_axis_event
);
localparam PTR_W = 16;
@@ -64,6 +83,12 @@ localparam AXIL_DATA_W = s_axil_ctrl_wr.DATA_W;
localparam APB_ADDR_W = s_apb_dp_ctrl.ADDR_W;
localparam APB_DATA_W = s_apb_dp_ctrl.DATA_W;
localparam IRQN_W = m_axis_irq.DATA_W;
localparam EQN_W = m_axis_event.DEST_W;
localparam EVENT_DATA_W = 64;
localparam EVENT_DEST_W = m_axis_event.DEST_W;
// check configuration
if (s_axil_ctrl_rd.DATA_W != 32 || s_axil_ctrl_wr.DATA_W != 32)
$fatal(0, "Error: AXI data width must be 32 (instance %m)");
@@ -77,6 +102,13 @@ if (s_apb_dp_ctrl.DATA_W != 32)
if (s_apb_dp_ctrl.ADDR_W < ADDR_W)
$fatal(0, "Error: APB address width is insufficient (instance %m)");
// Queue type codes (default enum numbering: EQ=0, CQ=1, SQ=2, RQ=3).
// 3 bits wide, matching the req_qtype port and the queue_mem_qtype storage.
// An identical enumeration is declared in cndm_micro_cpl_wr; keep the two in sync.
typedef enum logic [2:0] {
QTYPE_EQ,
QTYPE_CQ,
QTYPE_SQ,
QTYPE_RQ
} qtype_t;
logic s_axil_ctrl_awready_reg = 1'b0, s_axil_ctrl_awready_next;
logic s_axil_ctrl_wready_reg = 1'b0, s_axil_ctrl_wready_next;
logic s_axil_ctrl_bvalid_reg = 1'b0, s_axil_ctrl_bvalid_next;
@@ -130,8 +162,43 @@ assign rsp_phase_tag = rsp_phase_tag_reg;
assign rsp_error = rsp_error_reg;
assign rsp_valid = rsp_valid_reg;
logic notify_req_ready_reg = 1'b0, notify_req_ready_next;
assign notify_req_ready = notify_req_ready_reg;
logic [IRQN_W-1:0] m_axis_irq_irqn_reg = '0, m_axis_irq_irqn_next;
logic m_axis_irq_tvalid_reg = 1'b0, m_axis_irq_tvalid_next;
assign m_axis_irq.tdata = m_axis_irq_irqn_reg;
assign m_axis_irq.tkeep = '1;
assign m_axis_irq.tstrb = m_axis_irq.tkeep;
assign m_axis_irq.tvalid = m_axis_irq_tvalid_reg;
assign m_axis_irq.tlast = 1'b1;
assign m_axis_irq.tid = '0;
assign m_axis_irq.tdest = '0;
assign m_axis_irq.tuser = '0;
logic [EVENT_DATA_W-1:0] m_axis_event_tdata_reg = '0, m_axis_event_tdata_next;
logic [EQN_W-1:0] m_axis_event_tdest_reg = '0, m_axis_event_tdest_next;
logic m_axis_event_tvalid_reg = 1'b0, m_axis_event_tvalid_next;
assign m_axis_event.tdata = m_axis_event.DATA_W'(m_axis_event_tdata_reg);
assign m_axis_event.tkeep = '1;
assign m_axis_event.tstrb = m_axis_event.tkeep;
assign m_axis_event.tvalid = m_axis_event_tvalid_reg;
assign m_axis_event.tlast = 1'b1;
assign m_axis_event.tid = '0;
assign m_axis_event.tdest = m_axis_event_tdest_reg;
assign m_axis_event.tuser = '0;
logic [2**QN_W-1:0] queue_enable_reg = '0;
(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *)
logic queue_mem_arm[2**QN_W] = '{default: '0};
(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *)
logic queue_mem_fire[2**QN_W] = '{default: '0};
(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *)
logic queue_mem_cq_irq[2**QN_W] = '{default: '0};
(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *)
logic [2:0] queue_mem_qtype[2**QN_W] = '{default: '0};
(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *)
logic [DQN_W-1:0] queue_mem_dqn[2**QN_W] = '{default: '0};
@@ -148,6 +215,9 @@ logic queue_mem_wr_en;
logic [QN_W-1:0] queue_mem_addr;
wire queue_mem_rd_enable = queue_enable_reg[queue_mem_addr];
wire queue_mem_rd_arm = queue_mem_arm[queue_mem_addr];
wire queue_mem_rd_fire = queue_mem_fire[queue_mem_addr];
wire queue_mem_rd_cq_irq = queue_mem_cq_irq[queue_mem_addr];
wire [2:0] queue_mem_rd_qtype = queue_mem_qtype[queue_mem_addr];
wire [DQN_W-1:0] queue_mem_rd_dqn = queue_mem_dqn[queue_mem_addr];
wire [3:0] queue_mem_rd_log_size = queue_mem_log_size[queue_mem_addr];
@@ -155,7 +225,13 @@ wire [DMA_ADDR_W-1:0] queue_mem_rd_base_addr = queue_mem_base_addr[queue_mem_add
wire [PTR_W-1:0] queue_mem_rd_prod_ptr = queue_mem_prod_ptr[queue_mem_addr];
wire [PTR_W-1:0] queue_mem_rd_cons_ptr = queue_mem_cons_ptr[queue_mem_addr];
wire queue_mem_rd_status_empty = queue_mem_rd_prod_ptr == queue_mem_rd_cons_ptr;
wire queue_mem_rd_status_full = ($unsigned(queue_mem_rd_prod_ptr - queue_mem_rd_cons_ptr) & ({PTR_W{1'b1}} << queue_mem_rd_log_size)) != 0;
logic queue_mem_wr_enable;
logic queue_mem_wr_arm;
logic queue_mem_wr_fire;
logic queue_mem_wr_cq_irq;
logic [2:0] queue_mem_wr_qtype;
logic [DQN_W-1:0] queue_mem_wr_dqn;
logic [3:0] queue_mem_wr_log_size;
@@ -163,6 +239,8 @@ logic [DMA_ADDR_W-1:0] queue_mem_wr_base_addr;
logic [PTR_W-1:0] queue_mem_wr_prod_ptr;
logic [PTR_W-1:0] queue_mem_wr_cons_ptr;
logic [QN_W-1:0] scrub_ptr_reg = '0, scrub_ptr_next;
always_comb begin
s_axil_ctrl_awready_next = 1'b0;
s_axil_ctrl_wready_next = 1'b0;
@@ -183,10 +261,22 @@ always_comb begin
rsp_error_next = rsp_error_reg;
rsp_valid_next = rsp_valid_reg && !rsp_ready;
notify_req_ready_next = 1'b0;
m_axis_irq_irqn_next = m_axis_irq_irqn_reg;
m_axis_irq_tvalid_next = m_axis_irq_tvalid_reg && !m_axis_irq.tready;
m_axis_event_tdata_next = m_axis_event_tdata_reg;
m_axis_event_tdest_next = m_axis_event_tdest_reg;
m_axis_event_tvalid_next = m_axis_event_tvalid_reg && !m_axis_event.tready;
queue_mem_wr_en = 1'b0;
queue_mem_addr = '0;
queue_mem_wr_enable = queue_mem_rd_enable;
queue_mem_wr_arm = queue_mem_rd_arm;
queue_mem_wr_fire = queue_mem_rd_fire;
queue_mem_wr_cq_irq = queue_mem_rd_cq_irq;
queue_mem_wr_qtype = queue_mem_rd_qtype;
queue_mem_wr_dqn = queue_mem_rd_dqn;
queue_mem_wr_log_size = queue_mem_rd_log_size;
@@ -194,12 +284,7 @@ always_comb begin
queue_mem_wr_prod_ptr = queue_mem_rd_prod_ptr;
queue_mem_wr_cons_ptr = queue_mem_rd_cons_ptr;
// terminate AXI lite writes
if (IS_CQ && s_axil_ctrl_wr.awvalid && s_axil_ctrl_wr.wvalid && !s_axil_ctrl_bvalid_reg) begin
s_axil_ctrl_awready_next = 1'b1;
s_axil_ctrl_wready_next = 1'b1;
s_axil_ctrl_bvalid_next = 1'b1;
end
scrub_ptr_next = scrub_ptr_reg;
// terminate AXI lite reads
if (s_axil_ctrl_rd.arvalid && !s_axil_ctrl_rvalid_reg) begin
@@ -209,7 +294,7 @@ always_comb begin
s_axil_ctrl_rvalid_next = 1'b1;
end
if (!IS_CQ && s_axil_ctrl_wr.awvalid && s_axil_ctrl_wr.wvalid && !s_axil_ctrl_bvalid_reg) begin
if (s_axil_ctrl_wr.awvalid && s_axil_ctrl_wr.wvalid && !s_axil_ctrl_bvalid_reg) begin
// AXI lite write
s_axil_ctrl_awready_next = 1'b1;
s_axil_ctrl_wready_next = 1'b1;
@@ -219,7 +304,19 @@ always_comb begin
queue_mem_addr = s_axil_ctrl_awaddr_queue_index;
case (s_axil_ctrl_awaddr_reg_index)
3'd2: queue_mem_wr_prod_ptr = s_axil_ctrl_wr.wdata[15:0];
3'd2: begin
if (!IS_CQ && !IS_EQ) begin
queue_mem_wr_prod_ptr = s_axil_ctrl_wr.wdata[15:0];
end
end
3'd3: begin
if (IS_CQ || IS_EQ) begin
queue_mem_wr_cons_ptr = s_axil_ctrl_wr.wdata[15:0];
if (s_axil_ctrl_wr.wdata[31]) begin
queue_mem_wr_arm = 1'b1;
end
end
end
default: begin end
endcase
@@ -236,12 +333,24 @@ always_comb begin
case (s_apb_dp_ctrl_paddr_reg_index)
3'd0: begin
queue_mem_wr_enable = s_apb_dp_ctrl.pwdata[0];
queue_mem_wr_arm = s_apb_dp_ctrl.pwdata[1];
queue_mem_wr_log_size = s_apb_dp_ctrl.pwdata[19:16];
queue_mem_wr_qtype = 3'(s_apb_dp_ctrl.pwdata[23:20]);
queue_mem_wr_fire = 1'b0;
end
3'd1: begin
queue_mem_wr_dqn = s_apb_dp_ctrl.pwdata[DQN_W-1:0];
queue_mem_wr_cq_irq = s_apb_dp_ctrl.pwdata[31];
end
3'd1: queue_mem_wr_dqn = s_apb_dp_ctrl.pwdata[DQN_W-1:0];
3'd2: queue_mem_wr_prod_ptr = s_apb_dp_ctrl.pwdata[15:0];
3'd3: queue_mem_wr_cons_ptr = s_apb_dp_ctrl.pwdata[15:0];
3'd3: begin
queue_mem_wr_cons_ptr = s_apb_dp_ctrl.pwdata[15:0];
if (s_apb_dp_ctrl.pwdata[31]) begin
// rearm
queue_mem_wr_arm = 1'b1;
end
end
3'd6: queue_mem_wr_base_addr[31:0] = s_apb_dp_ctrl.pwdata;
3'd7: queue_mem_wr_base_addr[63:32] = s_apb_dp_ctrl.pwdata;
default: begin end
@@ -251,17 +360,29 @@ always_comb begin
case (s_apb_dp_ctrl_paddr_reg_index)
3'd0: begin
s_apb_dp_ctrl_prdata_next[0] = queue_mem_rd_enable;
s_apb_dp_ctrl_prdata_next[1] = (IS_CQ || IS_EQ) ? queue_mem_rd_arm : 1'b0;
s_apb_dp_ctrl_prdata_next[19:16] = queue_mem_rd_log_size;
s_apb_dp_ctrl_prdata_next[23:20] = 4'(queue_mem_rd_qtype);
s_apb_dp_ctrl_prdata_next[23:20] = QTYPE_EN ? 4'(queue_mem_rd_qtype) : '0;
end
3'd1: s_apb_dp_ctrl_prdata_next = 32'(queue_mem_rd_dqn);
3'd2: s_apb_dp_ctrl_prdata_next[15:0] = queue_mem_rd_prod_ptr;
3'd3: s_apb_dp_ctrl_prdata_next[15:0] = IS_CQ ? '0 : queue_mem_rd_cons_ptr;
3'd1: begin
s_apb_dp_ctrl_prdata_next[30:0] = 31'(queue_mem_rd_dqn);
s_apb_dp_ctrl_prdata_next[31] = (IS_CQ && CQ_IRQ) ? queue_mem_rd_cq_irq : 1'b0;
end
3'd2: s_apb_dp_ctrl_prdata_next = 32'(queue_mem_rd_prod_ptr);
3'd3: s_apb_dp_ctrl_prdata_next = 32'(queue_mem_rd_cons_ptr);
3'd6: s_apb_dp_ctrl_prdata_next = queue_mem_rd_base_addr[31:0];
3'd7: s_apb_dp_ctrl_prdata_next = queue_mem_rd_base_addr[63:32];
default: begin end
endcase
end else if (notify_req_valid && !notify_req_ready) begin
// notify request
notify_req_ready_next = 1'b1;
queue_mem_addr = notify_req_qn;
queue_mem_wr_fire = 1'b1;
queue_mem_wr_en = 1'b1;
end else if (req_valid && !req_ready && (!rsp_valid || rsp_ready)) begin
// completion enqueue request
req_ready_next = 1'b1;
@@ -271,13 +392,15 @@ always_comb begin
rsp_qn_next = req_qn;
rsp_dqn_next = queue_mem_rd_dqn;
rsp_error_next = !queue_mem_rd_enable || (QTYPE_EN && req_qtype != queue_mem_rd_qtype);
if (IS_CQ) begin
if (IS_CQ || IS_EQ) begin
rsp_addr_next = queue_mem_rd_base_addr + DMA_ADDR_W'(16'(queue_mem_rd_prod_ptr & ({16{1'b1}} >> (16 - queue_mem_rd_log_size))) * QE_SIZE);
rsp_phase_tag_next = !queue_mem_rd_prod_ptr[queue_mem_rd_log_size];
if (queue_mem_rd_status_full)
rsp_error_next = 1'b1;
queue_mem_wr_prod_ptr = queue_mem_rd_prod_ptr + 1;
end else begin
rsp_addr_next = queue_mem_rd_base_addr + DMA_ADDR_W'(16'(queue_mem_rd_cons_ptr & ({16{1'b1}} >> (16 - queue_mem_rd_log_size))) * QE_SIZE);
if (queue_mem_rd_prod_ptr == queue_mem_rd_cons_ptr)
if (queue_mem_rd_status_empty)
rsp_error_next = 1'b1;
queue_mem_wr_cons_ptr = queue_mem_rd_cons_ptr + 1;
end
@@ -286,6 +409,44 @@ always_comb begin
if (!rsp_error_next) begin
queue_mem_wr_en = 1'b1;
end
end else begin
// scrub
queue_mem_addr = scrub_ptr_reg;
if ((IS_CQ || IS_EQ) && queue_mem_rd_enable && queue_mem_rd_arm && queue_mem_rd_fire) begin
if ((IS_CQ && !IS_EQ && (!CQ_IRQ || !queue_mem_rd_cq_irq)) || (QTYPE_EN && queue_mem_rd_qtype == QTYPE_CQ && (!CQ_IRQ || !queue_mem_rd_cq_irq))) begin
// event - only for CQ
if (!m_axis_event_tvalid_reg || m_axis_event.tready) begin
// fire in the hole
m_axis_event_tdata_next = '0;
m_axis_event_tdata_next[15:0] = '0; // rsvd
m_axis_event_tdata_next[31:16] = '0; // CPL
m_axis_event_tdata_next[63:32] = 32'(scrub_ptr_reg); // CQN
m_axis_event_tdest_next = EQN_W'(queue_mem_rd_dqn);
m_axis_event_tvalid_next = 1'b1;
queue_mem_wr_arm = 1'b0;
queue_mem_wr_fire = 1'b0;
queue_mem_wr_en = 1'b1;
end
end else if ((!IS_CQ && IS_EQ) || (IS_CQ && !IS_EQ && (CQ_IRQ && queue_mem_rd_cq_irq)) || (QTYPE_EN && (queue_mem_rd_qtype == QTYPE_EQ || (queue_mem_rd_qtype == QTYPE_CQ && (CQ_IRQ && queue_mem_rd_cq_irq))))) begin
// interrupt - EQ or CQ, but CQ requires config bit set to select interrupts
if (!m_axis_irq_tvalid_reg || m_axis_irq.tready) begin
// fire in the hole
m_axis_irq_irqn_next = IRQN_W'(queue_mem_rd_dqn);
m_axis_irq_tvalid_next = 1'b1;
queue_mem_wr_arm = 1'b0;
queue_mem_wr_fire = 1'b0;
queue_mem_wr_en = 1'b1;
end
end
end
scrub_ptr_next = scrub_ptr_reg + 1;
end
end
@@ -309,8 +470,22 @@ always @(posedge clk) begin
rsp_error_reg <= rsp_error_next;
rsp_valid_reg <= rsp_valid_next;
notify_req_ready_reg <= notify_req_ready_next;
m_axis_irq_irqn_reg <= m_axis_irq_irqn_next;
m_axis_irq_tvalid_reg <= m_axis_irq_tvalid_next;
m_axis_event_tdata_reg <= m_axis_event_tdata_next;
m_axis_event_tdest_reg <= m_axis_event_tdest_next;
m_axis_event_tvalid_reg <= m_axis_event_tvalid_next;
scrub_ptr_reg <= scrub_ptr_next;
if (queue_mem_wr_en) begin
queue_enable_reg[queue_mem_addr] <= queue_mem_wr_enable;
queue_mem_arm[queue_mem_addr] <= queue_mem_wr_arm;
queue_mem_fire[queue_mem_addr] <= queue_mem_wr_fire;
queue_mem_cq_irq[queue_mem_addr] <= queue_mem_wr_cq_irq;
queue_mem_qtype[queue_mem_addr] <= queue_mem_wr_qtype;
queue_mem_dqn[queue_mem_addr] <= queue_mem_wr_dqn;
queue_mem_log_size[queue_mem_addr] <= queue_mem_wr_log_size;
@@ -332,6 +507,14 @@ always @(posedge clk) begin
req_ready_reg <= 1'b0;
rsp_valid_reg <= 1'b0;
notify_req_ready_reg <= 1'b0;
m_axis_irq_tvalid_reg <= 1'b0;
m_axis_event_tvalid_reg <= 1'b0;
scrub_ptr_reg <= '0;
queue_enable_reg <= '0;
end
end

View File

@@ -14,7 +14,9 @@ import logging
import struct
from collections import deque
import cocotb
from cocotb.queue import Queue
from cocotb.triggers import RisingEdge
# Command opcodes
@@ -61,19 +63,18 @@ CNDM_CMD_PTP_FLG_OFFSET_FNS = 0x00000010
CNDM_CMD_PTP_FLG_SET_PERIOD = 0x00000080
class Cq:
class Eq:
def __init__(self, driver, port):
self.driver = driver
self.log = driver.log
self.port = port
self.irqn = None
self.log_size = 0
self.size = 0
self.size_mask = 0
self.stride = 0
self.cqn = None
self.eqn = None
self.enabled = False
self.buf_size = 0
@@ -81,23 +82,19 @@ class Cq:
self.buf_dma = 0
self.buf = None
self.eq = None
self.irqn = None
self.src_ring = None
self.handler = None
self.cq_table = {}
self.cons_ptr = None
self.db_offset = None
self.hw_regs = self.driver.hw_regs
async def open(self, irqn, size):
if self.cqn is not None:
if self.eqn is not None:
raise Exception("Already open")
self.irqn = irqn
self.log_size = size.bit_length() - 1
self.size = 2**self.log_size
self.size_mask = self.size-1
@@ -112,13 +109,177 @@ class Cq:
self.cons_ptr = 0
self.irqn = irqn
self.cq_table = {}
rsp = await self.driver.exec_cmd(struct.pack("<HHLLLLLLLQQLLLL",
0, # rsvd
CNDM_CMD_OP_CREATE_EQ, # opcode
0x00000000, # flags
self.port.index, # port
0, # eqn
self.irqn, # irqn
0, # pd
self.log_size, # size
0, # dboffs
self.buf_dma, # base addr
0, # ptr2
0, # prod_ptr
0, # cons_ptr
0, # rsvd
0, # rsvd
))
rsp_unpacked = struct.unpack("<HHLLLLLLLQQLLLL", rsp)
print(rsp_unpacked)
self.eqn = rsp_unpacked[4]
self.db_offset = rsp_unpacked[8]
if self.db_offset == 0:
self.eqn = None
self.db_offset = None
self.log.error("Failed to allocate EQ")
return
await self.write_cons_ptr_arm()
self.log.info("Opened EQ %d", self.eqn)
self.log.info("Using doorbell at offset 0x%08x", self.db_offset)
self.enabled = True
async def close(self):
    """Destroy this event queue on the device.

    Returns immediately when the queue is not open (``self.eqn`` is
    ``None``). Otherwise the queue is marked disabled, a
    ``CNDM_CMD_OP_DESTROY_EQ`` command naming ``self.eqn`` is executed
    through the driver, and ``self.eqn`` is cleared. The command response
    is not inspected.
    """
    if self.eqn is None:
        return

    self.enabled = False

    # Only opcode, port and eqn carry information in a destroy command.
    destroy_cmd = struct.pack(
        "<HHLLLLLLLQQLLLL",
        0,  # rsvd
        CNDM_CMD_OP_DESTROY_EQ,  # opcode
        0x00000000,  # flags
        self.port.index,  # port
        self.eqn,  # eqn
        0,  # irqn
        0,  # pd
        0,  # size
        0,  # dboffs
        0,  # base addr
        0,  # ptr2
        0,  # prod_ptr
        0,  # cons_ptr
        0,  # rsvd
        0,  # rsvd
    )
    await self.driver.exec_cmd(destroy_cmd)

    self.eqn = None

    # TODO free buffer
def attach_cq(self, cq):
    """Register ``cq`` in this EQ's CQ table, keyed by ``cq.cqn``."""
    self.cq_table.update({cq.cqn: cq})
def detach_cq(self, cq):
    """Remove ``cq`` from this EQ's CQ table.

    Raises ``KeyError`` when ``cq.cqn`` is not present in the table.
    """
    self.cq_table.pop(cq.cqn)
async def write_cons_ptr(self):
    """Write the low 16 bits of ``self.cons_ptr`` to the doorbell at ``self.db_offset``."""
    doorbell_value = self.cons_ptr & 0xffff
    await self.hw_regs.write_dword(self.db_offset, doorbell_value)
async def write_cons_ptr_arm(self):
    """Write the low 16 bits of ``self.cons_ptr`` to the doorbell with bit 31 set.

    Bit 31 of the doorbell word is the arm request.
    """
    doorbell_value = 0x80000000 | (self.cons_ptr & 0xffff)
    await self.hw_regs.write_dword(self.db_offset, doorbell_value)
async def process_eq(self):
    """Drain pending events from the event queue, then re-arm it.

    Entries are read as 16-byte ``<HHLLL`` records starting at
    ``self.cons_ptr``. Bit 31 of the last dword is the phase tag: the scan
    stops at the first entry whose tag equals the wrap bit
    (``cons_ptr & size``) of the consumer pointer, which marks the ring
    as empty.

    Events of type ``0x0000`` are completion events; field 2 carries the
    CQ number, and the handler of the CQ registered under that number is
    awaited. An event naming a CQ that is not in ``self.cq_table``, or a
    CQ with no handler, is logged and skipped so the scan can finish and
    the EQ is still re-armed. Other event types are skipped silently.

    On exit the advanced consumer pointer is stored in ``self.cons_ptr``
    and written to the doorbell with the arm bit set.
    """
    self.log.info("Process EQ")

    eq_cons_ptr = self.cons_ptr
    eq_index = eq_cons_ptr & self.size_mask

    while True:
        event_data = struct.unpack_from("<HHLLL", self.buf, eq_index*self.stride)

        self.log.info("EQ %d index %d data: %s", self.eqn, eq_index, repr(event_data))

        # phase tag matches the wrap bit: entry has not been written yet
        if bool(event_data[-1] & 0x80000000) == bool(eq_cons_ptr & self.size):
            self.log.info("EQ %d empty", self.eqn)
            break

        if event_data[1] == 0x0000:
            # completion
            cqn = event_data[2]
            self.log.info("Event from CQ %d", cqn)
            cq = self.cq_table.get(cqn)
            if cq is None or cq.handler is None:
                # Do not abort the scan: the entry must still be consumed
                # and the EQ re-armed, otherwise no further events arrive.
                self.log.warning("EQ %d: event for unknown CQ %d", self.eqn, cqn)
            else:
                await cq.handler(cq)

        eq_cons_ptr += 1
        eq_index = eq_cons_ptr & self.size_mask

    self.cons_ptr = eq_cons_ptr
    await self.write_cons_ptr_arm()
class Cq:
def __init__(self, driver, port):
self.driver = driver
self.log = driver.log
self.port = port
self.log_size = 0
self.size = 0
self.size_mask = 0
self.stride = 0
self.cqn = None
self.enabled = False
self.buf_size = 0
self.buf_region = None
self.buf_dma = 0
self.buf = None
self.eq = None
self.irqn = None
self.src_ring = None
self.handler = None
self.cons_ptr = None
self.db_offset = None
self.hw_regs = self.driver.hw_regs
async def open(self, eq, size):
if self.cqn is not None:
raise Exception("Already open")
self.log_size = size.bit_length() - 1
self.size = 2**self.log_size
self.size_mask = self.size-1
self.stride = 16
self.buf_size = self.size*self.stride
self.buf_region = self.driver.pool.alloc_region(self.buf_size)
self.buf_dma = self.buf_region.get_absolute_address(0)
self.buf = self.buf_region.mem
self.buf[0:self.buf_size] = b'\x00'*self.buf_size
self.cons_ptr = 0
if isinstance(eq, Eq):
self.eq = eq
dqn = eq.eqn
else:
self.irqn = eq
dqn = eq | 0x80000000
rsp = await self.driver.exec_cmd(struct.pack("<HHLLLLLLLQQLLLL",
0, # rsvd
CNDM_CMD_OP_CREATE_CQ, # opcode
0x00000000, # flags
self.port.index, # port
0, # cqn
self.irqn, # eqn
dqn, # eqn
0, # pd
self.log_size, # size
0, # dboffs
@@ -141,7 +302,13 @@ class Cq:
self.log.error("Failed to allocate CQ")
return
if self.eq:
self.eq.attach_cq(self)
await self.write_cons_ptr_arm()
self.log.info("Opened CQ %d", self.cqn)
self.log.info("Using doorbell at offset 0x%08x", self.db_offset)
self.enabled = True
@@ -173,6 +340,12 @@ class Cq:
# TODO free buffer
async def write_cons_ptr(self):
    """Publish the consumer pointer: write its low 16 bits to the doorbell at ``self.db_offset``."""
    masked_ptr = self.cons_ptr & 0xffff
    await self.hw_regs.write_dword(self.db_offset, masked_ptr)
async def write_cons_ptr_arm(self):
    """Publish the consumer pointer and request re-arming.

    Writes the low 16 bits of ``self.cons_ptr`` with bit 31 (the arm
    request) set to the doorbell at ``self.db_offset``.
    """
    masked_ptr = self.cons_ptr & 0xffff
    await self.hw_regs.write_dword(self.db_offset, 0x80000000 | masked_ptr)
class Sq:
def __init__(self, driver, port):
@@ -203,7 +376,6 @@ class Sq:
self.bytes = 0
self.db_offset = None
self.hw_regs = self.driver.hw_regs
async def open(self, cq, size):
@@ -261,6 +433,7 @@ class Sq:
return
self.log.info("Opened SQ %d (CQ %d)", self.sqn, cq.cqn)
self.log.info("Using doorbell at offset 0x%08x", self.db_offset)
self.enabled = True
@@ -292,6 +465,15 @@ class Sq:
# TODO free buffer
def is_ring_empty(self):
    """Return ``True`` when the producer and consumer pointers are equal."""
    return self.cons_ptr == self.prod_ptr
def is_ring_full(self):
    """Return ``True`` when the ring holds ``self.size`` or more entries.

    The occupancy is the producer/consumer pointer difference reduced
    modulo 2**32. A ring with exactly ``size`` outstanding entries has no
    free slot, so the comparison is ``>=`` rather than ``>``.
    """
    occupancy = (self.prod_ptr - self.cons_ptr) & 0xffffffff
    return occupancy >= self.size
async def write_prod_ptr(self):
    """Write the low 16 bits of ``self.prod_ptr`` to the doorbell at ``self.db_offset``."""
    doorbell_value = self.prod_ptr & 0xffff
    await self.hw_regs.write_dword(self.db_offset, doorbell_value)
async def start_xmit(self, data):
headroom = 10
tx_buf = self.driver.alloc_pkt()
@@ -301,7 +483,7 @@ class Sq:
struct.pack_into('<xxxxLQ', self.buf, 16*index, len(data), ptr+headroom)
self.tx_info[index] = tx_buf
self.prod_ptr += 1
await self.hw_regs.write_dword(self.db_offset, self.prod_ptr & 0xffff)
await self.write_prod_ptr()
def free_tx_desc(self, index):
pkt = self.tx_info[index]
@@ -309,7 +491,7 @@ class Sq:
self.tx_info[index] = None
def free_tx_buf(self):
while self.cons_ptr != self.txq_prod:
while not self.is_ring_empty():
index = self.cons_ptr & self.size_mask
self.free_tx_desc(index)
self.cons_ptr += 1
@@ -345,6 +527,8 @@ class Sq:
cq.cons_ptr = cq_cons_ptr
sq.cons_ptr = cons_ptr
await cq.write_cons_ptr_arm()
class Rq:
def __init__(self, driver, port):
@@ -375,7 +559,6 @@ class Rq:
self.bytes = 0
self.db_offset = None
self.hw_regs = self.driver.hw_regs
async def open(self, cq, size):
@@ -433,6 +616,7 @@ class Rq:
return
self.log.info("Opened RQ %d (CQ %d)", self.rqn, cq.cqn)
self.log.info("Using doorbell at offset 0x%08x", self.db_offset)
self.enabled = True
@@ -466,13 +650,22 @@ class Rq:
# TODO free buffer
def is_ring_empty(self):
    """Return ``True`` when nothing is outstanding (producer equals consumer)."""
    return self.cons_ptr == self.prod_ptr
def is_ring_full(self):
    """Return ``True`` when the ring holds ``self.size`` or more entries.

    The occupancy is the producer/consumer pointer difference reduced
    modulo 2**32. Exactly ``size`` outstanding entries already means there
    is no free slot, hence ``>=`` rather than ``>``.
    """
    occupancy = (self.prod_ptr - self.cons_ptr) & 0xffffffff
    return occupancy >= self.size
async def write_prod_ptr(self):
    """Publish the producer pointer: write its low 16 bits to the doorbell at ``self.db_offset``."""
    masked_ptr = self.prod_ptr & 0xffff
    await self.hw_regs.write_dword(self.db_offset, masked_ptr)
def free_rx_desc(self, index):
    """Hand the packet stored for descriptor ``index`` back to the driver and clear the slot."""
    self.driver.free_pkt(self.rx_info[index])
    self.rx_info[index] = None
def free_rx_buf(self):
while self.cons_ptr != self.prod_ptr:
while not self.is_ring_empty():
index = self.cons_ptr & self.size_mask
self.free_rx_desc(index)
self.cons_ptr += 1
@@ -496,7 +689,7 @@ class Rq:
self.prepare_rx_desc(self.prod_ptr & self.size_mask)
self.prod_ptr += 1
await self.hw_regs.write_dword(self.db_offset, self.prod_ptr & 0xffff)
await self.write_prod_ptr()
@staticmethod
async def process_rx_cq(cq):
@@ -537,6 +730,7 @@ class Rq:
rq.cons_ptr = cons_ptr
await rq.refill_rx_buffers()
await cq.write_cons_ptr_arm()
class Port:
@@ -546,6 +740,9 @@ class Port:
self.index = index
self.hw_regs = driver.hw_regs
self.eq_count = 1
self.eq = []
self.rxq_count = 1
self.rxq = []
@@ -555,12 +752,17 @@ class Port:
self.rx_queue = Queue()
async def init(self):
    """Create and open ``self.eq_count`` event queues, then open the port.

    Each EQ is opened with ``self.index`` as its IRQ number and a size of
    256, and is appended to ``self.eq`` before ``self.open()`` is awaited.
    """
    for _ in range(self.eq_count):
        new_eq = Eq(self.driver, self)
        await new_eq.open(self.index, 256)
        self.eq.append(new_eq)

    await self.open()
async def open(self):
for k in range(self.rxq_count):
cq = Cq(self.driver, self)
await cq.open(self.index, 256)
await cq.open(self.eq[0], 256)
q = Rq(self.driver, self)
await q.open(cq, 256)
@@ -569,7 +771,7 @@ class Port:
for k in range(self.txq_count):
cq = Cq(self.driver, self)
await cq.open(self.index, 256)
await cq.open(self.eq[0], 256)
q = Sq(self.driver, self)
await q.open(cq, 256)
@@ -585,12 +787,36 @@ class Port:
async def recv_nowait(self):
    """Return the next item from ``self.rx_queue`` via its ``get_nowait()``."""
    item = self.rx_queue.get_nowait()
    return item
async def interrupt_handler(self):
    """Log the interrupt, then run the CQ handler of every RX queue followed by every TX queue."""
    self.log.info("Interrupt")
    for ring_list in (self.rxq, self.txq):
        for ring in ring_list:
            completion_queue = ring.cq
            await completion_queue.handler(completion_queue)
class Interrupt:
    """Queues interrupt notifications and dispatches them to a handler.

    Each call to ``interrupt()`` puts one item on an internal queue. A
    background coroutine started in the constructor takes items off the
    queue one at a time and, when a handler is set, awaits
    ``handler(index)`` for each.
    """

    def __init__(self, index, handler=None):
        self.index = index
        self.queue = Queue()
        self.handler = handler
        self.signal = None

        # dispatcher coroutine runs for the lifetime of the object
        cocotb.start_soon(self._run())

    @classmethod
    def from_edge(cls, index, signal, handler=None):
        """Build an instance that also raises an interrupt on every rising edge of ``signal``."""
        irq = cls(index, handler)
        irq.signal = signal
        cocotb.start_soon(irq._run_edge())
        return irq

    async def interrupt(self):
        """Record one pending interrupt."""
        self.queue.put_nowait(None)

    async def _run(self):
        # Take one queued notification at a time and hand it to the handler.
        while True:
            await self.queue.get()
            if not self.handler:
                continue
            await self.handler(self.index)

    async def _run_edge(self):
        # Turn each rising edge of the signal into a queued interrupt.
        while True:
            await RisingEdge(self.signal)
            await self.interrupt()
class Driver:
@@ -601,6 +827,8 @@ class Driver:
self.pool = None
self.hw_regs = None
self.irq_list = []
self.port_count = None
self.ports = []
@@ -656,6 +884,12 @@ class Driver:
self.hw_regs = dev.bar_window[0]
# set up MSI
for index in range(32):
irq = Interrupt(index, self.interrupt_handler)
self.dev.request_irq(index, irq.interrupt)
self.irq_list.append(irq)
await self.init_common()
async def init_common(self):
@@ -853,7 +1087,6 @@ class Driver:
for k in range(self.port_count):
port = Port(self, k)
await port.init()
self.dev.request_irq(k, port.interrupt_handler)
self.ports.append(port)
@@ -912,6 +1145,24 @@ class Driver:
a[k] = await self.hw_regs.read_dword(0x10040+k*4)
return a.tobytes()
async def interrupt_handler(self, irqn):
    """Dispatch interrupt ``irqn`` to the queues that are bound to it.

    For every port that has event queues, each EQ whose ``irqn`` matches
    is processed. For a port without event queues, the handler of each RX
    and then each TX completion queue whose ``irqn`` matches is awaited.
    """
    self.log.info("Interrupt handler start (IRQ %d)", irqn)

    for port in self.ports:
        if not port.eq:
            # using IRQs directly from CQs
            for ring_list in (port.rxq, port.txq):
                for ring in ring_list:
                    if ring.cq.irqn == irqn:
                        await ring.cq.handler(ring.cq)
            continue

        # using EQs
        for event_queue in port.eq:
            if event_queue.irqn == irqn:
                await event_queue.process_eq()

    self.log.info("Interrupt handler end (IRQ %d)", irqn)
def alloc_pkt(self):
if self.free_packets:
return self.free_packets.popleft()