RDMA : polling strategy

55 Views Asked by At

I tried to wrap RDMA in a helper class. I had some issues with send events not generating a CQE on the remote side, and I fixed them by replacing this snippet of code:

/// Waits for one completion-channel event, then dequeues a single work
/// completion from the CQ.
///
/// @param ignore_errors  When false, a work completion whose status is not
///                       IBV_WC_SUCCESS is reported through FATAL_ERROR.
/// @return The work completion obtained from ibv_poll_cq.
ibv_wc RdmaBase::wait_event(bool ignore_errors)
{
    // Every call starts by taking one event from the completion channel,
    // acknowledging it, and requesting notification for the next one. This
    // happens unconditionally, before the CQ is looked at.
    void* ctxt;
    HENSURE_ERRNO(ibv_get_cq_event(m_comp_channel, &m_cq, &ctxt) == 0);
    ibv_ack_cq_events(m_cq, 1);
    HENSURE_ERRNO(ibv_req_notify_cq(m_cq, 0) == 0);

    // Spin on the CQ until exactly one work completion has been dequeued.
    ibv_wc wc{};
    for(;;)
    {
        const int num_completions = ibv_poll_cq(m_cq, 1, &wc);
        HENSURE_ERRNO(num_completions >= 0);

        if(num_completions != 0)
        {
            break;
        }
    }

    // Status is only inspected when the caller asked for errors to be reported.
    if(wc.status != IBV_WC_SUCCESS && !ignore_errors)
    {
        FATAL_ERROR("Failed status %s (%d) for wr_id %d\n",
                    ibv_wc_status_str(wc.status),
                    wc.status,
                    (int) wc.wr_id);
    }

    return wc;
}

with this one:

/// Returns the next work completion from the CQ, waiting on the completion
/// channel only when the CQ was last seen empty.
///
/// Acts as a coroutine-like alternative to polling events from a separate
/// thread: m_polling carries the "CQ may still hold entries" state from one
/// call to the next.
///
/// @param ignore_errors  When false, a work completion whose status is not
///                       IBV_WC_SUCCESS is reported through FATAL_ERROR.
/// @return The work completion obtained from ibv_poll_cq.
ibv_wc RdmaBase::wait_event(bool ignore_errors)
{
    ibv_wc wc{};

    for(;;)
    {
        // Not in polling mode: take one channel event, acknowledge it,
        // request the next notification, then switch to polling mode.
        if(!m_polling)
        {
            void* ctxt;
            HENSURE_ERRNO(ibv_get_cq_event(m_comp_channel, &m_cq, &ctxt) == 0);
            ibv_ack_cq_events(m_cq, 1);
            HENSURE_ERRNO(ibv_req_notify_cq(m_cq, 0) == 0);
            m_polling = true;
        }

        const int num_completions = ibv_poll_cq(m_cq, 1, &wc);
        HENSURE_ERRNO(num_completions >= 0);

        if(num_completions != 0)
        {
            // Got one; m_polling stays true so the next call polls first.
            break;
        }

        // CQ is empty: leave polling mode and go back to the channel.
        m_polling = false;
    }

    // Status is only inspected when the caller asked for errors to be reported.
    if(wc.status != IBV_WC_SUCCESS && !ignore_errors)
    {
        FATAL_ERROR("Failed status %s (%d) for wr_id %d\n",
                    ibv_wc_status_str(wc.status),
                    wc.status,
                    (int) wc.wr_id);
    }

    return wc;
}

wait_event() should return the next work completion (WC) from the completion queue (CQ). If the CQ is not empty, it should pop one WC. If the CQ is empty, the function should block until a WC is generated.

Is there a functional difference between these two versions of the code? I am using SoftRoCE on Ubuntu 22.04.

0

There are 0 best solutions below