APISIX源码解析-插件-API 熔断【api-breaker】

api-breaker API 熔断插件。关键属性

熔断超时逻辑

由代码逻辑自动按触发不健康状态的次数递增运算：

每当上游服务返回unhealthy.http_statuses配置中的状态码(比如：500)，达到unhealthy.failures次时(比如：3 次)，认为上游服务处于不健康状态。

第一次触发不健康状态，熔断 2 秒。

然后，2 秒过后重新开始转发请求到上游服务，如果继续返回unhealthy.http_statuses状态码，计数再次达到unhealthy.failures次时，熔断 4 秒（倍数方式）。

依次类推，2, 4, 8, 16, 32, 64, …, 256，最大到 300。300 是 max_breaker_sec 的默认值，即熔断时长的上限，允许自定义修改。

在不健康状态时，当转发请求到上游服务并返回healthy.http_statuses配置中的状态码(比如：200)，达到healthy.successes次时(比如：3 次)，认为上游服务恢复健康状态。

源码实现

--- Access-phase handler: rejects the request while the breaker is open.
-- Reads the failure counter and the timestamp of the last trip from
-- `shared_buffer`, derives the current break duration, and returns the
-- configured response code if that duration has not elapsed yet.
-- @param conf plugin configuration; reads `unhealthy.failures`,
--             `max_breaker_sec` and `break_response_code`
-- @param ctx  request context, used only to derive the storage keys
-- @return `conf.break_response_code` while the breaker is open; nothing otherwise
function _M.access(conf, ctx)
    local unhealthy_key = gen_unhealthy_key(ctx)

    -- The failure counter is first written in the log phase, so a missing
    -- entry means no unhealthy response has been recorded for this key.
    local failed_total, count_err = shared_buffer:get(unhealthy_key)
    if count_err then
        core.log.warn("failed to get unhealthy_key: ",
                      unhealthy_key, " err: ", count_err)
        return
    end
    if not failed_total then
        return
    end

    -- Timestamp of the last time the unhealthy state was triggered.
    local lasttime_key = gen_lasttime_key(ctx)
    local tripped_at, time_err = shared_buffer:get(lasttime_key)
    if time_err then
        core.log.warn("failed to get lasttime_key: ",
                      lasttime_key, " err: ", time_err)
        return
    end
    if not tripped_at then
        return
    end

    -- How many times the failure threshold has been reached (rounded up),
    -- never less than one.
    local rounds = math.ceil(failed_total / conf.unhealthy.failures)
    if rounds < 1 then
        rounds = 1
    end

    -- The break duration doubles with every round, but cannot exceed the
    -- maximum value of the user configuration.
    local breaker_time = 2 ^ rounds
    local ceiling = conf.max_breaker_sec
    if breaker_time > ceiling then
        breaker_time = ceiling
    end
    core.log.info("breaker_time: ", breaker_time)

    -- Break window already over: let the request go to the upstream.
    if ngx.time() > tripped_at + breaker_time then
        return
    end

    -- Still inside the break window: answer with the configured code.
    return conf.break_response_code
end


--- Log-phase handler: records the upstream response status so that the
-- access phase can decide whether the breaker is open.
--
-- * Status listed in `conf.unhealthy.http_statuses`: increments the failure
--   counter, resets the success counter and, on every
--   `conf.unhealthy.failures`-th failure, stores the current time as the
--   moment the unhealthy state was (re)triggered.
-- * Status listed in `conf.healthy.http_statuses` while a failure counter
--   exists: increments the success counter and clears all breaker state once
--   `conf.healthy.successes` is reached.
-- * Any other status is ignored.
--
-- @param conf plugin configuration; reads `unhealthy.http_statuses`,
--             `unhealthy.failures`, `healthy.http_statuses`,
--             `healthy.successes` and `max_breaker_sec`
-- @param ctx  request context, used to derive the storage keys and to read
--             the upstream status
-- @return nothing
function _M.log(conf, ctx)
    local unhealthy_key = gen_unhealthy_key(ctx)
    local healthy_key = gen_healthy_key(ctx)
    local upstream_status = core.response.get_upstream_status(ctx)
    -- no upstream status available: nothing to account for
    if not upstream_status then
        return
    end

    -- unhealthy process
    -- the upstream status is one of the configured failure codes
    if core.table.array_find(conf.unhealthy.http_statuses,
                             upstream_status)
    then
        -- accumulate the number of unhealthy responses here
        local unhealthy_count, err = shared_buffer:incr(unhealthy_key, 1, 0)

        -- a failing response always interrupts the current run of successes
        shared_buffer:delete(healthy_key)

        -- `incr` yields nil on failure; without a count the modulo below
        -- would raise "attempt to perform arithmetic on a nil value"
        if not unhealthy_count then
            core.log.warn("failed to incr unhealthy_key: ", unhealthy_key,
                          " err: ", err)
            return
        end
        core.log.info("unhealthy_key: ", unhealthy_key, " count: ",
                      unhealthy_count)

        -- whether the user-configured number of failures has been reached,
        -- and if so, the timestamp for entering the unhealthy state.
        -- Every `unhealthy.failures` errors count as one unhealthy trigger.
        if unhealthy_count % conf.unhealthy.failures == 0 then
            -- The timestamp is stored with `max_breaker_sec` as its expiry:
            -- if no new unhealthy trigger refreshes it within that time, the
            -- entry disappears and the access phase stops breaking.
            shared_buffer:set(gen_lasttime_key(ctx), ngx.time(),
                              conf.max_breaker_sec)
            core.log.info("update unhealthy_key: ", unhealthy_key, " to ",
                          unhealthy_count)
        end

        return
    end

    -- health process
    -- a status that is neither a configured failure code nor a configured
    -- success code does not take part in the breaker accounting
    if not core.table.array_find(conf.healthy.http_statuses, upstream_status) then
        return
    end

    local unhealthy_count, err = shared_buffer:get(unhealthy_key)
    if err then
        core.log.warn("failed to `get` unhealthy_key: ", unhealthy_key,
                      " err: ", err)
        return
    end
    -- no failures recorded before this success: nothing to recover from
    if not unhealthy_count then
        return
    end

    -- failures were recorded earlier, so count this success toward recovery
    local healthy_count, incr_err = shared_buffer:incr(healthy_key, 1, 0)
    -- `incr` yields nil on failure; comparing nil with a number below
    -- would raise, so stop here after reporting the problem
    if not healthy_count then
        core.log.warn("failed to `incr` healthy_key: ", healthy_key,
                      " err: ", incr_err)
        return
    end

    -- clear related status
    -- once `healthy.successes` successes are reached the upstream is
    -- considered healthy again and all breaker state is dropped
    if healthy_count >= conf.healthy.successes then
        -- stat change to normal
        core.log.info("change to normal, ", healthy_key, " ", healthy_count)
        shared_buffer:delete(gen_lasttime_key(ctx))
        shared_buffer:delete(unhealthy_key)
        shared_buffer:delete(healthy_key)
    end
end

APISIX源码解析-插件-API 熔断【api-breaker】

Java相关栏目本月热门文章