diff --git a/lightllm/server/router/dynamic_prompt/linear_att_radix_cache.py b/lightllm/server/router/dynamic_prompt/linear_att_radix_cache.py index c7408add3..bf07e121e 100644 --- a/lightllm/server/router/dynamic_prompt/linear_att_radix_cache.py +++ b/lightllm/server/router/dynamic_prompt/linear_att_radix_cache.py @@ -163,7 +163,10 @@ def _discard_node(self, node: LinearAttPagedTreeNode): return def _add_node(self, node: LinearAttPagedTreeNode): - if node.is_leaf(): + # The root never takes part in eviction: on an empty tree it also satisfies is_leaf(), and adding it to + # _evict_tree_set would contradict the "node is not self.root_node" assertion in _evict (today this is + # only masked by the root's ref_counter>=1 and the eviction watermark guard). Exclude it explicitly. + if node.is_leaf() and node is not self.root_node: self._evict_tree_set.add(node) if node.small_page_buffer_idx is not None: self._evict_tree_set_for_linear_att.add(node) diff --git a/lightllm/server/router/model_infer/infer_batch.py b/lightllm/server/router/model_infer/infer_batch.py index bae5ea1e3..5c2d0d45f 100644 --- a/lightllm/server/router/model_infer/infer_batch.py +++ b/lightllm/server/router/model_infer/infer_batch.py @@ -226,6 +226,19 @@ def _linear_att_free_req(self, free_token_index: List, req: "InferReq"): if shared_kv_len <= req.cur_kv_len: free_token_index.append(self.req_manager.req_to_token_indexs[req.req_idx][shared_kv_len : req.cur_kv_len]) + # This branch does not insert the big page ids accumulated during prefill into the radix cache (typically + # a pause/abort that fires after prefill crossed a big page boundary but before its end); free them here. + + # Free the big page state buffers this request allocated at big page boundaries during prefill but has + # not yet inserted into the radix cache. Leftovers exist only when the request is freed without taking an + # insert branch (small/big page insert), e.g. pause / abort in big page mode past a big page boundary. + # If they are not freed, big page state slots leak and the dict-is-empty assertion in free_a_req_mem fires. + if req.linear_att_len_to_big_page_id: + self.radix_cache.linear_att_big_page_buffers.free_state_cache( + list(req.linear_att_len_to_big_page_id.values()) + ) + req.linear_att_len_to_big_page_id.clear() + req.cur_kv_len = shared_kv_len assert req.tail_linear_att_small_page_buffer_id is
None if req.shared_kv_node is not None: