Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,10 @@ def _discard_node(self, node: LinearAttPagedTreeNode):
return

def _add_node(self, node: LinearAttPagedTreeNode):
if node.is_leaf():
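# The root never takes part in eviction: when the tree is empty the root itself also satisfies
# is_leaf(), and adding it to _evict_tree_set would contradict the "node is not self.root_node"
# assertion in _evict (today this is only masked by the root's ref_counter >= 1 and the eviction
# watermark guard). Exclude it explicitly here so the data structure matches the intent of the
# eviction logic.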
if node.is_leaf() and node is not self.root_node:
self._evict_tree_set.add(node)
if node.small_page_buffer_idx is not None:
self._evict_tree_set_for_linear_att.add(node)
Expand Down
13 changes: 13 additions & 0 deletions lightllm/server/router/model_infer/infer_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,19 @@ def _linear_att_free_req(self, free_token_index: List, req: "InferReq"):

if shared_kv_len <= req.cur_kv_len:
free_token_index.append(self.req_manager.req_to_token_indexs[req.req_idx][shared_kv_len : req.cur_kv_len])
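# This branch does not insert the big page ids accumulated during prefill into the radix cache
# (typically a pause/abort triggered after prefill has crossed a big page boundary but before it
# reaches the end), so they must be released explicitly here to avoid a leak.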

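# Release the big page state buffers that this request allocated at big page boundaries during
# prefill but has not yet inserted into the radix cache. Leftovers exist only when the request is
# freed without going through an insert branch (small page / big page insert); the typical case is
# a request in big page mode that is paused / aborted after prefill has crossed a big page boundary
# but before it reaches the end. If they are not released, big page state slots leak and the
# "dict is empty" assertion in free_a_req_mem is triggered.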
if req.linear_att_len_to_big_page_id:
self.radix_cache.linear_att_big_page_buffers.free_state_cache(
list(req.linear_att_len_to_big_page_id.values())
)
req.linear_att_len_to_big_page_id.clear()

req.cur_kv_len = shared_kv_len
assert req.tail_linear_att_small_page_buffer_id is None
if req.shared_kv_node is not None:
Expand Down
Loading