Fix streaming mode of openai_api.py when no stop words are given

This commit is contained in:
Jianxin Ma
2024-03-15 19:02:59 +08:00
committed by GitHub
parent 11e00874a9
commit a6085c2a91

View File

@@ -484,7 +484,7 @@ async def predict(
stop_words_ids = [tokenizer.encode(s)
for s in stop_words] if stop_words else None
delay_token_num = max([len(x) for x in stop_words])
delay_token_num = max([len(x) for x in stop_words]) if stop_words_ids else 0
response_generator = model.chat_stream(tokenizer,
query,
history=history,
@@ -493,8 +493,8 @@ async def predict(
**gen_kwargs)
for _new_response in response_generator:
if len(_new_response) <= delay_token_num:
continue
new_response = _new_response[:-delay_token_num]
continue
new_response = _new_response[:-delay_token_num] if delay_token_num else _new_response
if len(new_response) == current_length:
continue