diff --git a/config/nim.py b/config/nim.py index 29e3681..6c01c47 100644 --- a/config/nim.py +++ b/config/nim.py @@ -23,7 +23,6 @@ class NimSettings(BaseModel): parallel_tool_calls: bool = True return_tokens_as_token_ids: bool = False - include_stop_str_in_output: bool = False ignore_eos: bool = False min_tokens: int = Field(0, ge=0) diff --git a/providers/nvidia_nim/request.py b/providers/nvidia_nim/request.py index 532c37c..03b2883 100644 --- a/providers/nvidia_nim/request.py +++ b/providers/nvidia_nim/request.py @@ -87,7 +87,6 @@ def build_request_body(request_data: Any, nim: NimSettings) -> dict: _set_extra(extra_body, "chat_template", nim.chat_template) _set_extra(extra_body, "request_id", nim.request_id) _set_extra(extra_body, "return_tokens_as_token_ids", nim.return_tokens_as_token_ids) - _set_extra(extra_body, "include_stop_str_in_output", nim.include_stop_str_in_output) _set_extra(extra_body, "ignore_eos", nim.ignore_eos) _set_extra(extra_body, "reasoning_effort", nim.reasoning_effort) _set_extra(extra_body, "include_reasoning", nim.include_reasoning) diff --git a/tests/providers/test_nvidia_nim_request.py b/tests/providers/test_nvidia_nim_request.py index 64f61b0..8b0ff8f 100644 --- a/tests/providers/test_nvidia_nim_request.py +++ b/tests/providers/test_nvidia_nim_request.py @@ -2,6 +2,8 @@ from unittest.mock import MagicMock +import pytest + from config.nim import NimSettings from providers.common.utils import set_if_not_none from providers.nvidia_nim.request import ( @@ -10,6 +12,23 @@ from providers.nvidia_nim.request import ( ) +@pytest.fixture +def req(): + r = MagicMock() + r.model = "test" + r.messages = [MagicMock(role="user", content="hi")] + r.max_tokens = 100 + r.system = None + r.temperature = None + r.top_p = None + r.stop_sequences = None + r.tools = None + r.tool_choice = None + r.extra_body = None + r.top_k = None + return r + + class TestSetIfNotNone: def test_value_not_none_sets(self): body = {} @@ -45,57 +64,22 @@ class TestSetExtra: class TestBuildRequestBody: - def test_max_tokens_capped_by_nim(self): - """Request max_tokens exceeds nim.max_tokens -> capped.""" - req = MagicMock() - req.model = "test" - req.messages = [MagicMock(role="user", content="hi")] + def test_max_tokens_capped_by_nim(self, req): req.max_tokens = 100000 - req.system = None - req.temperature = None - req.top_p = None - req.stop_sequences = None - req.tools = None - req.tool_choice = None - req.extra_body = None - req.top_k = None - nim = NimSettings(max_tokens=4096) body = build_request_body(req, nim) assert body["max_tokens"] == 4096 - def test_presence_penalty_included_when_nonzero(self): - req = MagicMock() - req.model = "test" - req.messages = [MagicMock(role="user", content="hi")] - req.max_tokens = 100 - req.system = None - req.temperature = None - req.top_p = None - req.stop_sequences = None - req.tools = None - req.tool_choice = None - req.extra_body = None - req.top_k = None - + def test_presence_penalty_included_when_nonzero(self, req): nim = NimSettings(presence_penalty=0.5) body = build_request_body(req, nim) assert body["presence_penalty"] == 0.5 - def test_parallel_tool_calls_included(self): - req = MagicMock() - req.model = "test" - req.messages = [MagicMock(role="user", content="hi")] - req.max_tokens = 100 - req.system = None - req.temperature = None - req.top_p = None - req.stop_sequences = None - req.tools = None - req.tool_choice = None - req.extra_body = None - req.top_k = None + def test_include_stop_str_in_output_not_sent(self, req): + body = build_request_body(req, NimSettings()) + assert "include_stop_str_in_output" not in body.get("extra_body", {}) + def test_parallel_tool_calls_included(self, req): nim = NimSettings(parallel_tool_calls=False) body = build_request_body(req, nim) assert body["parallel_tool_calls"] is False