From c947e08cdc9e8e69b8c4e82cddb4be76a09bc0a4 Mon Sep 17 00:00:00 2001 From: Pat Wendorf <dungeons@gmail.com> Date: Sat, 8 Feb 2025 08:54:29 -0500 Subject: [PATCH] Allow for adjustment of temp, top_p and max tokens for models that have issues with repeating. --- summarize.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/summarize.py b/summarize.py index ea7bf8f..a50677f 100644 --- a/summarize.py +++ b/summarize.py @@ -20,6 +20,8 @@ FACT_PROMPT = os.getenv("FACT_PROMPT") SENTIMENT_PROMPT = os.getenv("SENTIMENT_PROMPT") CHUNK_SIZE = int(os.getenv("CHUNK_SIZE")) TEMPERATURE = float(os.getenv("TEMPERATURE")) +TOP_P = float(os.getenv("TOP_P")) +MAX_TOKENS = int(os.getenv("MAX_TOKENS")) def whisper_api(file): """Transcribe audio file using Whisper API.""" @@ -34,7 +36,7 @@ def whisper_api(file): def llm_local(prompt): client = OpenAI(api_key="doesntmatter", base_url=LLAMACPP_URL) messages=[{"role": "system", "content": SYSTEM_MESSAGE},{"role": "user", "content": prompt}] - response = client.chat.completions.create(model="whatever", temperature=TEMPERATURE, messages=messages) + response = client.chat.completions.create(model="whatever", max_tokens=MAX_TOKENS, temperature=TEMPERATURE, top_p=TOP_P, messages=messages) return response.choices[0].message.content def trim_silence(filename):