From c947e08cdc9e8e69b8c4e82cddb4be76a09bc0a4 Mon Sep 17 00:00:00 2001 From: Pat Wendorf <dungeons@gmail.com> Date: Sat, 8 Feb 2025 08:54:29 -0500 Subject: [PATCH] Allow for adjustment of temp, top_p and max tokens for models that have issues with repeating. --- summarize.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/summarize.py b/summarize.py index ea7bf8f..a50677f 100644 --- a/summarize.py +++ b/summarize.py @@ -20,6 +20,8 @@ FACT_PROMPT = os.getenv("FACT_PROMPT") SENTIMENT_PROMPT = os.getenv("SENTIMENT_PROMPT") CHUNK_SIZE = int(os.getenv("CHUNK_SIZE")) TEMPERATURE = float(os.getenv("TEMPERATURE")) +TOP_P = float(os.getenv("TOP_P")) +MAX_TOKENS = int(os.getenv("MAX_TOKENS")) def whisper_api(file): """Transcribe audio file using Whisper API.""" @@ -34,7 +36,7 @@ def whisper_api(file): def llm_local(prompt): client = OpenAI(api_key="doesntmatter", base_url=LLAMACPP_URL) messages=[{"role": "system", "content": SYSTEM_MESSAGE},{"role": "user", "content": prompt}] - response = client.chat.completions.create(model="whatever", temperature=TEMPERATURE, messages=messages) + response = client.chat.completions.create(model="whatever", max_tokens=MAX_TOKENS, temperature=TEMPERATURE, top_p=TOP_P, messages=messages) return response.choices[0].message.content def trim_silence(filename):