Allow adjustment of temperature, top_p, and max tokens for models that have issues with repetition.

This commit is contained in:
Pat Wendorf 2025-02-08 08:54:29 -05:00
parent 933b9e24d4
commit c947e08cdc

View file

@ -20,6 +20,8 @@ FACT_PROMPT = os.getenv("FACT_PROMPT")
# Runtime configuration pulled from the environment. All of these are
# required: a missing variable makes int()/float() raise at import time,
# which fails fast instead of running with a bad config.
SENTIMENT_PROMPT = os.getenv("SENTIMENT_PROMPT")
CHUNK_SIZE = int(os.getenv("CHUNK_SIZE"))
TEMPERATURE = float(os.getenv("TEMPERATURE"))
TOP_P = float(os.getenv("TOP_P"))
# max_tokens is a token *count* — the chat completions API expects an
# integer, so coerce with int(), not float().
MAX_TOKENS = int(os.getenv("MAX_TOKENS"))
def whisper_api(file):
"""Transcribe audio file using Whisper API."""
@ -34,7 +36,7 @@ def whisper_api(file):
def llm_local(prompt):
    """Send *prompt* to the local llama.cpp server and return the reply text.

    Uses the OpenAI-compatible endpoint at LLAMACPP_URL; the API key and
    model name are placeholders because llama.cpp ignores both. Sampling
    parameters (MAX_TOKENS, TEMPERATURE, TOP_P) come from the environment
    configuration at the top of the module.
    """
    client = OpenAI(api_key="doesntmatter", base_url=LLAMACPP_URL)
    messages = [
        {"role": "system", "content": SYSTEM_MESSAGE},
        {"role": "user", "content": prompt},
    ]
    # Exactly one completion request. (The previous revision's call without
    # max_tokens/top_p was left in place alongside the new one, which made
    # two requests and discarded the first result.)
    response = client.chat.completions.create(
        model="whatever",
        max_tokens=MAX_TOKENS,
        temperature=TEMPERATURE,
        top_p=TOP_P,
        messages=messages,
    )
    return response.choices[0].message.content
def trim_silence(filename):