added a basic websearch proxy

This commit is contained in:
Concedo 2024-12-28 19:07:00 +08:00
parent 7c671f289e
commit baaecd1c65

View file

@ -23,6 +23,8 @@ import time
import asyncio import asyncio
import socket import socket
import threading import threading
import html
import urllib.parse as urlparse
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
from datetime import datetime, timezone from datetime import datetime, timezone
@ -1268,6 +1270,71 @@ def detokenize_ids(tokids):
detokstr = ctypes.string_at(detok).decode("UTF-8","ignore") detokstr = ctypes.string_at(detok).decode("UTF-8","ignore")
return detokstr return detokstr
# Performs a web search using DuckDuckGo and extracts text content from the top results.
def websearch(query, num_results=3, timeout=10):
    """Search DuckDuckGo's HTML endpoint and return text for the top results.

    Args:
        query: The search string. A falsy value (None or "") short-circuits
            to an empty result without touching the network.
        num_results: Maximum number of results to return (default 3).
        timeout: Seconds to wait on the HTTP request before giving up, so a
            stalled connection cannot block the caller forever.

    Returns:
        A list of strings, one per result, each built from the result's
        title link, url link and snippet text. An empty list is returned
        for an empty query or when the request fails; the failure is
        printed rather than raised.
    """
    if not query:
        return []
    import urllib.parse
    import urllib.request
    from html.parser import HTMLParser

    class ExtractResultsParser(HTMLParser):
        # Accumulates the text inside the result anchors of the results page.
        def __init__(self):
            super().__init__()
            self.results = []
            self.recordingTitle = False
            self.recordingDesc = False
            self.currentrytxt = ""    # text collected so far for the current result
            self.currsegmenttxt = ""  # text of the anchor currently being read

        def handle_starttag(self, tag, attrs):
            if tag != "a":
                return
            for attr_name, attr_value in attrs:
                # HTMLParser reports a valueless attribute (<a class>) as None,
                # which would otherwise crash on .split().
                if attr_name != "class" or not attr_value:
                    continue
                classes = attr_value.split()
                if not self.recordingTitle and "result__a" in classes:
                    # The title link begins a new result entry.
                    self.recordingTitle = True
                    self.currentrytxt = ""
                    self.currsegmenttxt = ""
                if not self.recordingTitle and "result__url" in classes:
                    # The url link is appended to the entry like a title line.
                    self.recordingTitle = True
                    self.currsegmenttxt = ""
                if not self.recordingDesc and "result__snippet" in classes:
                    self.recordingDesc = True
                    self.currsegmenttxt = ""

        def handle_endtag(self, tag):
            if tag != "a":
                return
            if self.recordingTitle:
                self.recordingTitle = False
                self.currentrytxt += self.currsegmenttxt.strip() + "\n"
                self.currsegmenttxt = ""
            if self.recordingDesc:
                # The snippet is the last piece of a result: finalize the entry.
                self.recordingDesc = False
                self.currentrytxt += self.currsegmenttxt.strip()
                self.currsegmenttxt = ""
                if self.currentrytxt != "":
                    self.results.append(self.currentrytxt.strip())
                    self.currentrytxt = ""

        def handle_data(self, data):
            if self.recordingTitle or self.recordingDesc:
                self.currsegmenttxt += data

    encoded_query = urllib.parse.quote(query)
    search_url = f"https://html.duckduckgo.com/html/?q={encoded_query}"
    try:
        req = urllib.request.Request(search_url, headers={'User-Agent': 'Mozilla/5.0'})
        with urllib.request.urlopen(req, timeout=timeout) as response:
            search_html = response.read().decode('utf-8', errors='ignore')
        parser = ExtractResultsParser()
        parser.feed(search_html)
        return parser.results[:max(num_results, 0)]
    except Exception as e:
        # Best-effort boundary: a failed search must not take down the caller.
        print(f"Error fetching URL {search_url}: {e}")
        # Keep the return type consistent (a list) even on failure.
        return []
################################################################# #################################################################
### A hacky simple HTTP server simulating a kobold api by Concedo ### A hacky simple HTTP server simulating a kobold api by Concedo
### we are intentionally NOT using flask, because we want MINIMAL dependencies ### we are intentionally NOT using flask, because we want MINIMAL dependencies
@ -1797,8 +1864,6 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
def noscript_webui(self): def noscript_webui(self):
global modelbusy, sslvalid global modelbusy, sslvalid
import html
import urllib.parse as urlparse
parsed_url = urlparse.urlparse(self.path) parsed_url = urlparse.urlparse(self.path)
parsed_dict = urlparse.parse_qs(parsed_url.query) parsed_dict = urlparse.parse_qs(parsed_url.query)
reply = "" reply = ""
@ -2022,6 +2087,18 @@ Enter Prompt:<br>
}, },
}).encode()) }).encode())
elif self.path.startswith(("/websearch")):
if args.websearch:
parsed_url = urlparse.urlparse(self.path)
parsed_dict = urlparse.parse_qs(parsed_url.query)
searchstr = (parsed_dict['q'][0]) if 'q' in parsed_dict else ""
if args.debugmode:
print(f"Searching web for: {searchstr}")
searchres = websearch(searchstr)
response_body = (json.dumps(searchres).encode())
else:
response_body = (json.dumps([]).encode())
elif self.path=="/api" or self.path=="/docs" or self.path.startswith(('/api/?json=','/api?json=','/docs/?json=','/docs?json=')): elif self.path=="/api" or self.path=="/docs" or self.path.startswith(('/api/?json=','/api?json=','/docs/?json=','/docs?json=')):
content_type = 'text/html' content_type = 'text/html'
if embedded_kcpp_docs is None: if embedded_kcpp_docs is None:
@ -2765,6 +2842,7 @@ def show_gui():
host_var = ctk.StringVar(value="") host_var = ctk.StringVar(value="")
multiuser_var = ctk.IntVar(value=1) multiuser_var = ctk.IntVar(value=1)
multiplayer_var = ctk.IntVar(value=has_multiplayer) multiplayer_var = ctk.IntVar(value=has_multiplayer)
websearch_var = ctk.IntVar(value=0)
horde_name_var = ctk.StringVar(value="koboldcpp") horde_name_var = ctk.StringVar(value="koboldcpp")
horde_gen_var = ctk.StringVar(value=maxhordelen) horde_gen_var = ctk.StringVar(value=maxhordelen)
horde_context_var = ctk.StringVar(value=maxhordectx) horde_context_var = ctk.StringVar(value=maxhordectx)
@ -3274,6 +3352,7 @@ def show_gui():
makecheckbox(network_tab, "Quiet Mode", quietmode, 4,tooltiptxt="Prevents all generation related terminal output from being displayed.") makecheckbox(network_tab, "Quiet Mode", quietmode, 4,tooltiptxt="Prevents all generation related terminal output from being displayed.")
makecheckbox(network_tab, "NoCertify Mode (Insecure)", nocertifymode, 4, 1,tooltiptxt="Allows insecure SSL connections. Use this if you have cert errors and need to bypass certificate restrictions.") makecheckbox(network_tab, "NoCertify Mode (Insecure)", nocertifymode, 4, 1,tooltiptxt="Allows insecure SSL connections. Use this if you have cert errors and need to bypass certificate restrictions.")
makecheckbox(network_tab, "Shared Multiplayer", multiplayer_var, 5,tooltiptxt="Hosts a shared multiplayer session that others can join.") makecheckbox(network_tab, "Shared Multiplayer", multiplayer_var, 5,tooltiptxt="Hosts a shared multiplayer session that others can join.")
makecheckbox(network_tab, "Enable WebSearch", websearch_var, 5, 1,tooltiptxt="Enable the local search engine proxy so Web Searches can be done.")
makefileentry(network_tab, "SSL Cert:", "Select SSL cert.pem file",ssl_cert_var, 7, width=200 ,filetypes=[("Unencrypted Certificate PEM", "*.pem")], singlerow=True, singlecol=False,tooltiptxt="Select your unencrypted .pem SSL certificate file for https.\nCan be generated with OpenSSL.") makefileentry(network_tab, "SSL Cert:", "Select SSL cert.pem file",ssl_cert_var, 7, width=200 ,filetypes=[("Unencrypted Certificate PEM", "*.pem")], singlerow=True, singlecol=False,tooltiptxt="Select your unencrypted .pem SSL certificate file for https.\nCan be generated with OpenSSL.")
makefileentry(network_tab, "SSL Key:", "Select SSL key.pem file", ssl_key_var, 9, width=200, filetypes=[("Unencrypted Key PEM", "*.pem")], singlerow=True, singlecol=False, tooltiptxt="Select your unencrypted .pem SSL key file for https.\nCan be generated with OpenSSL.") makefileentry(network_tab, "SSL Key:", "Select SSL key.pem file", ssl_key_var, 9, width=200, filetypes=[("Unencrypted Key PEM", "*.pem")], singlerow=True, singlecol=False, tooltiptxt="Select your unencrypted .pem SSL key file for https.\nCan be generated with OpenSSL.")
@ -3523,6 +3602,7 @@ def show_gui():
args.host = host_var.get() args.host = host_var.get()
args.multiuser = multiuser_var.get() args.multiuser = multiuser_var.get()
args.multiplayer = (multiplayer_var.get()==1) args.multiplayer = (multiplayer_var.get()==1)
args.websearch = (websearch_var.get()==1)
if usehorde_var.get() != 0: if usehorde_var.get() != 0:
args.hordemodelname = horde_name_var.get() args.hordemodelname = horde_name_var.get()
@ -3700,6 +3780,7 @@ def show_gui():
host_var.set(dict["host"] if ("host" in dict and dict["host"]) else "") host_var.set(dict["host"] if ("host" in dict and dict["host"]) else "")
multiuser_var.set(dict["multiuser"] if ("multiuser" in dict) else 1) multiuser_var.set(dict["multiuser"] if ("multiuser" in dict) else 1)
multiplayer_var.set(dict["multiplayer"] if ("multiplayer" in dict) else 0) multiplayer_var.set(dict["multiplayer"] if ("multiplayer" in dict) else 0)
websearch_var.set(dict["websearch"] if ("websearch" in dict) else 0)
horde_name_var.set(dict["hordemodelname"] if ("hordemodelname" in dict and dict["hordemodelname"]) else "koboldcpp") horde_name_var.set(dict["hordemodelname"] if ("hordemodelname" in dict and dict["hordemodelname"]) else "koboldcpp")
horde_context_var.set(dict["hordemaxctx"] if ("hordemaxctx" in dict and dict["hordemaxctx"]) else maxhordectx) horde_context_var.set(dict["hordemaxctx"] if ("hordemaxctx" in dict and dict["hordemaxctx"]) else maxhordectx)
@ -4984,6 +5065,7 @@ if __name__ == '__main__':
advparser.add_argument("--promptlimit", help="Sets the maximum number of generated tokens, usable only with --prompt or --benchmark",metavar=('[token limit]'), type=int, default=100) advparser.add_argument("--promptlimit", help="Sets the maximum number of generated tokens, usable only with --prompt or --benchmark",metavar=('[token limit]'), type=int, default=100)
advparser.add_argument("--multiuser", help="Runs in multiuser mode, which queues incoming requests instead of blocking them.", metavar=('limit'), nargs='?', const=1, type=int, default=1) advparser.add_argument("--multiuser", help="Runs in multiuser mode, which queues incoming requests instead of blocking them.", metavar=('limit'), nargs='?', const=1, type=int, default=1)
advparser.add_argument("--multiplayer", help="Hosts a shared multiplayer session that others can join.", action='store_true') advparser.add_argument("--multiplayer", help="Hosts a shared multiplayer session that others can join.", action='store_true')
advparser.add_argument("--websearch", help="Enable the local search engine proxy so Web Searches can be done.", action='store_true')
advparser.add_argument("--remotetunnel", help="Uses Cloudflare to create a remote tunnel, allowing you to access koboldcpp remotely over the internet even behind a firewall.", action='store_true') advparser.add_argument("--remotetunnel", help="Uses Cloudflare to create a remote tunnel, allowing you to access koboldcpp remotely over the internet even behind a firewall.", action='store_true')
advparser.add_argument("--highpriority", help="Experimental flag. If set, increases the process CPU priority, potentially speeding up generation. Use caution.", action='store_true') advparser.add_argument("--highpriority", help="Experimental flag. If set, increases the process CPU priority, potentially speeding up generation. Use caution.", action='store_true')
advparser.add_argument("--foreground", help="Windows only. Sends the terminal to the foreground every time a new prompt is generated. This helps avoid some idle slowdown issues.", action='store_true') advparser.add_argument("--foreground", help="Windows only. Sends the terminal to the foreground every time a new prompt is generated. This helps avoid some idle slowdown issues.", action='store_true')