mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 01:24:36 +00:00
websearch functional
This commit is contained in:
parent
709dab6289
commit
6026501ed2
1 changed file with 10 additions and 3 deletions
13
koboldcpp.py
13
koboldcpp.py
|
@ -58,7 +58,7 @@ maxhordelen = 400
|
||||||
modelbusy = threading.Lock()
|
modelbusy = threading.Lock()
|
||||||
requestsinqueue = 0
|
requestsinqueue = 0
|
||||||
defaultport = 5001
|
defaultport = 5001
|
||||||
KcppVersion = "1.80.3"
|
KcppVersion = "1.81"
|
||||||
showdebug = True
|
showdebug = True
|
||||||
guimode = False
|
guimode = False
|
||||||
showsamplerwarning = True
|
showsamplerwarning = True
|
||||||
|
@ -1310,6 +1310,11 @@ def websearch(query):
|
||||||
results = list(executor.map(fetch_searched_webpage, urls))
|
results = list(executor.map(fetch_searched_webpage, urls))
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
def normalize_page_text(text):
    """Tidy the spacing around sentence punctuation in a piece of text.

    Two passes are applied, in order:

    1. Any whitespace directly before ``.``, ``,``, ``!`` or ``?`` is removed.
    2. A single space is inserted after one of those marks when it is
       immediately followed by a non-whitespace character.

    The second pass deliberately leaves two cases untouched, because padding
    them would corrupt the text rather than normalize it:

    * a ``.`` or ``,`` sitting between two digits (``3.14``, ``1,000``);
    * a mark directly followed by another mark (``...``, ``?!``).

    Args:
        text: The string to normalize.

    Returns:
        The normalized string.
    """
    # Remove spaces before punctuation
    text = re.sub(r'\s+([.,!?])', r'\1', text)

    def _pad(match):
        # Group 1 only participates for the digit-mark-digit alternative,
        # which must be reproduced unchanged.
        if match.group(1) is not None:
            return match.group(0)
        return match.group(0) + ' '

    # Ensure a single space follows punctuation, if not at the end of a line.
    # The first alternative is tried first and "consumes" numeric separators
    # so that the generic second alternative never sees them.
    text = re.sub(r'(?<=\d)([.,])(?=\d)|[.,!?](?=[^\s.,!?])', _pad, text)
    return text
|
||||||
|
|
||||||
class VisibleTextParser(HTMLParser):
|
class VisibleTextParser(HTMLParser):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
|
@ -1393,12 +1398,14 @@ def websearch(query):
|
||||||
parser2 = VisibleTextParser()
|
parser2 = VisibleTextParser()
|
||||||
parser2.feed(html_content)
|
parser2.feed(html_content)
|
||||||
scraped = parser2.get_text().strip()
|
scraped = parser2.get_text().strip()
|
||||||
|
scraped = normalize_page_text(scraped)
|
||||||
|
desc = normalize_page_text(desc)
|
||||||
s = difflib.SequenceMatcher(None, scraped.lower(), desc.lower(), autojunk=False)
|
s = difflib.SequenceMatcher(None, scraped.lower(), desc.lower(), autojunk=False)
|
||||||
matches = s.find_longest_match(0, len(scraped), 0, desclen)
|
matches = s.find_longest_match(0, len(scraped), 0, desclen)
|
||||||
if matches.size > 100 and desclen-matches.size < 100: #good enough match
|
if matches.size > 100 and desclen-matches.size < 100: #good enough match
|
||||||
# expand description by some chars both sides
|
# expand description by some chars both sides
|
||||||
expandamtbefore = 250
|
expandamtbefore = 200
|
||||||
expandamtafter = 750
|
expandamtafter = 800
|
||||||
startpt = matches.a - expandamtbefore
|
startpt = matches.a - expandamtbefore
|
||||||
startpt = 0 if startpt < 0 else startpt
|
startpt = 0 if startpt < 0 else startpt
|
||||||
endpt = matches.a + expandamtafter + desclen
|
endpt = matches.a + expandamtafter + desclen
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue