mirror of
https://github.com/lfnovo/open-notebook.git
synced 2026-05-01 04:50:01 +00:00
improve search functions
This commit is contained in:
parent
b04761affc
commit
066c7a06e2
7 changed files with 287 additions and 38 deletions
134
migrations/4.surrealql
Normal file
134
migrations/4.surrealql
Normal file
|
|
@ -0,0 +1,134 @@
|
|||
|
||||
-- Drop any previous definition so the DEFINE below always installs this version.
REMOVE FUNCTION IF EXISTS fn::text_search;

-- fn::text_search: full-text lookup over sources and notes.
--   $query_text  : text handed to the matches operator (reference 1) in every branch
--   $match_count : maximum number of rows in the final result
--   $sources     : when false, all source-side branches yield an empty array
--   $show_notes  : when false, all note branches yield an empty array
-- Result rows carry id, parent_id, title, relevance and matches,
-- sorted by relevance from highest to lowest.
DEFINE FUNCTION IF NOT EXISTS fn::text_search($query_text: string, $match_count: int, $sources:bool, $show_notes:bool) {
    -- Sources matched on their title.
    LET $title_hits =
        IF $sources {(
            SELECT
                id,
                title,
                search::highlight('`', '`', 1) AS content,
                id AS parent_id,
                math::max(search::score(1)) AS relevance
            FROM source
            WHERE title @1@ $query_text
            GROUP BY id
        )}
        ELSE { [] };

    -- Embedded chunks matched on content. Rows are keyed by the owning source id,
    -- so several matching chunks of one source end up under the same id.
    LET $chunk_hits =
        IF $sources {(
            SELECT
                source.id AS id,
                source.title AS title,
                search::highlight('`', '`', 1) AS content,
                source.id AS parent_id,
                math::max(search::score(1)) AS relevance
            FROM source_embedding
            WHERE content @1@ $query_text
            GROUP BY id
        )}
        ELSE { [] };

    -- Sources matched on their full text.
    LET $full_text_hits =
        IF $sources {(
            SELECT
                id,
                title,
                search::highlight('`', '`', 1) AS content,
                id AS parent_id,
                math::max(search::score(1)) AS relevance
            FROM source
            WHERE full_text @1@ $query_text
            GROUP BY id
        )}
        ELSE { [] };

    -- Insights matched on content. The title is the insight type joined with the
    -- source title, falling back to an empty string when the source title is falsy.
    -- NOTE(review): parent_id is the insight record id here, while fn::vector_search
    -- in this migration projects source.id for insights - confirm this is intended.
    LET $insight_hits =
        IF $sources {(
            SELECT
                id,
                insight_type + " - " + (source.title OR '') AS title,
                search::highlight('`', '`', 1) AS content,
                id AS parent_id,
                math::max(search::score(1)) AS relevance
            FROM source_insight
            WHERE content @1@ $query_text
            GROUP BY id
        )}
        ELSE { [] };

    -- Notes matched on their title.
    LET $note_title_hits =
        IF $show_notes {(
            SELECT
                id,
                title,
                search::highlight('`', '`', 1) AS content,
                id AS parent_id,
                math::max(search::score(1)) AS relevance
            FROM note
            WHERE title @1@ $query_text
            GROUP BY id
        )}
        ELSE { [] };

    -- Notes matched on their content.
    LET $note_body_hits =
        IF $show_notes {(
            SELECT
                id,
                title,
                search::highlight('`', '`', 1) AS content,
                id AS parent_id,
                math::max(search::score(1)) AS relevance
            FROM note
            WHERE content @1@ $query_text
            GROUP BY id
        )}
        ELSE { [] };

    -- Merge every branch: chunk and full-text hits first, then title and insight
    -- hits, then the two note branches.
    LET $merged = array::union(
        array::union(
            array::union($chunk_hits, $full_text_hits),
            array::union($title_hits, $insight_hits)
        ),
        array::union($note_title_hits, $note_body_hits)
    );

    -- One row per (id, parent_id, title): keep the best relevance and flatten the
    -- highlighted snippets into matches. Rows without an id are discarded.
    RETURN (
        SELECT
            id,
            parent_id,
            title,
            math::max(relevance) AS relevance,
            array::flatten(content) AS matches
        FROM $merged
        WHERE id IS NOT NONE
        GROUP BY id, parent_id, title
        ORDER BY relevance DESC
        LIMIT $match_count
    );
};
|
||||
|
||||
-- Drop any previous definition so the DEFINE below always installs this version.
REMOVE FUNCTION IF EXISTS fn::vector_search;

-- fn::vector_search: cosine-similarity lookup over embedded chunks, insights and notes.
--   $query          : query embedding compared against each record embedding
--   $match_count    : row cap applied to every branch and again to the final result
--   $sources        : when false, the chunk and insight branches yield an empty array
--   $show_notes     : when false, the note branch yields an empty array
--   $min_similarity : records scoring below this cosine similarity are excluded
-- Result rows carry id, parent_id, title, similarity and matches,
-- sorted by similarity from highest to lowest.
DEFINE FUNCTION IF NOT EXISTS fn::vector_search($query: array<float>, $match_count: int, $sources: bool, $show_notes: bool, $min_similarity: float) {
    -- Embedded chunks, reported under the id and title of the owning source.
    LET $chunk_hits =
        IF $sources {(
            SELECT source.id AS id, source.title AS title, content, source.id AS parent_id, vector::similarity::cosine(embedding, $query) AS similarity
            FROM source_embedding
            WHERE vector::similarity::cosine(embedding, $query) >= $min_similarity
            ORDER BY similarity DESC
            LIMIT $match_count
        )}
        ELSE { [] };

    -- Insights, titled with the insight type joined with the source title
    -- (empty string when the source title is falsy) and parented to the source.
    LET $insight_hits =
        IF $sources {(
            SELECT id, insight_type + ' - ' + (source.title OR '') AS title, content, source.id AS parent_id, vector::similarity::cosine(embedding, $query) AS similarity
            FROM source_insight
            WHERE vector::similarity::cosine(embedding, $query) >= $min_similarity
            ORDER BY similarity DESC
            LIMIT $match_count
        )}
        ELSE { [] };

    -- Notes, each acting as its own parent.
    LET $note_hits =
        IF $show_notes {(
            SELECT id, title, content, id AS parent_id, vector::similarity::cosine(embedding, $query) AS similarity
            FROM note
            WHERE vector::similarity::cosine(embedding, $query) >= $min_similarity
            ORDER BY similarity DESC
            LIMIT $match_count
        )}
        ELSE { [] };

    -- Merge the branches: chunks and insights first, then notes.
    LET $merged = array::union(
        array::union($chunk_hits, $insight_hits),
        $note_hits
    );

    -- One row per (id, parent_id, title): keep the best similarity and flatten the
    -- matched content into matches. Rows without an id are discarded.
    RETURN (
        SELECT
            id,
            parent_id,
            title,
            math::max(similarity) AS similarity,
            array::flatten(content) AS matches
        FROM $merged
        WHERE id IS NOT NONE
        GROUP BY id, parent_id, title
        ORDER BY similarity DESC
        LIMIT $match_count
    );
};
|
||||
139
migrations/4_down.surrealql
Normal file
139
migrations/4_down.surrealql
Normal file
|
|
@ -0,0 +1,139 @@
|
|||
|
||||
-- Rollback for migration 4: drop the current fn::vector_search and define the
-- variant below in its place.
REMOVE FUNCTION IF EXISTS fn::vector_search;

-- fn::vector_search (rollback variant): cosine-similarity lookup over embedded
-- chunks, insights and notes.
--   $query          : query embedding compared against each record embedding
--   $match_count    : row cap applied to every branch and again to the final result
--   $sources        : when false, the chunk and insight branches yield an empty array
--   $show_notes     : when false, the note branch yields an empty array
--   $min_similarity : records scoring below this cosine similarity are excluded
-- Result rows carry id, title, content, parent_id and similarity,
-- sorted by similarity from highest to lowest.
DEFINE FUNCTION IF NOT EXISTS fn::vector_search($query: array<float>, $match_count: int, $sources: bool, $show_notes: bool, $min_similarity: float) {
    -- Embedded chunks keep their own record id and point at the source as parent.
    LET $chunk_hits =
        IF $sources {(
            SELECT id, source.title AS title, content, source.id AS parent_id, vector::similarity::cosine(embedding, $query) AS similarity
            FROM source_embedding
            WHERE vector::similarity::cosine(embedding, $query) >= $min_similarity
            ORDER BY similarity DESC
            LIMIT $match_count
        )}
        ELSE { [] };

    -- Insights, titled with the insight type joined with the source title.
    LET $insight_hits =
        IF $sources {(
            SELECT id, insight_type + ' - ' + source.title AS title, content, source.id AS parent_id, vector::similarity::cosine(embedding, $query) AS similarity
            FROM source_insight
            WHERE vector::similarity::cosine(embedding, $query) >= $min_similarity
            ORDER BY similarity DESC
            LIMIT $match_count
        )}
        ELSE { [] };

    -- Notes, each acting as its own parent.
    LET $note_hits =
        IF $show_notes {(
            SELECT id, title, content, id AS parent_id, vector::similarity::cosine(embedding, $query) AS similarity
            FROM note
            WHERE vector::similarity::cosine(embedding, $query) >= $min_similarity
            ORDER BY similarity DESC
            LIMIT $match_count
        )}
        ELSE { [] };

    -- Merge the branches: chunks and insights first, then notes.
    LET $merged = array::union(
        array::union($chunk_hits, $insight_hits),
        $note_hits
    );

    -- Group by id only, keeping the best similarity per id. This variant applies
    -- no filter on missing ids.
    RETURN (
        SELECT id, title, content, parent_id, math::max(similarity) AS similarity
        FROM $merged
        GROUP BY id
        ORDER BY similarity DESC
        LIMIT $match_count
    );
};
|
||||
|
||||
|
||||
-- Rollback for migration 4: drop the current fn::text_search and define the
-- variant below in its place.
REMOVE FUNCTION IF EXISTS fn::text_search;

-- fn::text_search (rollback variant): full-text lookup over sources and notes.
--   $query_text  : text handed to the matches operator (reference 1) in every branch
--   $match_count : maximum number of rows in the final result
--   $sources     : when false, all source-side branches yield an empty array
--   $show_notes  : when false, all note branches yield an empty array
-- Result rows carry id, title, content, parent_id and relevance,
-- sorted by relevance from highest to lowest.
DEFINE FUNCTION IF NOT EXISTS fn::text_search($query_text: string, $match_count: int, $sources:bool, $show_notes:bool) {
    -- Sources matched on their title.
    LET $title_hits =
        IF $sources {(
            SELECT
                id,
                title,
                search::highlight('`', '`', 1) AS content,
                id AS parent_id,
                math::max(search::score(1)) AS relevance
            FROM source
            WHERE title @1@ $query_text
            GROUP BY id
        )}
        ELSE { [] };

    -- Embedded chunks matched on content. Each chunk keeps its own record id and
    -- points at the source as parent.
    LET $chunk_hits =
        IF $sources {(
            SELECT
                id AS id,
                source.title AS title,
                search::highlight('`', '`', 1) AS content,
                source.id AS parent_id,
                math::max(search::score(1)) AS relevance
            FROM source_embedding
            WHERE content @1@ $query_text
            GROUP BY id
        )}
        ELSE { [] };

    -- Sources matched on their full text.
    -- NOTE(review): this projects source.id and source.title while selecting from
    -- the source table itself. If source records have no field named source these
    -- presumably resolve to NONE and the rows are dropped by the final id filter.
    -- Kept as is because this file restores the earlier definition - confirm.
    LET $full_text_hits =
        IF $sources {(
            SELECT
                source.id AS id,
                source.title AS title,
                search::highlight('`', '`', 1) AS content,
                source.id AS parent_id,
                math::max(search::score(1)) AS relevance
            FROM source
            WHERE full_text @1@ $query_text
            GROUP BY id
        )}
        ELSE { [] };

    -- Insights matched on content, titled with the insight type joined with the
    -- source title and parented to the source.
    LET $insight_hits =
        IF $sources {(
            SELECT
                id,
                insight_type + " - " + source.title AS title,
                search::highlight('`', '`', 1) AS content,
                source.id AS parent_id,
                math::max(search::score(1)) AS relevance
            FROM source_insight
            WHERE content @1@ $query_text
            GROUP BY id
        )}
        ELSE { [] };

    -- Notes matched on their title.
    LET $note_title_hits =
        IF $show_notes {(
            SELECT
                id,
                title,
                search::highlight('`', '`', 1) AS content,
                id AS parent_id,
                math::max(search::score(1)) AS relevance
            FROM note
            WHERE title @1@ $query_text
            GROUP BY id
        )}
        ELSE { [] };

    -- Notes matched on their content.
    LET $note_body_hits =
        IF $show_notes {(
            SELECT
                id,
                title,
                search::highlight('`', '`', 1) AS content,
                id AS parent_id,
                math::max(search::score(1)) AS relevance
            FROM note
            WHERE content @1@ $query_text
            GROUP BY id
        )}
        ELSE { [] };

    -- Merge every branch: chunk and full-text hits first, then title and insight
    -- hits, then the two note branches.
    LET $merged = array::union(
        array::union(
            array::union($chunk_hits, $full_text_hits),
            array::union($title_hits, $insight_hits)
        ),
        array::union($note_title_hits, $note_body_hits)
    );

    -- Group on every projected column except relevance, keep the best relevance
    -- per group, and discard rows without an id.
    RETURN (
        SELECT id, title, content, parent_id, math::max(relevance) AS relevance
        FROM $merged
        WHERE id IS NOT NONE
        GROUP BY id, title, content, parent_id
        ORDER BY relevance DESC
        LIMIT $match_count
    );
};
|
||||
|
|
@ -22,6 +22,7 @@ class MigrationManager:
|
|||
Migration.from_file("migrations/1.surrealql"),
|
||||
Migration.from_file("migrations/2.surrealql"),
|
||||
Migration.from_file("migrations/3.surrealql"),
|
||||
Migration.from_file("migrations/4.surrealql"),
|
||||
]
|
||||
self.down_migrations = [
|
||||
Migration.from_file(
|
||||
|
|
@ -29,6 +30,7 @@ class MigrationManager:
|
|||
),
|
||||
Migration.from_file("migrations/2_down.surrealql"),
|
||||
Migration.from_file("migrations/3_down.surrealql"),
|
||||
Migration.from_file("migrations/4_down.surrealql"),
|
||||
]
|
||||
self.runner = MigrationRunner(
|
||||
up_migrations=self.up_migrations,
|
||||
|
|
|
|||
|
|
@ -7,7 +7,6 @@ from langchain_core.runnables import (
|
|||
)
|
||||
from langgraph.graph import END, START, StateGraph
|
||||
from langgraph.types import Send
|
||||
from loguru import logger
|
||||
from pydantic import BaseModel, Field
|
||||
from typing_extensions import TypedDict
|
||||
|
||||
|
|
@ -63,7 +62,6 @@ async def call_model_with_messages(state: ThreadState, config: RunnableConfig) -
|
|||
)
|
||||
# model = model.bind_tools(tools)
|
||||
ai_message = (model | parser).invoke(system_prompt)
|
||||
logger.debug(ai_message)
|
||||
return {"strategy": ai_message}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -280,12 +280,6 @@ class OpenAILanguageModel(LanguageModel):
|
|||
Convert the language model to a LangChain chat model.
|
||||
"""
|
||||
|
||||
data = {
|
||||
"model": self.model_name,
|
||||
"top_p": self.top_p,
|
||||
"temperature": self.temperature,
|
||||
}
|
||||
|
||||
kwargs = self.kwargs.copy() # Make a copy to avoid modifying the original
|
||||
if self.json:
|
||||
kwargs["response_format"] = {"type": "json_object"}
|
||||
|
|
@ -293,19 +287,19 @@ class OpenAILanguageModel(LanguageModel):
|
|||
# Set the token limit in kwargs with the appropriate key
|
||||
if self.model_name in ["o1-mini", "o1-preview"]:
|
||||
kwargs["max_completion_tokens"] = self.max_tokens
|
||||
data["top_p"] = 1
|
||||
data["streaming"] = False
|
||||
data["max_tokens"] = None
|
||||
top_p = 1
|
||||
streaming = False
|
||||
max_tokens = None
|
||||
else:
|
||||
data["max_tokens"] = self.max_tokens
|
||||
data["top_p"] = self.top_p
|
||||
data["streaming"] = self.streaming
|
||||
max_tokens = self.max_tokens
|
||||
top_p = self.top_p
|
||||
streaming = self.streaming
|
||||
|
||||
return ChatOpenAI(
|
||||
model_name=data.get("model_name"),
|
||||
temperature=data.get("temperature"),
|
||||
streaming=data.get("streaming"),
|
||||
max_tokens=data.get("max_tokens"),
|
||||
top_p=data.get("top_p"),
|
||||
model=self.model_name,
|
||||
temperature=self.temperature,
|
||||
streaming=streaming,
|
||||
max_tokens=max_tokens,
|
||||
top_p=top_p,
|
||||
model_kwargs=kwargs,
|
||||
)
|
||||
|
|
|
|||
19
poetry.lock
generated
19
poetry.lock
generated
|
|
@ -1826,23 +1826,6 @@ files = [
|
|||
[package.extras]
|
||||
tests = ["freezegun", "pytest", "pytest-cov"]
|
||||
|
||||
[[package]]
|
||||
name = "icecream"
|
||||
version = "2.1.3"
|
||||
description = "Never use print() to debug again; inspect variables, expressions, and program execution with a single, simple function call."
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
files = [
|
||||
{file = "icecream-2.1.3-py2.py3-none-any.whl", hash = "sha256:757aec31ad4488b949bc4f499d18e6e5973c40cc4d4fc607229e78cfaec94c34"},
|
||||
{file = "icecream-2.1.3.tar.gz", hash = "sha256:0aa4a7c3374ec36153a1d08f81e3080e83d8ac1eefd97d2f4fe9544e8f9b49de"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
asttokens = ">=2.0.1"
|
||||
colorama = ">=0.3.9"
|
||||
executing = ">=0.3.1"
|
||||
pygments = ">=2.2.0"
|
||||
|
||||
[[package]]
|
||||
name = "identify"
|
||||
version = "2.6.2"
|
||||
|
|
@ -6465,4 +6448,4 @@ type = ["pytest-mypy"]
|
|||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = "^3.11"
|
||||
content-hash = "93b2d5c2ae9dd34b47c12f14b07b76d7d48c57c5eec78b09ae08a1d3a3e747dd"
|
||||
content-hash = "b672f17cddbf990c0d05737cc796ae92835864702a2eeee34732152ca796a0c7"
|
||||
|
|
|
|||
|
|
@ -17,7 +17,6 @@ streamlit = "^1.39.0"
|
|||
watchdog = "^5.0.3"
|
||||
pydantic = "^2.9.2"
|
||||
loguru = "^0.7.2"
|
||||
icecream = "^2.1.3"
|
||||
langchain = "^0.3.3"
|
||||
langgraph = "^0.2.38"
|
||||
humanize = "^4.11.0"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue