Merge remote-tracking branch 'upstream/main' into feature/elasticsearch-connector

This commit is contained in:
Anish Sarkar 2025-10-17 01:02:29 +05:30
commit 8e1e81ebae
12 changed files with 668 additions and 1 deletions

View file

@ -0,0 +1,73 @@
"""Add BAIDU_SEARCH_API to searchsourceconnectortype enum
Revision ID: 30
Revises: 29
Changes:
1. Add BAIDU_SEARCH_API value to searchsourceconnectortype enum
2. Add BAIDU_SEARCH_API value to documenttype enum for consistency
"""
from collections.abc import Sequence
from alembic import op
# Revision identifiers, used by Alembic.
# `revision` names this migration; `down_revision` names the migration it revises
# (matches the "Revision ID" / "Revises" lines in the module docstring).
revision: str = "30"
down_revision: str | None = "29"
# No branch label or cross-revision dependency is declared for this migration.
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
def upgrade() -> None:
    """Add BAIDU_SEARCH_API to searchsourceconnectortype and documenttype enums.

    The connector enum is extended first, then the document-type enum, so both
    accept the new label. Each statement is guarded by an existence check and
    is therefore safe to run against a database that already has the value.
    """
    for enum_name in ("searchsourceconnectortype", "documenttype"):
        _add_enum_value_if_missing(enum_name, "BAIDU_SEARCH_API")


def _add_enum_value_if_missing(enum_name: str, value: str) -> None:
    """Append ``value`` to the PostgreSQL enum ``enum_name`` unless already present.

    Args:
        enum_name: Name of the enum type as stored in ``pg_type.typname``.
        value: Enum label to add.

    Both arguments are interpolated directly into the SQL text, so they must be
    trusted constants defined in this migration, never external input.
    """
    op.execute(
        f"""
        DO $$
        BEGIN
            IF NOT EXISTS (
                SELECT 1 FROM pg_type t
                JOIN pg_enum e ON t.oid = e.enumtypid
                WHERE t.typname = '{enum_name}' AND e.enumlabel = '{value}'
            ) THEN
                ALTER TYPE {enum_name} ADD VALUE '{value}';
            END IF;
        END
        $$;
        """
    )
def downgrade() -> None:
    """Deliberately do nothing when this migration is rolled back.

    Downgrade is not supported for the enum values added by ``upgrade``:
    removing an enum value can break existing data and is generally not safe.

    A manual removal would require, in order:
        1. Removing every reference to BAIDU_SEARCH_API in the database.
        2. Recreating the enum type without BAIDU_SEARCH_API.
        3. Reapplying all other enum values.
    """
    # No-op by design; see the docstring for the manual procedure.
    return None

View file

@ -1057,6 +1057,32 @@ async def fetch_relevant_documents(
}
)
elif connector == "BAIDU_SEARCH_API":
(
source_object,
baidu_chunks,
) = await connector_service.search_baidu(
user_query=reformulated_query,
user_id=user_id,
search_space_id=search_space_id,
top_k=top_k,
)
# Add to sources and raw documents
if source_object:
all_sources.append(source_object)
all_raw_documents.extend(baidu_chunks)
# Stream found document count
if streaming_service and writer:
writer(
{
"yield_value": streaming_service.format_terminal_info_delta(
f"🇨🇳 Found {len(baidu_chunks)} Baidu Search results related to your query"
)
}
)
elif connector == "DISCORD_CONNECTOR":
(
source_object,

View file

@ -48,6 +48,7 @@ def get_connector_emoji(connector_name: str) -> str:
"DISCORD_CONNECTOR": "🗨️",
"TAVILY_API": "🔍",
"LINKUP_API": "🔗",
"BAIDU_SEARCH_API": "🇨🇳",
"GOOGLE_CALENDAR_CONNECTOR": "📅",
"AIRTABLE_CONNECTOR": "🗃️",
"LUMA_CONNECTOR": "",
@ -73,6 +74,7 @@ def get_connector_friendly_name(connector_name: str) -> str:
"DISCORD_CONNECTOR": "Discord",
"TAVILY_API": "Tavily Search",
"LINKUP_API": "Linkup Search",
"BAIDU_SEARCH_API": "Baidu Search",
"AIRTABLE_CONNECTOR": "Airtable",
"LUMA_CONNECTOR": "Luma",
"ELASTICSEARCH_CONNECTOR": "Elasticsearch",

View file

@ -58,6 +58,7 @@ class SearchSourceConnectorType(str, Enum):
TAVILY_API = "TAVILY_API"
SEARXNG_API = "SEARXNG_API"
LINKUP_API = "LINKUP_API"
BAIDU_SEARCH_API = "BAIDU_SEARCH_API" # Baidu AI Search API for Chinese web search
SLACK_CONNECTOR = "SLACK_CONNECTOR"
NOTION_CONNECTOR = "NOTION_CONNECTOR"
GITHUB_CONNECTOR = "GITHUB_CONNECTOR"

View file

@ -560,6 +560,226 @@ class ConnectorService:
return result_object, documents
async def search_baidu(
    self,
    user_query: str,
    user_id: str,
    search_space_id: int,
    top_k: int = 20,
) -> tuple[dict[str, Any], list[dict[str, Any]]]:
    """
    Search using Baidu AI Search API and return both sources and documents.

    Baidu AI Search provides intelligent search with automatic summarization.
    Only the raw search results (the ``references`` array of the response) are
    used; the generated summary is ignored.

    Missing connector, missing API key, timeouts, HTTP errors, transport errors,
    undecodable or malformed responses are all reported with ``print`` and
    answered with an empty result instead of an exception.

    Args:
        user_query: User's search query
        user_id: User ID
        search_space_id: Search space ID
        top_k: Requested number of web results; clamped to 1..20 before being
            sent. Video results are requested at a quarter of that (min 1).

    Returns:
        tuple: (sources_info_dict, documents_list)
    """

    def empty_result() -> tuple[dict[str, Any], list[dict[str, Any]]]:
        # Build fresh objects on every call so no caller shares mutable state.
        return {
            "id": 12,
            "name": "Baidu Search",
            "type": "BAIDU_SEARCH_API",
            "sources": [],
        }, []

    # Get Baidu connector configuration
    baidu_connector = await self.get_connector_by_type(
        user_id, SearchSourceConnectorType.BAIDU_SEARCH_API, search_space_id
    )
    if not baidu_connector:
        return empty_result()

    config = baidu_connector.config or {}
    api_key = config.get("BAIDU_API_KEY")
    if not api_key:
        print("ERROR: Baidu connector is missing BAIDU_API_KEY configuration")
        # Log key names only: config values may contain credentials.
        print(f"Connector config keys: {list(config)}")
        return empty_result()

    # Optional configuration parameters
    model = config.get("BAIDU_MODEL", "ernie-3.5-8k")
    search_source = config.get("BAIDU_SEARCH_SOURCE", "baidu_search_v2")
    enable_deep_search = config.get("BAIDU_ENABLE_DEEP_SEARCH", False)

    # Baidu AI Search API endpoint
    baidu_endpoint = "https://qianfan.baidubce.com/v2/ai_search/chat/completions"

    # Note: Baidu uses X-Appbuilder-Authorization instead of the standard
    # Authorization header
    headers = {
        "X-Appbuilder-Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }

    # Baidu v2 supports max 20 per type; never request fewer than 1 so a
    # zero or negative top_k cannot produce an invalid filter.
    max_per_type = max(1, min(top_k, 20))
    payload = {
        "messages": [{"role": "user", "content": user_query}],
        "model": model,
        "search_source": search_source,
        "resource_type_filter": [
            {"type": "web", "top_k": max_per_type},
            {"type": "video", "top_k": max(1, max_per_type // 4)},  # Fewer videos
        ],
        "stream": False,  # Non-streaming for simpler processing
        "enable_deep_search": enable_deep_search,
        "enable_corner_markers": True,  # Enable reference markers
    }

    try:
        # Baidu AI Search may take longer as it performs search + summarization,
        # hence the 90 second timeout.
        async with httpx.AsyncClient(timeout=90.0) as client:
            response = await client.post(
                baidu_endpoint,
                headers=headers,
                json=payload,
            )
            response.raise_for_status()
    except httpx.TimeoutException as exc:
        print(f"ERROR: Baidu API request timeout after 90s: {exc!r}")
        print(f"Endpoint: {baidu_endpoint}")
        return empty_result()
    except httpx.HTTPStatusError as exc:
        print(f"ERROR: Baidu API HTTP Status Error: {exc.response.status_code}")
        print(f"Response text: {exc.response.text[:500]}")
        print(f"Request URL: {exc.request.url}")
        return empty_result()
    except httpx.RequestError as exc:
        print(f"ERROR: Baidu API Request Error: {type(exc).__name__}: {exc!r}")
        print(f"Endpoint: {baidu_endpoint}")
        return empty_result()
    except Exception as exc:
        # Last-resort boundary: a failing search must not abort the caller.
        print(f"ERROR: Unexpected error calling Baidu API: {type(exc).__name__}: {exc!r}")
        print(f"Endpoint: {baidu_endpoint}")
        print(f"Payload: {payload}")
        return empty_result()

    try:
        data = response.json()
    except ValueError as e:
        print(f"ERROR: Failed to decode JSON response from Baidu AI Search: {e}")
        print(f"Response status: {response.status_code}")
        print(f"Response text: {response.text[:500]}")  # First 500 chars
        return empty_result()

    # A valid JSON body that is not an object (list, string, number) has no
    # "references" key to read; treat it as an empty result.
    if not isinstance(data, dict):
        print(f"WARNING: Unexpected Baidu API response type: {type(data).__name__}")
        return empty_result()

    # Extract references (search results) from the response
    baidu_references = data.get("references", [])

    if "code" in data or "message" in data:
        print(f"WARNING: Baidu API returned error - Code: {data.get('code')}, Message: {data.get('message')}")

    if not baidu_references or not isinstance(baidu_references, list):
        print("WARNING: No references found in Baidu API response")
        print(f"Response keys: {list(data.keys())}")
        return empty_result()

    sources_list: list[dict[str, Any]] = []
    documents: list[dict[str, Any]] = []

    # The lock is held for the whole batch so the ids assigned below are
    # consecutive and not interleaved with other searches on this service.
    async with self.counter_lock:
        for reference in baidu_references:
            if not isinstance(reference, dict):
                # Skip malformed entries rather than failing the whole search.
                continue

            # `or` (not a .get default) so explicit nulls also fall back.
            title = reference.get("title") or "Baidu Search Result"
            url = reference.get("url") or ""
            content = reference.get("content") or ""
            date = reference.get("date") or ""
            ref_type = reference.get("type") or "web"  # web, image, video

            sources_list.append(
                {
                    "id": self.source_id_counter,
                    "title": title,
                    "description": content[:300],  # Limit description length
                    "url": url,
                }
            )

            metadata = {
                "url": url,
                "date": date,
                "type": ref_type,
                "source": "BAIDU_SEARCH_API",
                "web_anchor": reference.get("web_anchor", ""),
                "website": reference.get("website", ""),
            }

            # Add type-specific metadata
            if ref_type == "image" and reference.get("image"):
                metadata["image"] = reference["image"]
            elif ref_type == "video" and reference.get("video"):
                metadata["video"] = reference["video"]

            documents.append(
                {
                    "chunk_id": self.source_id_counter,
                    "content": content,
                    "score": 1.0,  # Baidu doesn't provide relevance scores
                    "document": {
                        "id": self.source_id_counter,
                        "title": title,
                        "document_type": "BAIDU_SEARCH_API",
                        "metadata": metadata,
                    },
                }
            )
            self.source_id_counter += 1

    result_object = {
        "id": 12,
        "name": "Baidu Search",
        "type": "BAIDU_SEARCH_API",
        "sources": sources_list,
    }
    return result_object, documents
async def search_slack(
self,
user_query: str,

View file

@ -434,6 +434,15 @@ def validate_connector_config(
},
},
"LINKUP_API": {"required": ["LINKUP_API_KEY"], "validators": {}},
"BAIDU_SEARCH_API": {
"required": ["BAIDU_API_KEY"],
"optional": [
"BAIDU_MODEL",
"BAIDU_SEARCH_SOURCE",
"BAIDU_ENABLE_DEEP_SEARCH",
],
"validators": {},
},
"SLACK_CONNECTOR": {"required": ["SLACK_BOT_TOKEN"], "validators": {}},
"NOTION_CONNECTOR": {
"required": ["NOTION_INTEGRATION_TOKEN"],

View file

@ -32,6 +32,11 @@ yarn-error.log*
# env files (can opt-in for committing if needed)
.env
.env.local
.env*.local
.env.development.local
.env.test.local
.env.production.local
# vercel
.vercel

View file

@ -0,0 +1,319 @@
"use client";
import { zodResolver } from "@hookform/resolvers/zod";
import { ArrowLeft, Check, Info, Loader2 } from "lucide-react";
import { motion } from "motion/react";
import { useParams, useRouter } from "next/navigation";
import { useState } from "react";
import { useForm } from "react-hook-form";
import { toast } from "sonner";
import * as z from "zod";
import { Alert, AlertDescription, AlertTitle } from "@/components/ui/alert";
import { Button } from "@/components/ui/button";
import {
Card,
CardContent,
CardDescription,
CardFooter,
CardHeader,
CardTitle,
} from "@/components/ui/card";
import {
Form,
FormControl,
FormDescription,
FormField,
FormItem,
FormLabel,
FormMessage,
} from "@/components/ui/form";
import { Input } from "@/components/ui/input";
import {
Select,
SelectContent,
SelectItem,
SelectTrigger,
SelectValue,
} from "@/components/ui/select";
import { Switch } from "@/components/ui/switch";
import { EnumConnectorName } from "@/contracts/enums/connector";
import { getConnectorIcon } from "@/contracts/enums/connectorIcons";
import { useSearchSourceConnectors } from "@/hooks/use-search-source-connectors";
// Validation rules for the "add Baidu Search connector" form.
// `name` and `api_key` are mandatory; the remaining fields tune the search request.
const baiduSearchApiFormSchema = z.object({
	name: z.string().min(3, "Connector name must be at least 3 characters."),
	api_key: z.string().min(10, "API key is required and must be valid."),
	model: z.string().optional(),
	search_source: z.enum(["baidu_search_v1", "baidu_search_v2"]).optional(),
	enable_deep_search: z.boolean().default(false),
});

// Shape of the validated form values, derived from the schema above.
type BaiduSearchApiFormValues = z.infer<typeof baiduSearchApiFormSchema>;
/**
 * Page that creates a Baidu Search connector for the search space named by the
 * `search_space_id` route parameter.
 *
 * The form collects a connector name, the API key and three optional search
 * settings. On submit it calls `createConnector` and, on success, navigates to
 * the search space's connector list. Failures are logged to the console and
 * surfaced as an error toast.
 */
export default function BaiduSearchApiPage() {
	const router = useRouter();
	const params = useParams();
	const searchSpaceId = params.search_space_id as string;
	const [isSubmitting, setIsSubmitting] = useState(false);
	const { createConnector } = useSearchSourceConnectors();

	// Initialize the form
	const form = useForm<BaiduSearchApiFormValues>({
		resolver: zodResolver(baiduSearchApiFormSchema),
		defaultValues: {
			name: "Baidu Search Connector",
			api_key: "",
			model: "ernie-3.5-8k",
			search_source: "baidu_search_v2",
			enable_deep_search: false,
		},
	});

	// Handle form submission
	const onSubmit = async (values: BaiduSearchApiFormValues) => {
		setIsSubmitting(true);
		try {
			// Build config object. `enable_deep_search` always has a value because
			// the schema defaults it to false, so it is set unconditionally.
			const config: Record<string, unknown> = {
				BAIDU_API_KEY: values.api_key,
				BAIDU_ENABLE_DEEP_SEARCH: values.enable_deep_search,
			};

			// Add optional parameters if provided
			if (values.model) {
				config.BAIDU_MODEL = values.model;
			}
			if (values.search_source) {
				config.BAIDU_SEARCH_SOURCE = values.search_source;
			}

			await createConnector(
				{
					name: values.name,
					connector_type: EnumConnectorName.BAIDU_SEARCH_API,
					config,
					is_indexable: false,
					last_indexed_at: null,
				},
				// Explicit radix: the route parameter is always a decimal id.
				Number.parseInt(searchSpaceId, 10)
			);

			toast.success("Baidu Search connector created successfully!");

			// Navigate back to connectors page
			router.push(`/dashboard/${searchSpaceId}/connectors`);
		} catch (error) {
			console.error("Error creating connector:", error);
			toast.error(error instanceof Error ? error.message : "Failed to create connector");
		} finally {
			// Re-enable the submit button whether the request succeeded or not.
			setIsSubmitting(false);
		}
	};

	return (
		<div className="container mx-auto py-8 max-w-3xl">
			<Button
				variant="ghost"
				className="mb-6"
				onClick={() => router.push(`/dashboard/${searchSpaceId}/connectors/add`)}
			>
				<ArrowLeft className="mr-2 h-4 w-4" />
				Back to Connectors
			</Button>

			{/* Header */}
			<div className="mb-8">
				<div className="flex items-center gap-4">
					<div className="flex h-12 w-12 items-center justify-center rounded-lg">
						{getConnectorIcon(EnumConnectorName.BAIDU_SEARCH_API, "h-6 w-6")}
					</div>
					<div>
						<h1 className="text-3xl font-bold tracking-tight">Connect Baidu Search</h1>
						<p className="text-muted-foreground">
							Connect Baidu AI Search for intelligent Chinese web search capabilities.
						</p>
					</div>
				</div>
			</div>

			<motion.div
				initial={{ opacity: 0, y: 20 }}
				animate={{ opacity: 1, y: 0 }}
				transition={{ duration: 0.5 }}
			>
				<Card className="border-2 border-border">
					<CardHeader>
						<CardTitle className="text-2xl font-bold">Connect Baidu Search</CardTitle>
						<CardDescription>
							Integrate with Baidu AI Search to enhance your search capabilities with
							intelligent Chinese web search results.
						</CardDescription>
					</CardHeader>
					<CardContent>
						<Alert className="mb-6 bg-muted">
							<Info className="h-4 w-4" />
							<AlertTitle>API Key Required</AlertTitle>
							<AlertDescription>
								You'll need a Baidu AppBuilder API key to use this connector. You can get one by
								signing up at{" "}
								<a
									href="https://qianfan.cloud.baidu.com/"
									target="_blank"
									rel="noopener noreferrer"
									className="font-medium underline underline-offset-4"
								>
									qianfan.cloud.baidu.com
								</a>
							</AlertDescription>
						</Alert>

						<Form {...form}>
							<form onSubmit={form.handleSubmit(onSubmit)} className="space-y-6">
								<FormField
									control={form.control}
									name="name"
									render={({ field }) => (
										<FormItem>
											<FormLabel>Connector Name</FormLabel>
											<FormControl>
												<Input placeholder="My Baidu Search Connector" {...field} />
											</FormControl>
											<FormDescription>A friendly name to identify this connector.</FormDescription>
											<FormMessage />
										</FormItem>
									)}
								/>

								<FormField
									control={form.control}
									name="api_key"
									render={({ field }) => (
										<FormItem>
											<FormLabel>Baidu AppBuilder API Key</FormLabel>
											<FormControl>
												<Input type="password" placeholder="Enter your Baidu API key" {...field} />
											</FormControl>
											<FormDescription>
												Your API key will be encrypted and stored securely.
											</FormDescription>
											<FormMessage />
										</FormItem>
									)}
								/>

								<FormField
									control={form.control}
									name="model"
									render={({ field }) => (
										<FormItem>
											<FormLabel>Model (Optional)</FormLabel>
											<Select onValueChange={field.onChange} defaultValue={field.value}>
												<FormControl>
													<SelectTrigger>
														<SelectValue placeholder="Select a model" />
													</SelectTrigger>
												</FormControl>
												<SelectContent>
													<SelectItem value="ernie-3.5-8k">ERNIE 3.5 8K</SelectItem>
													<SelectItem value="ernie-4.5-turbo-32k">ERNIE 4.5 Turbo 32K</SelectItem>
													<SelectItem value="ernie-4.5-turbo-128k">
														ERNIE 4.5 Turbo 128K
													</SelectItem>
													<SelectItem value="deepseek-v3">DeepSeek V3</SelectItem>
													<SelectItem value="qwen3-235b-a22b-instruct-2507">
														Qwen3 235B
													</SelectItem>
												</SelectContent>
											</Select>
											<FormDescription>
												The language model used for search summarization. Default: ERNIE 3.5 8K.
											</FormDescription>
											<FormMessage />
										</FormItem>
									)}
								/>

								<FormField
									control={form.control}
									name="search_source"
									render={({ field }) => (
										<FormItem>
											<FormLabel>Search Source (Optional)</FormLabel>
											<Select onValueChange={field.onChange} defaultValue={field.value}>
												<FormControl>
													<SelectTrigger>
														<SelectValue placeholder="Select search source" />
													</SelectTrigger>
												</FormControl>
												<SelectContent>
													<SelectItem value="baidu_search_v1">Baidu Search V1</SelectItem>
													<SelectItem value="baidu_search_v2">Baidu Search V2 (Recommended)</SelectItem>
												</SelectContent>
											</Select>
											<FormDescription>
												V2 provides better performance and richer content. Default: V2.
											</FormDescription>
											<FormMessage />
										</FormItem>
									)}
								/>

								<FormField
									control={form.control}
									name="enable_deep_search"
									render={({ field }) => (
										<FormItem className="flex flex-row items-center justify-between rounded-lg border p-4">
											<div className="space-y-0.5">
												<FormLabel className="text-base">Enable Deep Search</FormLabel>
												<FormDescription>
													Deep search retrieves up to 100 results per type (may incur additional
													costs).
												</FormDescription>
											</div>
											<FormControl>
												<Switch checked={field.value} onCheckedChange={field.onChange} />
											</FormControl>
										</FormItem>
									)}
								/>

								<div className="flex justify-end">
									<Button type="submit" disabled={isSubmitting} className="w-full sm:w-auto">
										{isSubmitting ? (
											<>
												<Loader2 className="mr-2 h-4 w-4 animate-spin" />
												Connecting...
											</>
										) : (
											<>
												<Check className="mr-2 h-4 w-4" />
												Connect Baidu Search
											</>
										)}
									</Button>
								</div>
							</form>
						</Form>
					</CardContent>
					<CardFooter className="flex flex-col items-start border-t bg-muted/50 px-6 py-4">
						<h4 className="text-sm font-medium">What you get with Baidu Search:</h4>
						<ul className="mt-2 list-disc pl-5 text-sm text-muted-foreground">
							<li>Intelligent search tailored for Chinese web content</li>
							<li>Real-time information from Baidu's search index</li>
							<li>AI-powered summarization with source references</li>
							<li>Support for web, video, and image search results</li>
						</ul>
					</CardFooter>
				</Card>
			</motion.div>
		</div>
	);
}

View file

@ -64,6 +64,12 @@ const connectorCategories: ConnectorCategory[] = [
title: "Elasticsearch",
description: "Connect to Elasticsearch to index and search documents, logs and metrics.",
icon: getConnectorIcon(EnumConnectorName.ELASTICSEARCH_CONNECTOR, "h-6 w-6"),
},
{
id: "baidu-search-api",
title: "Baidu Search",
description: "Search the Chinese web using Baidu AI Search API",
icon: getConnectorIcon(EnumConnectorName.BAIDU_SEARCH_API, "h-6 w-6"),
status: "available",
},
],

View file

@ -53,7 +53,10 @@ export function SourceDetailSheet({
// Check if this is a source type that should render directly from node
const isDirectRenderSource =
sourceType === "TAVILY_API" || sourceType === "LINKUP_API" || sourceType === "SEARXNG_API";
sourceType === "TAVILY_API" ||
sourceType === "LINKUP_API" ||
sourceType === "SEARXNG_API" ||
sourceType === "BAIDU_SEARCH_API";
useEffect(() => {
if (open && chunkId && !isDirectRenderSource) {

View file

@ -3,6 +3,7 @@ export enum EnumConnectorName {
TAVILY_API = "TAVILY_API",
SEARXNG_API = "SEARXNG_API",
LINKUP_API = "LINKUP_API",
BAIDU_SEARCH_API = "BAIDU_SEARCH_API",
SLACK_CONNECTOR = "SLACK_CONNECTOR",
NOTION_CONNECTOR = "NOTION_CONNECTOR",
GITHUB_CONNECTOR = "GITHUB_CONNECTOR",

View file

@ -35,6 +35,8 @@ export const getConnectorIcon = (connectorType: EnumConnectorName | string, clas
return <IconWorldWww {...iconProps} />;
case EnumConnectorName.SEARXNG_API:
return <Globe {...iconProps} />;
case EnumConnectorName.BAIDU_SEARCH_API:
return <Search {...iconProps} />;
case EnumConnectorName.SLACK_CONNECTOR:
return <IconBrandSlack {...iconProps} />;
case EnumConnectorName.NOTION_CONNECTOR: