select repos when adding gh connector

This commit is contained in:
Adamsmith6300 2025-04-16 19:59:38 -07:00
parent 2b7a1b1082
commit ae8c74a5aa
4 changed files with 330 additions and 138 deletions

View file

@ -9,7 +9,7 @@ POST /search-source-connectors/{connector_id}/index - Index content from a conne
Note: Each user can have only one connector of each type (SERPER_API, TAVILY_API, SLACK_CONNECTOR, NOTION_CONNECTOR).
"""
from fastapi import APIRouter, Depends, HTTPException, Query, BackgroundTasks
from fastapi import APIRouter, Depends, HTTPException, Query, BackgroundTasks, Body
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select
from sqlalchemy.exc import IntegrityError
@ -18,8 +18,9 @@ from app.db import get_async_session, User, SearchSourceConnector, SearchSourceC
from app.schemas import SearchSourceConnectorCreate, SearchSourceConnectorUpdate, SearchSourceConnectorRead
from app.users import current_active_user
from app.utils.check_ownership import check_ownership
from pydantic import ValidationError
from pydantic import ValidationError, BaseModel, Field
from app.tasks.connectors_indexing_tasks import index_slack_messages, index_notion_pages, index_github_repos
from app.connectors.github_connector import GitHubConnector
from datetime import datetime, timezone
import logging
@ -28,6 +29,34 @@ logger = logging.getLogger(__name__)
router = APIRouter()
# --- New Schema for GitHub PAT ---
class GitHubPATRequest(BaseModel):
github_pat: str = Field(..., description="GitHub Personal Access Token")
# --- New Endpoint to list GitHub Repositories ---
@router.post("/github/repositories/", response_model=List[Dict[str, Any]])
async def list_github_repositories(
pat_request: GitHubPATRequest,
user: User = Depends(current_active_user) # Ensure the user is logged in
):
"""
Fetches a list of repositories accessible by the provided GitHub PAT.
The PAT is used for this request only and is not stored.
"""
try:
# Initialize GitHubConnector with the provided PAT
github_client = GitHubConnector(token=pat_request.github_pat)
# Fetch repositories
repositories = github_client.get_user_repositories()
return repositories
except ValueError as e:
# Handle invalid token error specifically
logger.error(f"GitHub PAT validation failed for user {user.id}: {str(e)}")
raise HTTPException(status_code=400, detail=f"Invalid GitHub PAT: {str(e)}")
except Exception as e:
logger.error(f"Failed to fetch GitHub repositories for user {user.id}: {str(e)}")
raise HTTPException(status_code=500, detail="Failed to fetch GitHub repositories.")
@router.post("/search-source-connectors/", response_model=SearchSourceConnectorRead)
async def create_search_source_connector(
connector: SearchSourceConnectorCreate,

View file

@ -4,7 +4,6 @@ from typing import Dict, Any
from pydantic import BaseModel, field_validator
from .base import IDModel, TimestampModel
from app.db import SearchSourceConnectorType
from fastapi import HTTPException
class SearchSourceConnectorBase(BaseModel):
name: str
@ -59,14 +58,19 @@ class SearchSourceConnectorBase(BaseModel):
raise ValueError("NOTION_INTEGRATION_TOKEN cannot be empty")
elif connector_type == SearchSourceConnectorType.GITHUB_CONNECTOR:
# For GITHUB_CONNECTOR, only allow GITHUB_PAT
allowed_keys = ["GITHUB_PAT"]
# For GITHUB_CONNECTOR, only allow GITHUB_PAT and repo_full_names
allowed_keys = ["GITHUB_PAT", "repo_full_names"]
if set(config.keys()) != set(allowed_keys):
raise ValueError(f"For GITHUB_CONNECTOR connector type, config must only contain these keys: {allowed_keys}")
# Ensure the token is not empty
if not config.get("GITHUB_PAT"):
raise ValueError("GITHUB_PAT cannot be empty")
# Ensure the repo_full_names is present and is a non-empty list
repo_full_names = config.get("repo_full_names")
if not isinstance(repo_full_names, list) or not repo_full_names:
raise ValueError("repo_full_names must be a non-empty list of strings")
return config

View file

@ -1,4 +1,4 @@
from typing import Optional, List, Dict, Any, Tuple
from typing import Optional, Tuple
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.future import select
@ -626,10 +626,15 @@ async def index_github_repos(
if not connector:
return 0, f"Connector with ID {connector_id} not found or is not a GitHub connector"
# 2. Get the GitHub PAT from the connector config
# 2. Get the GitHub PAT and selected repositories from the connector config
github_pat = connector.config.get("GITHUB_PAT")
repo_full_names_to_index = connector.config.get("repo_full_names")
if not github_pat:
return 0, "GitHub Personal Access Token (PAT) not found in connector config"
if not repo_full_names_to_index or not isinstance(repo_full_names_to_index, list):
return 0, "'repo_full_names' not found or is not a list in connector config"
# 3. Initialize GitHub connector client
try:
@ -637,13 +642,10 @@ async def index_github_repos(
except ValueError as e:
return 0, f"Failed to initialize GitHub client: {str(e)}"
# 4. Get list of accessible repositories
repositories = github_client.get_user_repositories()
if not repositories:
logger.info("No accessible GitHub repositories found for the provided token.")
return 0, "No accessible GitHub repositories found."
logger.info(f"Found {len(repositories)} repositories to potentially index.")
# 4. Validate selected repositories
# For simplicity, we'll proceed with the list provided.
# If a repo is inaccessible, get_repository_files will likely fail gracefully later.
logger.info(f"Starting indexing for {len(repo_full_names_to_index)} selected repositories.")
# 5. Get existing documents for this search space and connector type to prevent duplicates
existing_docs_result = await session.execute(
@ -658,11 +660,10 @@ async def index_github_repos(
existing_docs_lookup = {doc.document_metadata.get("full_path"): doc for doc in existing_docs if doc.document_metadata.get("full_path")}
logger.info(f"Found {len(existing_docs_lookup)} existing GitHub documents in database for search space {search_space_id}")
# 6. Iterate through repositories and index files
for repo_info in repositories:
repo_full_name = repo_info.get("full_name")
if not repo_full_name:
logger.warning(f"Skipping repository with missing full_name: {repo_info.get('name')}")
# 6. Iterate through selected repositories and index files
for repo_full_name in repo_full_names_to_index:
if not repo_full_name or not isinstance(repo_full_name, str):
logger.warning(f"Skipping invalid repository entry: {repo_full_name}")
continue
logger.info(f"Processing repository: {repo_full_name}")

View file

@ -7,7 +7,7 @@ import { zodResolver } from "@hookform/resolvers/zod";
import { useForm } from "react-hook-form";
import * as z from "zod";
import { toast } from "sonner";
import { ArrowLeft, Check, Info, Loader2, Github } from "lucide-react";
import { ArrowLeft, Check, Info, Loader2, Github, CircleAlert, ListChecks } from "lucide-react";
// Assuming useSearchSourceConnectors hook exists and works similarly
import { useSearchSourceConnectors } from "@/hooks/useSearchSourceConnectors";
@ -42,9 +42,10 @@ import {
AccordionTrigger,
} from "@/components/ui/accordion";
import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/tabs";
import { Checkbox } from "@/components/ui/checkbox";
// Define the form schema with Zod for GitHub
const githubConnectorFormSchema = z.object({
// Define the form schema with Zod for GitHub PAT entry step
const githubPatFormSchema = z.object({
name: z.string().min(3, {
message: "Connector name must be at least 3 characters.",
}),
@ -58,61 +59,144 @@ const githubConnectorFormSchema = z.object({
});
// Define the type for the form values
type GithubConnectorFormValues = z.infer<typeof githubConnectorFormSchema>;
type GithubPatFormValues = z.infer<typeof githubPatFormSchema>;
// Type for fetched GitHub repositories
interface GithubRepo {
id: number;
name: string;
full_name: string;
private: boolean;
url: string;
description: string | null;
last_updated: string | null;
}
export default function GithubConnectorPage() {
const router = useRouter();
const params = useParams();
const searchSpaceId = params.search_space_id as string;
const [isSubmitting, setIsSubmitting] = useState(false);
const { createConnector } = useSearchSourceConnectors(); // Assuming this hook exists
const [step, setStep] = useState<'enter_pat' | 'select_repos'>('enter_pat');
const [isFetchingRepos, setIsFetchingRepos] = useState(false);
const [isCreatingConnector, setIsCreatingConnector] = useState(false);
const [repositories, setRepositories] = useState<GithubRepo[]>([]);
const [selectedRepos, setSelectedRepos] = useState<string[]>([]);
const [connectorName, setConnectorName] = useState<string>("GitHub Connector");
const [validatedPat, setValidatedPat] = useState<string>(""); // Store the validated PAT
// Initialize the form
const form = useForm<GithubConnectorFormValues>({
resolver: zodResolver(githubConnectorFormSchema),
const { createConnector } = useSearchSourceConnectors();
// Initialize the form for PAT entry
const form = useForm<GithubPatFormValues>({
resolver: zodResolver(githubPatFormSchema),
defaultValues: {
name: "GitHub Connector",
name: connectorName,
github_pat: "",
},
});
// Handle form submission
const onSubmit = async (values: GithubConnectorFormValues) => {
setIsSubmitting(true);
// Function to fetch repositories using the new backend endpoint
const fetchRepositories = async (values: GithubPatFormValues) => {
setIsFetchingRepos(true);
setConnectorName(values.name); // Store the name
setValidatedPat(values.github_pat); // Store the PAT temporarily
try {
const token = localStorage.getItem('surfsense_bearer_token');
if (!token) {
throw new Error('No authentication token found');
}
const response = await fetch(
`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/github/repositories/`,
{
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Authorization': `Bearer ${token}`
},
body: JSON.stringify({ github_pat: values.github_pat })
}
);
if (!response.ok) {
const errorData = await response.json();
throw new Error(errorData.detail || `Failed to fetch repositories: ${response.statusText}`);
}
const data: GithubRepo[] = await response.json();
setRepositories(data);
setStep('select_repos'); // Move to the next step
toast.success(`Found ${data.length} repositories.`);
} catch (error) {
console.error("Error fetching GitHub repositories:", error);
const errorMessage = error instanceof Error ? error.message : "Failed to fetch repositories. Please check the PAT and try again.";
toast.error(errorMessage);
} finally {
setIsFetchingRepos(false);
}
};
// Handle final connector creation
const handleCreateConnector = async () => {
if (selectedRepos.length === 0) {
toast.warning("Please select at least one repository to index.");
return;
}
setIsCreatingConnector(true);
try {
await createConnector({
name: values.name,
name: connectorName, // Use the stored name
connector_type: "GITHUB_CONNECTOR",
config: {
GITHUB_PAT: values.github_pat,
GITHUB_PAT: validatedPat, // Use the stored validated PAT
repo_full_names: selectedRepos, // Add the selected repo names
},
is_indexable: true, // GitHub connector is indexable
last_indexed_at: null, // New connector hasn't been indexed
is_indexable: true,
last_indexed_at: null,
});
toast.success("GitHub connector created successfully!");
// Navigate back to connectors management page (or the add page)
router.push(`/dashboard/${searchSpaceId}/connectors`);
} catch (error) { // Added type check for error
} catch (error) {
console.error("Error creating GitHub connector:", error);
// Display specific backend error message if available
const errorMessage = error instanceof Error ? error.message : "Failed to create GitHub connector. Please check the PAT and permissions.";
const errorMessage = error instanceof Error ? error.message : "Failed to create GitHub connector.";
toast.error(errorMessage);
} finally {
setIsSubmitting(false);
setIsCreatingConnector(false);
}
};
// Handle checkbox changes
const handleRepoSelection = (repoFullName: string, checked: boolean) => {
setSelectedRepos(prev =>
checked
? [...prev, repoFullName]
: prev.filter(name => name !== repoFullName)
);
};
return (
<div className="container mx-auto py-8 max-w-3xl">
<Button
variant="ghost"
className="mb-6"
onClick={() => router.push(`/dashboard/${searchSpaceId}/connectors/add`)}
onClick={() => {
if (step === 'select_repos') {
// Go back to PAT entry, clear sensitive/fetched data
setStep('enter_pat');
setRepositories([]);
setSelectedRepos([]);
setValidatedPat("");
// Reset form PAT field, keep name
form.reset({ name: connectorName, github_pat: "" });
} else {
router.push(`/dashboard/${searchSpaceId}/connectors/add`);
}
}}
>
<ArrowLeft className="mr-2 h-4 w-4" />
Back to Add Connectors
{step === 'select_repos' ? "Back to PAT Entry" : "Back to Add Connectors"}
</Button>
<motion.div
@ -129,97 +213,174 @@ export default function GithubConnectorPage() {
<TabsContent value="connect">
<Card className="border-2 border-border">
<CardHeader>
<CardTitle className="text-2xl font-bold flex items-center gap-2"><Github className="h-6 w-6" /> Connect GitHub Account</CardTitle>
<CardTitle className="text-2xl font-bold flex items-center gap-2">
{step === 'enter_pat' ? <Github className="h-6 w-6" /> : <ListChecks className="h-6 w-6" />}
{step === 'enter_pat' ? "Connect GitHub Account" : "Select Repositories to Index"}
</CardTitle>
<CardDescription>
Integrate with GitHub using a Personal Access Token (PAT) to search and retrieve information from accessible repositories. This connector can index your code and documentation.
{step === 'enter_pat'
? "Provide a name and GitHub Personal Access Token (PAT) to fetch accessible repositories."
: `Select which repositories you want SurfSense to index for search. Found ${repositories.length} repositories accessible via your PAT.`
}
</CardDescription>
</CardHeader>
<CardContent>
<Alert className="mb-6 bg-muted">
<Info className="h-4 w-4" />
<AlertTitle>GitHub Personal Access Token (PAT) Required</AlertTitle>
<AlertDescription>
You'll need a GitHub PAT with the appropriate scopes (e.g., 'repo') to use this connector. You can create one from your
<a
href="https://github.com/settings/personal-access-tokens"
target="_blank"
rel="noopener noreferrer"
className="font-medium underline underline-offset-4 ml-1"
>
GitHub Developer Settings
</a>.
</AlertDescription>
</Alert>
<Form {...form}>
<form onSubmit={form.handleSubmit(onSubmit)} className="space-y-6">
<FormField
control={form.control}
name="name"
render={({ field }) => (
<FormItem>
<FormLabel>Connector Name</FormLabel>
<FormControl>
<Input placeholder="My GitHub Connector" {...field} />
</FormControl>
<FormDescription>
A friendly name to identify this GitHub connection.
</FormDescription>
<FormMessage />
</FormItem>
)}
/>
<Form {...form}>
{step === 'enter_pat' && (
<CardContent>
<Alert className="mb-6 bg-muted">
<Info className="h-4 w-4" />
<AlertTitle>GitHub Personal Access Token (PAT) Required</AlertTitle>
<AlertDescription>
You'll need a GitHub PAT with the appropriate scopes (e.g., 'repo') to fetch repositories. You can create one from your{' '}
<a
href="https://github.com/settings/personal-access-tokens"
target="_blank"
rel="noopener noreferrer"
className="font-medium underline underline-offset-4"
>
GitHub Developer Settings
</a>. The PAT will be used to fetch repositories and then stored securely to enable indexing.
</AlertDescription>
</Alert>
<FormField
control={form.control}
name="github_pat"
render={({ field }) => (
<FormItem>
<FormLabel>GitHub Personal Access Token (PAT)</FormLabel>
<FormControl>
<Input
type="password"
placeholder="ghp_... or github_pat_..."
{...field}
/>
</FormControl>
<FormDescription>
Your GitHub PAT will be encrypted and stored securely. Ensure it has the necessary 'repo' scopes.
</FormDescription>
<FormMessage />
</FormItem>
)}
/>
<div className="flex justify-end">
<Button
type="submit"
disabled={isSubmitting}
className="w-full sm:w-auto"
>
{isSubmitting ? (
<>
<Loader2 className="mr-2 h-4 w-4 animate-spin" />
Connecting...
</>
) : (
<>
<Check className="mr-2 h-4 w-4" />
Connect GitHub
</>
<form onSubmit={form.handleSubmit(fetchRepositories)} className="space-y-6">
<FormField
control={form.control}
name="name"
render={({ field }) => (
<FormItem>
<FormLabel>Connector Name</FormLabel>
<FormControl>
<Input placeholder="My GitHub Connector" {...field} />
</FormControl>
<FormDescription>
A friendly name to identify this GitHub connection.
</FormDescription>
<FormMessage />
</FormItem>
)}
</Button>
</div>
</form>
</Form>
</CardContent>
/>
<FormField
control={form.control}
name="github_pat"
render={({ field }) => (
<FormItem>
<FormLabel>GitHub Personal Access Token (PAT)</FormLabel>
<FormControl>
<Input
type="password"
placeholder="ghp_... or github_pat_..."
{...field}
/>
</FormControl>
<FormDescription>
Enter your GitHub PAT here to fetch your repositories. It will be stored encrypted later.
</FormDescription>
<FormMessage />
</FormItem>
)}
/>
<div className="flex justify-end">
<Button
type="submit"
disabled={isFetchingRepos}
className="w-full sm:w-auto"
>
{isFetchingRepos ? (
<>
<Loader2 className="mr-2 h-4 w-4 animate-spin" />
Fetching Repositories...
</>
) : (
"Fetch Repositories"
)}
</Button>
</div>
</form>
</CardContent>
)}
{step === 'select_repos' && (
<CardContent>
{repositories.length === 0 ? (
<Alert variant="destructive">
<CircleAlert className="h-4 w-4" />
<AlertTitle>No Repositories Found</AlertTitle>
<AlertDescription>
No repositories were found or accessible with the provided PAT. Please check the token and its permissions, then go back and try again.
</AlertDescription>
</Alert>
) : (
<div className="space-y-4">
<FormLabel>Repositories ({selectedRepos.length} selected)</FormLabel>
<div className="h-64 w-full rounded-md border p-4 overflow-y-auto">
{repositories.map((repo) => (
<div key={repo.id} className="flex items-center space-x-2 mb-2 py-1">
<Checkbox
id={`repo-${repo.id}`}
checked={selectedRepos.includes(repo.full_name)}
onCheckedChange={(checked) => handleRepoSelection(repo.full_name, !!checked)}
/>
<label
htmlFor={`repo-${repo.id}`}
className="text-sm font-medium leading-none peer-disabled:cursor-not-allowed peer-disabled:opacity-70"
>
{repo.full_name} {repo.private && "(Private)"}
</label>
</div>
))}
</div>
<FormDescription>
Select the repositories you wish to index. Only checked repositories will be processed.
</FormDescription>
<div className="flex justify-between items-center pt-4">
<Button
variant="outline"
onClick={() => {
setStep('enter_pat');
setRepositories([]);
setSelectedRepos([]);
setValidatedPat("");
form.reset({ name: connectorName, github_pat: "" });
}}
>
Back
</Button>
<Button
onClick={handleCreateConnector}
disabled={isCreatingConnector || selectedRepos.length === 0}
className="w-full sm:w-auto"
>
{isCreatingConnector ? (
<>
<Loader2 className="mr-2 h-4 w-4 animate-spin" />
Creating Connector...
</>
) : (
<>
<Check className="mr-2 h-4 w-4" />
Create Connector
</>
)}
</Button>
</div>
</div>
)}
</CardContent>
)}
</Form>
<CardFooter className="flex flex-col items-start border-t bg-muted/50 px-6 py-4">
<h4 className="text-sm font-medium">What you get with GitHub integration:</h4>
<ul className="mt-2 list-disc pl-5 text-sm text-muted-foreground">
<li>Search through code and documentation in your repositories</li>
<li>Search through code and documentation in your selected repositories</li>
<li>Access READMEs, Markdown files, and common code files</li>
<li>Connect your project knowledge directly to your search space</li>
<li>Index your repositories for enhanced search capabilities</li>
<li>Index your selected repositories for enhanced search capabilities</li>
</ul>
</CardFooter>
</Card>
@ -237,27 +398,20 @@ export default function GithubConnectorPage() {
<div>
<h3 className="text-xl font-semibold mb-2">How it works</h3>
<p className="text-muted-foreground">
The GitHub connector uses a Personal Access Token (PAT) to authenticate with the GitHub API. It fetches information about repositories accessible to the token and indexes relevant files (code, markdown, text).
The GitHub connector uses a Personal Access Token (PAT) to authenticate with the GitHub API. First, it fetches a list of repositories accessible to the token. You then select which repositories you want to index. The connector indexes relevant files (code, markdown, text) from only the selected repositories.
</p>
<ul className="mt-2 list-disc pl-5 text-muted-foreground">
<li>The connector indexes files based on common code and documentation extensions.</li>
<li>Large files (over 1MB) are skipped during indexing.</li>
<li>Only selected repositories are indexed.</li>
<li>Indexing runs periodically (check connector settings for frequency) to keep content up-to-date.</li>
</ul>
</div>
<Accordion type="single" collapsible className="w-full">
<AccordionItem value="create_pat">
<AccordionTrigger className="text-lg font-medium">Step 1: Create a GitHub PAT</AccordionTrigger>
<AccordionContent className="space-y-4">
<Alert className="bg-muted">
<Info className="h-4 w-4" />
<AlertTitle>Token Security</AlertTitle>
<AlertDescription>
Treat your PAT like a password. Store it securely and consider using fine-grained tokens if possible.
</AlertDescription>
</Alert>
<AccordionTrigger className="text-lg font-medium">Step 1: Generate GitHub PAT</AccordionTrigger>
<AccordionContent>
<div className="space-y-6">
<div>
<h4 className="font-medium mb-2">Generating a Token:</h4>
@ -280,9 +434,13 @@ export default function GithubConnectorPage() {
<AccordionTrigger className="text-lg font-medium">Step 2: Connect in SurfSense</AccordionTrigger>
<AccordionContent className="space-y-4">
<ol className="list-decimal pl-5 space-y-3">
<li>Paste the copied GitHub PAT into the "GitHub Personal Access Token (PAT)" field on the "Connect GitHub" tab.</li>
<li>Optionally, give the connector a custom name.</li>
<li>Click the <strong>Connect GitHub</strong> button.</li>
<li>Navigate to the "Connect GitHub" tab.</li>
<li>Enter a name for your connector.</li>
<li>Paste the copied GitHub PAT into the "GitHub Personal Access Token (PAT)" field.</li>
<li>Click <strong>Fetch Repositories</strong>.</li>
<li>If the PAT is valid, you'll see a list of your accessible repositories.</li>
<li>Select the repositories you want SurfSense to index using the checkboxes.</li>
<li>Click the <strong>Create Connector</strong> button.</li>
<li>If the connection is successful, you will be redirected and can start indexing from the Connectors page.</li>
</ol>
</AccordionContent>