diff --git a/surfsense_backend/app/routes/search_source_connectors_routes.py b/surfsense_backend/app/routes/search_source_connectors_routes.py index 482a825..5bfe0a9 100644 --- a/surfsense_backend/app/routes/search_source_connectors_routes.py +++ b/surfsense_backend/app/routes/search_source_connectors_routes.py @@ -9,7 +9,8 @@ POST /search-source-connectors/{connector_id}/index - Index content from a conne Note: Each user can have only one connector of each type (SERPER_API, TAVILY_API, SLACK_CONNECTOR, NOTION_CONNECTOR). """ -from fastapi import APIRouter, Depends, HTTPException, Query, BackgroundTasks +from fastapi import APIRouter, Depends, HTTPException, Query, BackgroundTasks, Body +from fastapi.concurrency import run_in_threadpool from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.future import select from sqlalchemy.exc import IntegrityError @@ -18,8 +19,9 @@ from app.db import get_async_session, User, SearchSourceConnector, SearchSourceC from app.schemas import SearchSourceConnectorCreate, SearchSourceConnectorUpdate, SearchSourceConnectorRead from app.users import current_active_user from app.utils.check_ownership import check_ownership -from pydantic import ValidationError +from pydantic import ValidationError, BaseModel, Field from app.tasks.connectors_indexing_tasks import index_slack_messages, index_notion_pages, index_github_repos +from app.connectors.github_connector import GitHubConnector from datetime import datetime, timezone import logging @@ -28,6 +30,37 @@ logger = logging.getLogger(__name__) router = APIRouter() +# --- New Schema for GitHub PAT --- +class GitHubPATRequest(BaseModel): + github_pat: str = Field(..., description="GitHub Personal Access Token") + +# --- New Endpoint to list GitHub Repositories --- +@router.post("/github/repositories/", response_model=List[Dict[str, Any]]) +async def list_github_repositories( + pat_request: GitHubPATRequest, + user: User = Depends(current_active_user) # Ensure the user is logged in +): + """ + Fetches a list of repositories accessible 
by the provided GitHub PAT. + The PAT is used for this request only and is not stored. + + Raises HTTPException 400 when GitHubConnector rejects the PAT with a ValueError, + and HTTPException 500 for any other failure. + """ + try: + # get_user_repositories() is invoked without await by the indexing task, i.e. it is synchronous; + # run the client in the threadpool so GitHub API calls do not block the event loop. + github_client = await run_in_threadpool(GitHubConnector, token=pat_request.github_pat) + repositories = await run_in_threadpool(github_client.get_user_repositories) + return repositories + except ValueError as e: + # Handle invalid token error specifically + logger.error(f"GitHub PAT validation failed for user {user.id}: {str(e)}") + raise HTTPException(status_code=400, detail=f"Invalid GitHub PAT: {str(e)}") from e + except Exception as e: + logger.error(f"Failed to fetch GitHub repositories for user {user.id}: {str(e)}") + raise HTTPException(status_code=500, detail="Failed to fetch GitHub repositories.") from e + @router.post("/search-source-connectors/", response_model=SearchSourceConnectorRead) async def create_search_source_connector( connector: SearchSourceConnectorCreate, diff --git a/surfsense_backend/app/schemas/search_source_connector.py b/surfsense_backend/app/schemas/search_source_connector.py index 41e1086..1005a63 100644 --- a/surfsense_backend/app/schemas/search_source_connector.py +++ b/surfsense_backend/app/schemas/search_source_connector.py @@ -4,7 +4,6 @@ from typing import Dict, Any from pydantic import BaseModel, field_validator from .base import IDModel, TimestampModel from app.db import SearchSourceConnectorType -from fastapi import HTTPException class SearchSourceConnectorBase(BaseModel): name: str @@ -59,14 +58,19 @@ class SearchSourceConnectorBase(BaseModel): raise ValueError("NOTION_INTEGRATION_TOKEN cannot be empty") elif connector_type == SearchSourceConnectorType.GITHUB_CONNECTOR: - # For GITHUB_CONNECTOR, only allow GITHUB_PAT - allowed_keys = ["GITHUB_PAT"] + # For GITHUB_CONNECTOR, only allow GITHUB_PAT and repo_full_names + allowed_keys = ["GITHUB_PAT", "repo_full_names"] if set(config.keys()) != set(allowed_keys): raise ValueError(f"For GITHUB_CONNECTOR connector type, config must only contain these keys: 
{allowed_keys}") # Ensure the token is not empty if not config.get("GITHUB_PAT"): raise ValueError("GITHUB_PAT cannot be empty") + + # Ensure the repo_full_names is present and is a non-empty list of non-blank strings + repo_full_names = config.get("repo_full_names") + if not isinstance(repo_full_names, list) or not repo_full_names or not all(isinstance(name, str) and name.strip() for name in repo_full_names): + raise ValueError("repo_full_names must be a non-empty list of strings") return config diff --git a/surfsense_backend/app/tasks/connectors_indexing_tasks.py b/surfsense_backend/app/tasks/connectors_indexing_tasks.py index 670fa26..31a7d4d 100644 --- a/surfsense_backend/app/tasks/connectors_indexing_tasks.py +++ b/surfsense_backend/app/tasks/connectors_indexing_tasks.py @@ -1,4 +1,4 @@ -from typing import Optional, List, Dict, Any, Tuple +from typing import Optional, Tuple from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.future import select @@ -626,10 +626,15 @@ async def index_github_repos( if not connector: return 0, f"Connector with ID {connector_id} not found or is not a GitHub connector" - # 2. Get the GitHub PAT from the connector config + # 2. Get the GitHub PAT and selected repositories from the connector config github_pat = connector.config.get("GITHUB_PAT") + repo_full_names_to_index = connector.config.get("repo_full_names") + if not github_pat: return 0, "GitHub Personal Access Token (PAT) not found in connector config" + + if not repo_full_names_to_index or not isinstance(repo_full_names_to_index, list): + return 0, "'repo_full_names' not found or is not a list in connector config" # 3. Initialize GitHub connector client try: @@ -637,13 +642,10 @@ async def index_github_repos( except ValueError as e: return 0, f"Failed to initialize GitHub client: {str(e)}" - # 4. 
Get list of accessible repositories - repositories = github_client.get_user_repositories() - if not repositories: - logger.info("No accessible GitHub repositories found for the provided token.") - return 0, "No accessible GitHub repositories found." - - logger.info(f"Found {len(repositories)} repositories to potentially index.") + # 4. Validate selected repositories + # For simplicity, we'll proceed with the list provided. + # If a repo is inaccessible, get_repository_files will likely fail gracefully later. + logger.info(f"Starting indexing for {len(repo_full_names_to_index)} selected repositories.") # 5. Get existing documents for this search space and connector type to prevent duplicates existing_docs_result = await session.execute( @@ -658,11 +660,10 @@ async def index_github_repos( existing_docs_lookup = {doc.document_metadata.get("full_path"): doc for doc in existing_docs if doc.document_metadata.get("full_path")} logger.info(f"Found {len(existing_docs_lookup)} existing GitHub documents in database for search space {search_space_id}") - # 6. Iterate through repositories and index files - for repo_info in repositories: - repo_full_name = repo_info.get("full_name") - if not repo_full_name: - logger.warning(f"Skipping repository with missing full_name: {repo_info.get('name')}") + # 6. 
Iterate through selected repositories and index files + for repo_full_name in repo_full_names_to_index: + if not repo_full_name or not isinstance(repo_full_name, str): + logger.warning(f"Skipping invalid repository entry: {repo_full_name}") continue logger.info(f"Processing repository: {repo_full_name}") diff --git a/surfsense_web/app/dashboard/[search_space_id]/connectors/add/github-connector/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/github-connector/page.tsx index 45534d6..fc7a602 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/connectors/add/github-connector/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/github-connector/page.tsx @@ -7,7 +7,7 @@ import { zodResolver } from "@hookform/resolvers/zod"; import { useForm } from "react-hook-form"; import * as z from "zod"; import { toast } from "sonner"; -import { ArrowLeft, Check, Info, Loader2, Github } from "lucide-react"; +import { ArrowLeft, Check, Info, Loader2, Github, CircleAlert, ListChecks } from "lucide-react"; // Assuming useSearchSourceConnectors hook exists and works similarly import { useSearchSourceConnectors } from "@/hooks/useSearchSourceConnectors"; @@ -42,9 +42,10 @@ import { AccordionTrigger, } from "@/components/ui/accordion"; import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/tabs"; +import { Checkbox } from "@/components/ui/checkbox"; -// Define the form schema with Zod for GitHub -const githubConnectorFormSchema = z.object({ +// Define the form schema with Zod for GitHub PAT entry step +const githubPatFormSchema = z.object({ name: z.string().min(3, { message: "Connector name must be at least 3 characters.", }), @@ -58,61 +59,144 @@ const githubConnectorFormSchema = z.object({ }); // Define the type for the form values -type GithubConnectorFormValues = z.infer; +type GithubPatFormValues = z.infer; + +// Type for fetched GitHub repositories +interface GithubRepo { + id: number; + name: string; + 
full_name: string; + private: boolean; + url: string; + description: string | null; + last_updated: string | null; +} export default function GithubConnectorPage() { const router = useRouter(); const params = useParams(); const searchSpaceId = params.search_space_id as string; - const [isSubmitting, setIsSubmitting] = useState(false); - const { createConnector } = useSearchSourceConnectors(); // Assuming this hook exists + const [step, setStep] = useState<'enter_pat' | 'select_repos'>('enter_pat'); + const [isFetchingRepos, setIsFetchingRepos] = useState(false); + const [isCreatingConnector, setIsCreatingConnector] = useState(false); + const [repositories, setRepositories] = useState([]); + const [selectedRepos, setSelectedRepos] = useState([]); + const [connectorName, setConnectorName] = useState("GitHub Connector"); + const [validatedPat, setValidatedPat] = useState(""); // Store the validated PAT - // Initialize the form - const form = useForm({ - resolver: zodResolver(githubConnectorFormSchema), + const { createConnector } = useSearchSourceConnectors(); + + // Initialize the form for PAT entry + const form = useForm({ + resolver: zodResolver(githubPatFormSchema), defaultValues: { - name: "GitHub Connector", + name: connectorName, github_pat: "", }, }); - // Handle form submission - const onSubmit = async (values: GithubConnectorFormValues) => { - setIsSubmitting(true); + // Function to fetch repositories using the new backend endpoint + const fetchRepositories = async (values: GithubPatFormValues) => { + setIsFetchingRepos(true); + setConnectorName(values.name); // Store the name + setValidatedPat(values.github_pat); // Store the PAT temporarily + try { + const token = localStorage.getItem('surfsense_bearer_token'); + if (!token) { + throw new Error('No authentication token found'); + } + + const response = await fetch( + `${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/github/repositories/`, + { + method: 'POST', + headers: { + 'Content-Type': 
'application/json', + 'Authorization': `Bearer ${token}` + }, + body: JSON.stringify({ github_pat: values.github_pat }) + } + ); + + if (!response.ok) { + const errorData = await response.json().catch(() => ({})); // error body may not be JSON (e.g. a proxy error page); fall back to statusText + throw new Error(errorData.detail || `Failed to fetch repositories: ${response.statusText}`); + } + + const data: GithubRepo[] = await response.json(); + setRepositories(data); + setStep('select_repos'); // Move to the next step + toast.success(`Found ${data.length} repositories.`); + } catch (error) { + console.error("Error fetching GitHub repositories:", error); + const errorMessage = error instanceof Error ? error.message : "Failed to fetch repositories. Please check the PAT and try again."; + toast.error(errorMessage); + } finally { + setIsFetchingRepos(false); + } + }; + + // Handle final connector creation + const handleCreateConnector = async () => { + if (selectedRepos.length === 0) { + toast.warning("Please select at least one repository to index."); + return; + } + + setIsCreatingConnector(true); try { await createConnector({ - name: values.name, + name: connectorName, // Use the stored name connector_type: "GITHUB_CONNECTOR", config: { - GITHUB_PAT: values.github_pat, + GITHUB_PAT: validatedPat, // Use the stored validated PAT + repo_full_names: selectedRepos, // Add the selected repo names }, - is_indexable: true, // GitHub connector is indexable - last_indexed_at: null, // New connector hasn't been indexed + is_indexable: true, + last_indexed_at: null, }); toast.success("GitHub connector created successfully!"); - - // Navigate back to connectors management page (or the add page) router.push(`/dashboard/${searchSpaceId}/connectors`); - } catch (error) { // Added type check for error + } catch (error) { console.error("Error creating GitHub connector:", error); - // Display specific backend error message if available - const errorMessage = error instanceof Error ? error.message : "Failed to create GitHub connector. 
Please check the PAT and permissions."; + const errorMessage = error instanceof Error ? error.message : "Failed to create GitHub connector."; toast.error(errorMessage); } finally { - setIsSubmitting(false); + setIsCreatingConnector(false); } }; + // Handle checkbox changes + const handleRepoSelection = (repoFullName: string, checked: boolean) => { + setSelectedRepos(prev => + checked + ? [...prev, repoFullName] + : prev.filter(name => name !== repoFullName) + ); + }; + return (
- Connect GitHub Account + + {step === 'enter_pat' ? : } + {step === 'enter_pat' ? "Connect GitHub Account" : "Select Repositories to Index"} + - Integrate with GitHub using a Personal Access Token (PAT) to search and retrieve information from accessible repositories. This connector can index your code and documentation. + {step === 'enter_pat' + ? "Provide a name and GitHub Personal Access Token (PAT) to fetch accessible repositories." + : `Select which repositories you want SurfSense to index for search. Found ${repositories.length} repositories accessible via your PAT.` + } - - - - GitHub Personal Access Token (PAT) Required - - You'll need a GitHub PAT with the appropriate scopes (e.g., 'repo') to use this connector. You can create one from your - - GitHub Developer Settings - . - - -
- - ( - - Connector Name - - - - - A friendly name to identify this GitHub connection. - - - - )} - /> + + {step === 'enter_pat' && ( + + + + GitHub Personal Access Token (PAT) Required + + You'll need a GitHub PAT with the appropriate scopes (e.g., 'repo') to fetch repositories. You can create one from your{' '} + + GitHub Developer Settings + . The PAT will be used to fetch repositories and then stored securely to enable indexing. + + - ( - - GitHub Personal Access Token (PAT) - - - - - Your GitHub PAT will be encrypted and stored securely. Ensure it has the necessary 'repo' scopes. - - - - )} - /> - -
- -
- - -
+ /> + + ( + + GitHub Personal Access Token (PAT) + + + + + Enter your GitHub PAT here to fetch your repositories. It will be stored encrypted later. + + + + )} + /> + +
+ +
+ +
+ )} + + {step === 'select_repos' && ( + + {repositories.length === 0 ? ( + + + No Repositories Found + + No repositories were found or accessible with the provided PAT. Please check the token and its permissions, then go back and try again. + + + ) : ( +
+ Repositories ({selectedRepos.length} selected) +
+ {repositories.map((repo) => ( +
+ handleRepoSelection(repo.full_name, !!checked)} + /> + +
+ ))} +
+ + Select the repositories you wish to index. Only checked repositories will be processed. + + +
+ + +
+
+ )} +
+ )} + +

What you get with GitHub integration:

    -
  • Search through code and documentation in your repositories
  • +
  • Search through code and documentation in your selected repositories
  • Access READMEs, Markdown files, and common code files
  • Connect your project knowledge directly to your search space
  • -
  • Index your repositories for enhanced search capabilities
  • +
  • Index your selected repositories for enhanced search capabilities
@@ -237,27 +398,20 @@ export default function GithubConnectorPage() {

How it works

- The GitHub connector uses a Personal Access Token (PAT) to authenticate with the GitHub API. It fetches information about repositories accessible to the token and indexes relevant files (code, markdown, text). + The GitHub connector uses a Personal Access Token (PAT) to authenticate with the GitHub API. First, it fetches a list of repositories accessible to the token. You then select which repositories you want to index. The connector indexes relevant files (code, markdown, text) from only the selected repositories.

  • The connector indexes files based on common code and documentation extensions.
  • Large files (over 1MB) are skipped during indexing.
  • +
  • Only selected repositories are indexed.
  • Indexing runs periodically (check connector settings for frequency) to keep content up-to-date.
- Step 1: Create a GitHub PAT - - - - Token Security - - Treat your PAT like a password. Store it securely and consider using fine-grained tokens if possible. - - - + Step 1: Generate a GitHub PAT +

Generating a Token:

@@ -280,9 +434,13 @@ export default function GithubConnectorPage() { Step 2: Connect in SurfSense
    -
  1. Paste the copied GitHub PAT into the "GitHub Personal Access Token (PAT)" field on the "Connect GitHub" tab.
  2. -
  3. Optionally, give the connector a custom name.
  4. -
  5. Click the Connect GitHub button.
  6. +
  7. Navigate to the "Connect GitHub" tab.
  8. +
  9. Enter a name for your connector.
  10. +
  11. Paste the copied GitHub PAT into the "GitHub Personal Access Token (PAT)" field.
  12. +
  13. Click Fetch Repositories.
  14. +
  15. If the PAT is valid, you'll see a list of your accessible repositories.
  16. +
  17. Select the repositories you want SurfSense to index using the checkboxes.
  18. +
  19. Click the Create Connector button.
  20. If the connection is successful, you will be redirected and can start indexing from the Connectors page.