From bb198e38c015c6f7e0face338158627bc1b10f62 Mon Sep 17 00:00:00 2001 From: Adamsmith6300 Date: Sun, 13 Apr 2025 13:56:22 -0700 Subject: [PATCH 1/4] add github connector, add alembic for db migrations, fix bug updating connectors --- surfsense_backend/.gitignore | 3 +- surfsense_backend/alembic.ini | 119 +++++++ surfsense_backend/alembic/README | 1 + surfsense_backend/alembic/env.py | 98 ++++++ surfsense_backend/alembic/script.py.mako | 28 ++ .../versions/1_add_github_connector_enum.py | 53 ++++ .../app/connectors/github_connector.py | 182 +++++++++++ surfsense_backend/app/db.py | 2 + .../routes/search_source_connectors_routes.py | 146 +++++---- .../app/schemas/search_source_connector.py | 12 +- .../app/tasks/connectors_indexing_tasks.py | 195 +++++++++++- surfsense_backend/main.py | 7 + surfsense_backend/pyproject.toml | 2 + surfsense_backend/uv.lock | 54 ++++ .../connectors/(manage)/page.tsx | 3 +- .../connectors/[connector_id]/page.tsx | 24 +- .../connectors/add/github-connector/page.tsx | 298 ++++++++++++++++++ .../[search_space_id]/connectors/add/page.tsx | 189 +++++------ 18 files changed, 1232 insertions(+), 184 deletions(-) create mode 100644 surfsense_backend/alembic.ini create mode 100644 surfsense_backend/alembic/README create mode 100644 surfsense_backend/alembic/env.py create mode 100644 surfsense_backend/alembic/script.py.mako create mode 100644 surfsense_backend/alembic/versions/1_add_github_connector_enum.py create mode 100644 surfsense_backend/app/connectors/github_connector.py create mode 100644 surfsense_web/app/dashboard/[search_space_id]/connectors/add/github-connector/page.tsx diff --git a/surfsense_backend/.gitignore b/surfsense_backend/.gitignore index a663d9d..ee59e47 100644 --- a/surfsense_backend/.gitignore +++ b/surfsense_backend/.gitignore @@ -3,4 +3,5 @@ venv/ data/ __pycache__/ -.flashrank_cache \ No newline at end of file +.flashrank_cache +surf_new_backend.egg-info/ diff --git a/surfsense_backend/alembic.ini b/surfsense_backend/alembic.ini new file mode 100644 index 0000000..9b2a76f --- /dev/null +++ b/surfsense_backend/alembic.ini @@ -0,0 +1,119 @@ +# A generic, single database configuration. + +[alembic] +# path to migration scripts. +# Use forward slashes (/) also on windows to provide an os agnostic path +script_location = alembic + +# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s +# Uncomment the line below if you want the files to be prepended with date and time +# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s + +# sys.path path, will be prepended to sys.path if present. +# defaults to the current working directory. +prepend_sys_path = . + +# timezone to use when rendering the date within the migration file +# as well as the filename. +# If specified, requires the python>=3.9 or backports.zoneinfo library and tzdata library. +# Any required deps can installed by adding `alembic[tz]` to the pip requirements +# string value is passed to ZoneInfo() +# leave blank for localtime +# timezone = + +# max length of characters to apply to the "slug" field +# truncate_slug_length = 40 + +# set to 'true' to run the environment during +# the 'revision' command, regardless of autogenerate +# revision_environment = false + +# set to 'true' to allow .pyc and .pyo files without +# a source .py file to be detected as revisions in the +# versions/ directory +# sourceless = false + +# version location specification; This defaults +# to alembic/versions. 
When using multiple version +# directories, initial revisions must be specified with --version-path. +# The path separator used here should be the separator specified by "version_path_separator" below. +# version_locations = %(here)s/bar:%(here)s/bat:alembic/versions + +# version path separator; As mentioned above, this is the character used to split +# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep. +# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas. +# Valid values for version_path_separator are: +# +# version_path_separator = : +# version_path_separator = ; +# version_path_separator = space +# version_path_separator = newline +# +# Use os.pathsep. Default configuration used for new projects. +version_path_separator = os + +# set to 'true' to search source files recursively +# in each "version_locations" directory +# new in Alembic version 1.10 +# recursive_version_locations = false + +# the output encoding used when revision files +# are written from script.py.mako +# output_encoding = utf-8 + +# The SQLAlchemy URL to connect to +# IMPORTANT: Replace this with your actual async database URL +sqlalchemy.url = postgresql+asyncpg://postgres:postgres@localhost:5432/surfsense + + +[post_write_hooks] +# post_write_hooks defines scripts or Python functions that are run +# on newly generated revision scripts. See the documentation for further +# detail and examples + +# format using "black" - use the console_scripts runner, against the "black" entrypoint +# hooks = black +# black.type = console_scripts +# black.entrypoint = black +# black.options = -l 79 REVISION_SCRIPT_FILENAME + +# lint with attempts to fix using "ruff" - use the exec runner, execute a binary +# hooks = ruff +# ruff.type = exec +# ruff.executable = %(here)s/.venv/bin/ruff +# ruff.options = check --fix REVISION_SCRIPT_FILENAME + +# Logging configuration +[loggers] +keys = root,sqlalchemy,alembic + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARNING +handlers = console +qualname = + +[logger_sqlalchemy] +level = WARNING +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S diff --git a/surfsense_backend/alembic/README b/surfsense_backend/alembic/README new file mode 100644 index 0000000..e0d0858 --- /dev/null +++ b/surfsense_backend/alembic/README @@ -0,0 +1 @@ +Generic single-database configuration with an async dbapi. 
\ No newline at end of file diff --git a/surfsense_backend/alembic/env.py b/surfsense_backend/alembic/env.py new file mode 100644 index 0000000..d6e7104 --- /dev/null +++ b/surfsense_backend/alembic/env.py @@ -0,0 +1,98 @@ +import asyncio +from logging.config import fileConfig + +import os +import sys +from sqlalchemy import pool +from sqlalchemy.engine import Connection +from sqlalchemy.ext.asyncio import async_engine_from_config + +from alembic import context + +# Ensure the app directory is in the Python path +# This allows Alembic to find your models +sys.path.insert(0, os.path.realpath(os.path.join(os.path.dirname(__file__), '..'))) + +# Import your models base +from app.db import Base # Assuming your Base is defined in app.db + +# this is the Alembic Config object, which provides +# access to the values within the .ini file in use. +config = context.config + +# Interpret the config file for Python logging. +# This line sets up loggers basically. +if config.config_file_name is not None: + fileConfig(config.config_file_name) + +# add your model's MetaData object here +# for 'autogenerate' support +# from myapp import mymodel +# target_metadata = mymodel.Base.metadata +target_metadata = Base.metadata + +# other values from the config, defined by the needs of env.py, +# can be acquired: +# my_important_option = config.get_main_option("my_important_option") +# ... etc. + + +def run_migrations_offline() -> None: + """Run migrations in 'offline' mode. + + This configures the context with just a URL + and not an Engine, though an Engine is acceptable + here as well. By skipping the Engine creation + we don't even need a DBAPI to be available. + + Calls to context.execute() here emit the given string to the + script output. + + """ + url = config.get_main_option("sqlalchemy.url") + context.configure( + url=url, + target_metadata=target_metadata, + literal_binds=True, + dialect_opts={"paramstyle": "named"}, + ) + + with context.begin_transaction(): + context.run_migrations() + + +def do_run_migrations(connection: Connection) -> None: + context.configure(connection=connection, target_metadata=target_metadata) + + with context.begin_transaction(): + context.run_migrations() + + +async def run_async_migrations() -> None: + """In this scenario we need to create an Engine + and associate a connection with the context. + + """ + + connectable = async_engine_from_config( + config.get_section(config.config_ini_section, {}), + prefix="sqlalchemy.", + poolclass=pool.NullPool, + ) + + async with connectable.connect() as connection: + await connection.run_sync(do_run_migrations) + + await connectable.dispose() + + +def run_migrations_online() -> None: + """Run migrations in 'online' mode.""" + + asyncio.run(run_async_migrations()) + + +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() diff --git a/surfsense_backend/alembic/script.py.mako b/surfsense_backend/alembic/script.py.mako new file mode 100644 index 0000000..480b130 --- /dev/null +++ b/surfsense_backend/alembic/script.py.mako @@ -0,0 +1,28 @@ +"""${message} + +Revision ID: ${up_revision} +Revises: ${down_revision | comma,n} +Create Date: ${create_date} + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +${imports if imports else ""} + +# revision identifiers, used by Alembic. 
+revision: str = ${repr(up_revision)} +down_revision: Union[str, None] = ${repr(down_revision)} +branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)} +depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)} + + +def upgrade() -> None: + """Upgrade schema.""" + ${upgrades if upgrades else "pass"} + + +def downgrade() -> None: + """Downgrade schema.""" + ${downgrades if downgrades else "pass"} diff --git a/surfsense_backend/alembic/versions/1_add_github_connector_enum.py b/surfsense_backend/alembic/versions/1_add_github_connector_enum.py new file mode 100644 index 0000000..bb72838 --- /dev/null +++ b/surfsense_backend/alembic/versions/1_add_github_connector_enum.py @@ -0,0 +1,53 @@ +"""Add GITHUB_CONNECTOR to SearchSourceConnectorType enum + +Revision ID: 1 +Revises: +Create Date: 2023-10-27 10:00:00.000000 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +# Import pgvector if needed for other types, though not for this ENUM change +# import pgvector + + +# revision identifiers, used by Alembic. +revision: str = '1' +down_revision: Union[str, None] = None +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + + # Manually add the command to add the enum value + # Note: It's generally better to let autogenerate handle this, but we're bypassing it + op.execute("ALTER TYPE searchsourceconnectortype ADD VALUE 'GITHUB_CONNECTOR'") + + # Pass for the rest, as autogenerate didn't run to add other schema details + pass + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + + # Downgrading removal of an enum value is complex and potentially dangerous + # if the value is in use. Often omitted or requires manual SQL based on context. + # For now, we'll just pass. If you needed to reverse this, you'd likely + # have to manually check if 'GITHUB_CONNECTOR' is used in the table + # and then potentially recreate the type without it. 
+ op.execute("ALTER TYPE searchsourceconnectortype RENAME TO searchsourceconnectortype_old") + op.execute("CREATE TYPE searchsourceconnectortype AS ENUM('SERPER_API', 'TAVILY_API', 'SLACK_CONNECTOR', 'NOTION_CONNECTOR')") + op.execute(( + "ALTER TABLE search_source_connectors ALTER COLUMN connector_type TYPE searchsourceconnectortype USING " + "connector_type::text::searchsourceconnectortype" + )) + op.execute("DROP TYPE searchsourceconnectortype_old") + + + pass + # ### end Alembic commands ### diff --git a/surfsense_backend/app/connectors/github_connector.py b/surfsense_backend/app/connectors/github_connector.py new file mode 100644 index 0000000..d827dac --- /dev/null +++ b/surfsense_backend/app/connectors/github_connector.py @@ -0,0 +1,182 @@ +import base64 +import logging +from typing import List, Optional, Dict, Any, Tuple +from github3 import login as github_login, exceptions as github_exceptions +from github3.repos.repo import Repository +from github3.repos.contents import Contents +from github3.exceptions import ForbiddenError, NotFoundError + +logger = logging.getLogger(__name__) + +# List of common code file extensions to target +CODE_EXTENSIONS = { + '.py', '.js', '.jsx', '.ts', '.tsx', '.java', '.c', '.cpp', '.h', '.hpp', + '.cs', '.go', '.rb', '.php', '.swift', '.kt', '.scala', '.rs', '.m', + '.sh', '.bash', '.ps1', '.lua', '.pl', '.pm', '.r', '.dart', '.sql' +} + +# List of common documentation/text file extensions +DOC_EXTENSIONS = { + '.md', '.txt', '.rst', '.adoc', '.html', '.htm', '.xml', '.json', '.yaml', '.yml', '.toml' +} + +# Maximum file size in bytes (e.g., 1MB) +MAX_FILE_SIZE = 1 * 1024 * 1024 + +class GitHubConnector: + """Connector for interacting with the GitHub API.""" + + def __init__(self, token: str): + """ + Initializes the GitHub connector. + + Args: + token: GitHub Personal Access Token (PAT). + """ + if not token: + raise ValueError("GitHub token cannot be empty.") + try: + self.gh = github_login(token=token) + # Try a simple authenticated call to check token validity + self.gh.me() + logger.info("Successfully authenticated with GitHub API.") + except (github_exceptions.AuthenticationFailed, ForbiddenError) as e: + logger.error(f"GitHub authentication failed: {e}") + raise ValueError("Invalid GitHub token or insufficient permissions.") + except Exception as e: + logger.error(f"Failed to initialize GitHub client: {e}") + raise + + def get_user_repositories(self) -> List[Dict[str, Any]]: + """Fetches repositories accessible by the authenticated user.""" + repos_data = [] + try: + # type='owner' fetches repos owned by the user + # type='member' fetches repos the user is a collaborator on (including orgs) + # type='all' fetches both + for repo in self.gh.repositories(type='all', sort='updated'): + if isinstance(repo, Repository): + repos_data.append({ + "id": repo.id, + "name": repo.name, + "full_name": repo.full_name, + "private": repo.private, + "url": repo.html_url, + "description": repo.description or "", + "last_updated": repo.updated_at.isoformat() if repo.updated_at else None, + }) + logger.info(f"Fetched {len(repos_data)} repositories.") + return repos_data + except Exception as e: + logger.error(f"Failed to fetch GitHub repositories: {e}") + return [] # Return empty list on error + + def get_repository_files(self, repo_full_name: str, path: str = '') -> List[Dict[str, Any]]: + """ + Recursively fetches details of relevant files (code, docs) within a repository path. + + Args: + repo_full_name: The full name of the repository (e.g., 'owner/repo'). 
+ path: The starting path within the repository (default is root). + + Returns: + A list of dictionaries, each containing file details (path, sha, url, size). + Returns an empty list if the repository or path is not found or on error. + """ + files_list = [] + try: + owner, repo_name = repo_full_name.split('/') + repo = self.gh.repository(owner, repo_name) + if not repo: + logger.warning(f"Repository '{repo_full_name}' not found.") + return [] + + contents = repo.directory_contents(path=path) # Use directory_contents for clarity + + # contents returns a list of tuples (name, content_obj) + for item_name, content_item in contents: + if not isinstance(content_item, Contents): + continue + + if content_item.type == 'dir': + # Recursively fetch contents of subdirectory + files_list.extend(self.get_repository_files(repo_full_name, path=content_item.path)) + elif content_item.type == 'file': + # Check if the file extension is relevant and size is within limits + file_extension = '.' + content_item.name.split('.')[-1].lower() if '.' in content_item.name else '' + is_code = file_extension in CODE_EXTENSIONS + is_doc = file_extension in DOC_EXTENSIONS + + if (is_code or is_doc) and content_item.size <= MAX_FILE_SIZE: + files_list.append({ + "path": content_item.path, + "sha": content_item.sha, + "url": content_item.html_url, + "size": content_item.size, + "type": "code" if is_code else "doc" + }) + elif content_item.size > MAX_FILE_SIZE: + logger.debug(f"Skipping large file: {content_item.path} ({content_item.size} bytes)") + else: + logger.debug(f"Skipping irrelevant file type: {content_item.path}") + + except (NotFoundError, ForbiddenError) as e: + logger.warning(f"Cannot access path '{path}' in '{repo_full_name}': {e}") + except Exception as e: + logger.error(f"Failed to get files for {repo_full_name} at path '{path}': {e}") + # Return what we have collected so far in case of partial failure + + return files_list + + def get_file_content(self, repo_full_name: str, file_path: str) -> Optional[str]: + """ + Fetches the decoded content of a specific file. + + Args: + repo_full_name: The full name of the repository (e.g., 'owner/repo'). + file_path: The path to the file within the repository. + + Returns: + The decoded file content as a string, or None if fetching fails or file is too large. + """ + try: + owner, repo_name = repo_full_name.split('/') + repo = self.gh.repository(owner, repo_name) + if not repo: + logger.warning(f"Repository '{repo_full_name}' not found when fetching file '{file_path}'.") + return None + + content_item = repo.file_contents(path=file_path) # Use file_contents for clarity + + if not content_item or not isinstance(content_item, Contents) or content_item.type != 'file': + logger.warning(f"File '{file_path}' not found or is not a file in '{repo_full_name}'.") + return None + + if content_item.size > MAX_FILE_SIZE: + logger.warning(f"File '{file_path}' in '{repo_full_name}' exceeds max size ({content_item.size} > {MAX_FILE_SIZE}). Skipping content fetch.") + return None + + # Content is base64 encoded + if content_item.content: + try: + decoded_content = base64.b64decode(content_item.content).decode('utf-8') + return decoded_content + except UnicodeDecodeError: + logger.warning(f"Could not decode file '{file_path}' in '{repo_full_name}' as UTF-8. 
Trying with 'latin-1'.") + try: + # Try a fallback encoding + decoded_content = base64.b64decode(content_item.content).decode('latin-1') + return decoded_content + except Exception as decode_err: + logger.error(f"Failed to decode file '{file_path}' with fallback encoding: {decode_err}") + return None # Give up if fallback fails + else: + logger.warning(f"No content returned for file '{file_path}' in '{repo_full_name}'. It might be empty.") + return "" # Return empty string for empty files + + except (NotFoundError, ForbiddenError) as e: + logger.warning(f"Cannot access file '{file_path}' in '{repo_full_name}': {e}") + return None + except Exception as e: + logger.error(f"Failed to get content for file '{file_path}' in '{repo_full_name}': {e}") + return None diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py index b0fb2f0..25b7bfb 100644 --- a/surfsense_backend/app/db.py +++ b/surfsense_backend/app/db.py @@ -40,12 +40,14 @@ class DocumentType(str, Enum): SLACK_CONNECTOR = "SLACK_CONNECTOR" NOTION_CONNECTOR = "NOTION_CONNECTOR" YOUTUBE_VIDEO = "YOUTUBE_VIDEO" + GITHUB_CONNECTOR = "GITHUB_CONNECTOR" class SearchSourceConnectorType(str, Enum): SERPER_API = "SERPER_API" TAVILY_API = "TAVILY_API" SLACK_CONNECTOR = "SLACK_CONNECTOR" NOTION_CONNECTOR = "NOTION_CONNECTOR" + GITHUB_CONNECTOR = "GITHUB_CONNECTOR" class ChatType(str, Enum): GENERAL = "GENERAL" diff --git a/surfsense_backend/app/routes/search_source_connectors_routes.py b/surfsense_backend/app/routes/search_source_connectors_routes.py index 4025f2d..482a825 100644 --- a/surfsense_backend/app/routes/search_source_connectors_routes.py +++ b/surfsense_backend/app/routes/search_source_connectors_routes.py @@ -14,13 +14,13 @@ from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.future import select from sqlalchemy.exc import IntegrityError from typing import List, Dict, Any -from app.db import get_async_session, User, SearchSourceConnector, SearchSourceConnectorType, SearchSpace +from app.db import get_async_session, User, SearchSourceConnector, SearchSourceConnectorType, SearchSpace, async_session_maker from app.schemas import SearchSourceConnectorCreate, SearchSourceConnectorUpdate, SearchSourceConnectorRead from app.users import current_active_user from app.utils.check_ownership import check_ownership from pydantic import ValidationError -from app.tasks.connectors_indexing_tasks import index_slack_messages, index_notion_pages -from datetime import datetime +from app.tasks.connectors_indexing_tasks import index_slack_messages, index_notion_pages, index_github_repos +from datetime import datetime, timezone import logging # Set up logging @@ -50,13 +50,11 @@ async def create_search_source_connector( ) ) existing_connector = result.scalars().first() - if existing_connector: raise HTTPException( status_code=409, detail=f"A connector with type {connector.connector_type} already exists. Each user can have only one connector of each type." 
) - db_connector = SearchSourceConnector(**connector.model_dump(), user_id=user.id) session.add(db_connector) await session.commit() @@ -239,10 +237,15 @@ async def index_connector_content( search_space = await check_ownership(session, SearchSpace, search_space_id, user) # Handle different connector types + response_message = "" + indexing_from = None + indexing_to = None + today_str = datetime.now().strftime("%Y-%m-%d") + if connector.connector_type == SearchSourceConnectorType.SLACK_CONNECTOR: # Determine the time range that will be indexed if not connector.last_indexed_at: - start_date = "365 days ago" + start_date = "365 days ago" # Or perhaps set a specific date if needed else: # Check if last_indexed_at is today today = datetime.now().date() @@ -252,33 +255,18 @@ async def index_connector_content( else: start_date = connector.last_indexed_at.strftime("%Y-%m-%d") - # Add the indexing task to background tasks - if background_tasks: - background_tasks.add_task( - run_slack_indexing_with_new_session, - connector_id, - search_space_id - ) - - return { - "success": True, - "message": "Slack indexing started in the background", - "connector_type": connector.connector_type, - "search_space": search_space.name, - "indexing_from": start_date, - "indexing_to": datetime.now().strftime("%Y-%m-%d") - } - else: - # For testing or if background tasks are not available - return { - "success": False, - "message": "Background tasks not available", - "connector_type": connector.connector_type - } + indexing_from = start_date + indexing_to = today_str + + # Run indexing in background + logger.info(f"Triggering Slack indexing for connector {connector_id} into search space {search_space_id}") + background_tasks.add_task(run_slack_indexing_with_new_session, connector_id, search_space_id) + response_message = "Slack indexing started in the background." + elif connector.connector_type == SearchSourceConnectorType.NOTION_CONNECTOR: # Determine the time range that will be indexed if not connector.last_indexed_at: - start_date = "365 days ago" + start_date = "365 days ago" # Or perhaps set a specific date else: # Check if last_indexed_at is today today = datetime.now().date() @@ -288,44 +276,46 @@ async def index_connector_content( else: start_date = connector.last_indexed_at.strftime("%Y-%m-%d") - # Add the indexing task to background tasks - if background_tasks: - background_tasks.add_task( - run_notion_indexing_with_new_session, - connector_id, - search_space_id - ) - - return { - "success": True, - "message": "Notion indexing started in the background", - "connector_type": connector.connector_type, - "search_space": search_space.name, - "indexing_from": start_date, - "indexing_to": datetime.now().strftime("%Y-%m-%d") - } - else: - # For testing or if background tasks are not available - return { - "success": False, - "message": "Background tasks not available", - "connector_type": connector.connector_type - } + indexing_from = start_date + indexing_to = today_str + + # Run indexing in background + logger.info(f"Triggering Notion indexing for connector {connector_id} into search space {search_space_id}") + background_tasks.add_task(run_notion_indexing_with_new_session, connector_id, search_space_id) + response_message = "Notion indexing started in the background." 
+ + elif connector.connector_type == SearchSourceConnectorType.GITHUB_CONNECTOR: + # GitHub connector likely indexes everything relevant, or uses internal logic + # Setting indexing_from to None and indexing_to to today + indexing_from = None + indexing_to = today_str + + # Run indexing in background + logger.info(f"Triggering GitHub indexing for connector {connector_id} into search space {search_space_id}") + background_tasks.add_task(run_github_indexing_with_new_session, connector_id, search_space_id) + response_message = "GitHub indexing started in the background." + else: raise HTTPException( status_code=400, detail=f"Indexing not supported for connector type: {connector.connector_type}" ) - + + return { + "message": response_message, + "connector_id": connector_id, + "search_space_id": search_space_id, + "indexing_from": indexing_from, + "indexing_to": indexing_to + } except HTTPException: raise except Exception as e: - logger.error(f"Failed to start indexing: {str(e)}") + logger.error(f"Failed to initiate indexing for connector {connector_id}: {e}", exc_info=True) raise HTTPException( status_code=500, - detail=f"Failed to start indexing: {str(e)}" - ) - + detail=f"Failed to initiate indexing: {str(e)}" + ) async def update_connector_last_indexed( session: AsyncSession, @@ -361,8 +351,6 @@ async def run_slack_indexing_with_new_session( Create a new session and run the Slack indexing task. This prevents session leaks by creating a dedicated session for the background task. """ - from app.db import async_session_maker - async with async_session_maker() as session: await run_slack_indexing(session, connector_id, search_space_id) @@ -405,8 +393,6 @@ async def run_notion_indexing_with_new_session( Create a new session and run the Notion indexing task. This prevents session leaks by creating a dedicated session for the background task. """ - from app.db import async_session_maker - async with async_session_maker() as session: await run_notion_indexing(session, connector_id, search_space_id) @@ -439,4 +425,38 @@ async def run_notion_indexing( else: logger.error(f"Notion indexing failed or no documents processed: {error_or_warning}") except Exception as e: - logger.error(f"Error in background Notion indexing task: {str(e)}") \ No newline at end of file + logger.error(f"Error in background Notion indexing task: {str(e)}") + +# Add new helper functions for GitHub indexing +async def run_github_indexing_with_new_session( + connector_id: int, + search_space_id: int +): + """Wrapper to run GitHub indexing with its own database session.""" + logger.info(f"Background task started: Indexing GitHub connector {connector_id} into space {search_space_id}") + async with async_session_maker() as session: + await run_github_indexing(session, connector_id, search_space_id) + logger.info(f"Background task finished: Indexing GitHub connector {connector_id}") + +async def run_github_indexing( + session: AsyncSession, + connector_id: int, + search_space_id: int +): + """Runs the GitHub indexing task and updates the timestamp.""" + try: + indexed_count, error_message = await index_github_repos( + session, connector_id, search_space_id, update_last_indexed=False + ) + if error_message: + logger.error(f"GitHub indexing failed for connector {connector_id}: {error_message}") + # Optionally update status in DB to indicate failure + else: + logger.info(f"GitHub indexing successful for connector {connector_id}. 
Indexed {indexed_count} documents.") + # Update the last indexed timestamp only on success + await update_connector_last_indexed(session, connector_id) + await session.commit() # Commit timestamp update + except Exception as e: + await session.rollback() + logger.error(f"Critical error in run_github_indexing for connector {connector_id}: {e}", exc_info=True) + # Optionally update status in DB to indicate failure diff --git a/surfsense_backend/app/schemas/search_source_connector.py b/surfsense_backend/app/schemas/search_source_connector.py index f86f45d..5386658 100644 --- a/surfsense_backend/app/schemas/search_source_connector.py +++ b/surfsense_backend/app/schemas/search_source_connector.py @@ -57,6 +57,16 @@ class SearchSourceConnectorBase(BaseModel): # Ensure the integration token is not empty if not config.get("NOTION_INTEGRATION_TOKEN"): raise ValueError("NOTION_INTEGRATION_TOKEN cannot be empty") + + elif connector_type == SearchSourceConnectorType.GITHUB_CONNECTOR: + # For GITHUB_CONNECTOR, only allow GITHUB_TOKEN + allowed_keys = ["GITHUB_PAT"] + if set(config.keys()) != set(allowed_keys): + raise ValueError(f"For GITHUB_CONNECTOR connector type, config must only contain these keys: {allowed_keys}") + + # Ensure the token is not empty + if not config.get("GITHUB_PAT"): + raise ValueError("GITHUB_TOKEN cannot be empty") return config @@ -70,4 +80,4 @@ class SearchSourceConnectorRead(SearchSourceConnectorBase, IDModel, TimestampMod user_id: uuid.UUID class Config: - from_attributes = True \ No newline at end of file + from_attributes = True diff --git a/surfsense_backend/app/tasks/connectors_indexing_tasks.py b/surfsense_backend/app/tasks/connectors_indexing_tasks.py index 580a5c7..670fa26 100644 --- a/surfsense_backend/app/tasks/connectors_indexing_tasks.py +++ b/surfsense_backend/app/tasks/connectors_indexing_tasks.py @@ -3,12 +3,13 @@ from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.future import select from sqlalchemy import delete -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone from app.db import Document, DocumentType, Chunk, SearchSourceConnector, SearchSourceConnectorType from app.config import config from app.prompts import SUMMARY_PROMPT_TEMPLATE from app.connectors.slack_history import SlackHistory from app.connectors.notion_history import NotionHistoryConnector +from app.connectors.github_connector import GitHubConnector from slack_sdk.errors import SlackApiError import logging @@ -589,3 +590,195 @@ async def index_notion_pages( await session.rollback() logger.error(f"Failed to index Notion pages: {str(e)}", exc_info=True) return 0, f"Failed to index Notion pages: {str(e)}" + +async def index_github_repos( + session: AsyncSession, + connector_id: int, + search_space_id: int, + update_last_indexed: bool = True +) -> Tuple[int, Optional[str]]: + """ + Index code and documentation files from accessible GitHub repositories. + + Args: + session: Database session + connector_id: ID of the GitHub connector + search_space_id: ID of the search space to store documents in + update_last_indexed: Whether to update the last_indexed_at timestamp (default: True) + + Returns: + Tuple containing (number of documents indexed, error message or None) + """ + documents_processed = 0 + errors = [] + + try: + # 1. 
Get the GitHub connector from the database + result = await session.execute( + select(SearchSourceConnector) + .filter( + SearchSourceConnector.id == connector_id, + SearchSourceConnector.connector_type == SearchSourceConnectorType.GITHUB_CONNECTOR + ) + ) + connector = result.scalars().first() + + if not connector: + return 0, f"Connector with ID {connector_id} not found or is not a GitHub connector" + + # 2. Get the GitHub PAT from the connector config + github_pat = connector.config.get("GITHUB_PAT") + if not github_pat: + return 0, "GitHub Personal Access Token (PAT) not found in connector config" + + # 3. Initialize GitHub connector client + try: + github_client = GitHubConnector(token=github_pat) + except ValueError as e: + return 0, f"Failed to initialize GitHub client: {str(e)}" + + # 4. Get list of accessible repositories + repositories = github_client.get_user_repositories() + if not repositories: + logger.info("No accessible GitHub repositories found for the provided token.") + return 0, "No accessible GitHub repositories found." + + logger.info(f"Found {len(repositories)} repositories to potentially index.") + + # 5. Get existing documents for this search space and connector type to prevent duplicates + existing_docs_result = await session.execute( + select(Document) + .filter( + Document.search_space_id == search_space_id, + Document.document_type == DocumentType.GITHUB_CONNECTOR + ) + ) + existing_docs = existing_docs_result.scalars().all() + # Create a lookup dict: key=repo_fullname/file_path, value=Document object + existing_docs_lookup = {doc.document_metadata.get("full_path"): doc for doc in existing_docs if doc.document_metadata.get("full_path")} + logger.info(f"Found {len(existing_docs_lookup)} existing GitHub documents in database for search space {search_space_id}") + + # 6. Iterate through repositories and index files + for repo_info in repositories: + repo_full_name = repo_info.get("full_name") + if not repo_full_name: + logger.warning(f"Skipping repository with missing full_name: {repo_info.get('name')}") + continue + + logger.info(f"Processing repository: {repo_full_name}") + try: + files_to_index = github_client.get_repository_files(repo_full_name) + if not files_to_index: + logger.info(f"No indexable files found in repository: {repo_full_name}") + continue + + logger.info(f"Found {len(files_to_index)} files to process in {repo_full_name}") + + for file_info in files_to_index: + file_path = file_info.get("path") + file_url = file_info.get("url") + file_sha = file_info.get("sha") + file_type = file_info.get("type") # 'code' or 'doc' + full_path_key = f"{repo_full_name}/{file_path}" + + if not file_path or not file_url or not file_sha: + logger.warning(f"Skipping file with missing info in {repo_full_name}: {file_info}") + continue + + # Check if document already exists and if content hash matches + existing_doc = existing_docs_lookup.get(full_path_key) + if existing_doc and existing_doc.document_metadata.get("sha") == file_sha: + logger.debug(f"Skipping unchanged file: {full_path_key}") + continue # Skip if SHA matches (content hasn't changed) + + # Get file content + file_content = github_client.get_file_content(repo_full_name, file_path) + + if file_content is None: + logger.warning(f"Could not retrieve content for {full_path_key}. Skipping.") + continue # Skip if content fetch failed + + # Use file_content directly for chunking, maybe summary for main content? 
+ # For now, let's use the full content for both, might need refinement + summary_content = f"GitHub file: {full_path_key}\n\n{file_content[:1000]}..." # Simple summary + summary_embedding = config.embedding_model_instance.embed(summary_content) + + # Chunk the content + try: + chunks_data = [ + Chunk(content=chunk.text, embedding=chunk.embedding) + for chunk in config.chunker_instance.chunk(file_content) + ] + except Exception as chunk_err: + logger.error(f"Failed to chunk file {full_path_key}: {chunk_err}") + errors.append(f"Chunking failed for {full_path_key}: {chunk_err}") + continue # Skip this file if chunking fails + + doc_metadata = { + "repository_full_name": repo_full_name, + "file_path": file_path, + "full_path": full_path_key, # For easier lookup + "url": file_url, + "sha": file_sha, + "type": file_type, + "indexed_at": datetime.now(timezone.utc).isoformat() + } + + if existing_doc: + # Update existing document + logger.info(f"Updating document for file: {full_path_key}") + existing_doc.title = f"GitHub - {file_path}" + existing_doc.document_metadata = doc_metadata + existing_doc.content = summary_content # Update summary + existing_doc.embedding = summary_embedding # Update embedding + + # Delete old chunks + await session.execute( + delete(Chunk) + .where(Chunk.document_id == existing_doc.id) + ) + # Add new chunks + for chunk_obj in chunks_data: + chunk_obj.document_id = existing_doc.id + session.add(chunk_obj) + + documents_processed += 1 + else: + # Create new document + logger.info(f"Creating new document for file: {full_path_key}") + document = Document( + title=f"GitHub - {file_path}", + document_type=DocumentType.GITHUB_CONNECTOR, + document_metadata=doc_metadata, + content=summary_content, # Store summary + embedding=summary_embedding, + search_space_id=search_space_id, + chunks=chunks_data # Associate chunks directly + ) + session.add(document) + documents_processed += 1 + + # Commit periodically or at the end? For now, commit per repo + # await session.commit() + + except Exception as repo_err: + logger.error(f"Failed to process repository {repo_full_name}: {repo_err}") + errors.append(f"Failed processing {repo_full_name}: {repo_err}") + + # Commit all changes at the end + await session.commit() + logger.info(f"Finished GitHub indexing for connector {connector_id}. 
Processed {documents_processed} files.") + + except SQLAlchemyError as db_err: + await session.rollback() + logger.error(f"Database error during GitHub indexing for connector {connector_id}: {db_err}") + errors.append(f"Database error: {db_err}") + return documents_processed, "; ".join(errors) if errors else str(db_err) + except Exception as e: + await session.rollback() + logger.error(f"Unexpected error during GitHub indexing for connector {connector_id}: {e}", exc_info=True) + errors.append(f"Unexpected error: {e}") + return documents_processed, "; ".join(errors) if errors else str(e) + + error_message = "; ".join(errors) if errors else None + return documents_processed, error_message diff --git a/surfsense_backend/main.py b/surfsense_backend/main.py index 76d478b..81ef520 100644 --- a/surfsense_backend/main.py +++ b/surfsense_backend/main.py @@ -1,5 +1,12 @@ import uvicorn import argparse +import logging + +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', + datefmt='%Y-%m-%d %H:%M:%S' +) if __name__ == "__main__": parser = argparse.ArgumentParser(description='Run the SurfSense application') diff --git a/surfsense_backend/pyproject.toml b/surfsense_backend/pyproject.toml index 2d1e00a..9fb5fbb 100644 --- a/surfsense_backend/pyproject.toml +++ b/surfsense_backend/pyproject.toml @@ -5,12 +5,14 @@ description = "Add your description here" readme = "README.md" requires-python = ">=3.12" dependencies = [ + "alembic>=1.13.0", "asyncpg>=0.30.0", "chonkie[all]>=0.4.1", "fastapi>=0.115.8", "fastapi-users[oauth,sqlalchemy]>=14.0.1", "firecrawl-py>=1.12.0", "gpt-researcher>=0.12.12", + "github3.py==4.0.1", "langchain-community>=0.3.17", "langchain-unstructured>=0.1.6", "litellm>=1.61.4", diff --git a/surfsense_backend/uv.lock b/surfsense_backend/uv.lock index e64ff39..5211ea0 100644 --- a/surfsense_backend/uv.lock +++ b/surfsense_backend/uv.lock @@ -92,6 +92,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ec/6a/bc7e17a3e87a2985d3e8f4da4cd0f481060eb78fb08596c42be62c90a4d9/aiosignal-1.3.2-py2.py3-none-any.whl", hash = "sha256:45cde58e409a301715980c2b01d0c28bdde3770d8290b5eb2173759d9acb31a5", size = 7597 }, ] +[[package]] +name = "alembic" +version = "1.15.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mako" }, + { name = "sqlalchemy" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e6/57/e314c31b261d1e8a5a5f1908065b4ff98270a778ce7579bd4254477209a7/alembic-1.15.2.tar.gz", hash = "sha256:1c72391bbdeffccfe317eefba686cb9a3c078005478885413b95c3b26c57a8a7", size = 1925573 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/41/18/d89a443ed1ab9bcda16264716f809c663866d4ca8de218aa78fd50b38ead/alembic-1.15.2-py3-none-any.whl", hash = "sha256:2e76bd916d547f6900ec4bb5a90aeac1485d2c92536923d0b138c02b126edc53", size = 231911 }, +] + [[package]] name = "annotated-types" version = "0.7.0" @@ -884,6 +898,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e2/94/758680531a00d06e471ef649e4ec2ed6bf185356a7f9fbfbb7368a40bd49/fsspec-2025.2.0-py3-none-any.whl", hash = "sha256:9de2ad9ce1f85e1931858535bc882543171d197001a0a5eb2ddc04f1781ab95b", size = 184484 }, ] +[[package]] +name = "github3-py" +version = "4.0.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyjwt", extra = ["crypto"] }, + { name = "python-dateutil" }, + { name = "requests" }, + { name = "uritemplate" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/89/91/603bcaf8cd1b3927de64bf56c3a8915f6653ea7281919140c5bcff2bfe7b/github3.py-4.0.1.tar.gz", hash = "sha256:30d571076753efc389edc7f9aaef338a4fcb24b54d8968d5f39b1342f45ddd36", size = 36214038 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/61/ad/2394d4fb542574678b0ba342daf734d4d811768da3c2ee0c84d509dcb26c/github3.py-4.0.1-py3-none-any.whl", hash = "sha256:a89af7de25650612d1da2f0609622bcdeb07ee8a45a1c06b2d16a05e4234e753", size = 151800 }, +] + [[package]] name = "google-api-core" version = "2.24.2" @@ -1614,6 +1643,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/89/a1/3e145759e776c8866488a71270c399bf7c4e554551ac2e247aa0a18a0596/makefun-1.15.6-py2.py3-none-any.whl", hash = "sha256:e69b870f0bb60304765b1e3db576aaecf2f9b3e5105afe8cfeff8f2afe6ad067", size = 22946 }, ] +[[package]] +name = "mako" +version = "1.3.10" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markupsafe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9e/38/bd5b78a920a64d708fe6bc8e0a2c075e1389d53bef8413725c63ba041535/mako-1.3.10.tar.gz", hash = "sha256:99579a6f39583fa7e5630a28c3c1f440e4e97a414b80372649c0ce338da2ea28", size = 392474 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/87/fb/99f81ac72ae23375f22b7afdb7642aba97c00a713c217124420147681a2f/mako-1.3.10-py3-none-any.whl", hash = "sha256:baef24a52fc4fc514a0887ac600f9f1cff3d82c61d4d700a1fa84d597b88db59", size = 78509 }, +] + [[package]] name = "markdown" version = "3.7" @@ -3228,11 +3269,13 @@ name = "surf-new-backend" version = "0.1.0" source = { virtual = "." } dependencies = [ + { name = "alembic" }, { name = "asyncpg" }, { name = "chonkie", extra = ["all"] }, { name = "fastapi" }, { name = "fastapi-users", extra = ["oauth", "sqlalchemy"] }, { name = "firecrawl-py" }, + { name = "github3-py" }, { name = "gpt-researcher" }, { name = "langchain-community" }, { name = "langchain-unstructured" }, @@ -3254,11 +3297,13 @@ dependencies = [ [package.metadata] requires-dist = [ + { name = "alembic", specifier = ">=1.13.0" }, { name = "asyncpg", specifier = ">=0.30.0" }, { name = "chonkie", extras = ["all"], specifier = ">=0.4.1" }, { name = "fastapi", specifier = ">=0.115.8" }, { name = "fastapi-users", extras = ["oauth", "sqlalchemy"], specifier = ">=14.0.1" }, { name = "firecrawl-py", specifier = ">=1.12.0" }, + { name = "github3-py", specifier = "==4.0.1" }, { name = "gpt-researcher", specifier = ">=0.12.12" }, { name = "langchain-community", specifier = ">=0.3.17" }, { name = "langchain-unstructured", specifier = ">=0.1.6" }, @@ -3658,6 +3703,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/10/6d/adb955ecf60811a3735d508974bbb5358e7745b635dc001329267529c6f2/unstructured.pytesseract-0.3.15-py3-none-any.whl", hash = "sha256:a3f505c5efb7ff9f10379051a7dd6aa624b3be6b0f023ed6767cc80d0b1613d1", size = 14992 }, ] +[[package]] +name = "uritemplate" +version = "4.1.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d2/5a/4742fdba39cd02a56226815abfa72fe0aa81c33bed16ed045647d6000eba/uritemplate-4.1.1.tar.gz", hash = "sha256:4346edfc5c3b79f694bccd6d6099a322bbeb628dbf2cd86eea55a456ce5124f0", size = 273898 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/81/c0/7461b49cd25aeece13766f02ee576d1db528f1c37ce69aee300e075b485b/uritemplate-4.1.1-py2.py3-none-any.whl", hash = "sha256:830c08b8d99bdd312ea4ead05994a38e8936266f84b9a7878232db50b044e02e", size = 10356 }, +] + 
[[package]] name = "urllib3" version = "2.3.0" diff --git a/surfsense_web/app/dashboard/[search_space_id]/connectors/(manage)/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/connectors/(manage)/page.tsx index dfc8b82..817ca58 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/connectors/(manage)/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/connectors/(manage)/page.tsx @@ -44,6 +44,7 @@ const getConnectorTypeDisplay = (type: string): string => { "TAVILY_API": "Tavily API", "SLACK_CONNECTOR": "Slack", "NOTION_CONNECTOR": "Notion", + "GITHUB_CONNECTOR": "GitHub", // Add other connector types here as needed }; return typeMap[type] || type; @@ -253,4 +254,4 @@ export default function ConnectorsPage() { ); -} \ No newline at end of file +} diff --git a/surfsense_web/app/dashboard/[search_space_id]/connectors/[connector_id]/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/connectors/[connector_id]/page.tsx index 7700bc8..e841639 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/connectors/[connector_id]/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/connectors/[connector_id]/page.tsx @@ -51,6 +51,7 @@ const getConnectorTypeDisplay = (type: string): string => { "TAVILY_API": "Tavily API", "SLACK_CONNECTOR": "Slack Connector", "NOTION_CONNECTOR": "Notion Connector", + "GITHUB_CONNECTOR": "GitHub Connector", // Add other connector types here as needed }; return typeMap[type] || type; @@ -85,7 +86,8 @@ export default function EditConnectorPage() { "SERPER_API": "SERPER_API_KEY", "TAVILY_API": "TAVILY_API_KEY", "SLACK_CONNECTOR": "SLACK_BOT_TOKEN", - "NOTION_CONNECTOR": "NOTION_INTEGRATION_TOKEN" + "NOTION_CONNECTOR": "NOTION_INTEGRATION_TOKEN", + "GITHUB_CONNECTOR": "GITHUB_PAT" }; return fieldMap[connectorType] || ""; }; @@ -136,6 +138,8 @@ export default function EditConnectorPage() { name: values.name, connector_type: connector.connector_type, config: updatedConfig, + is_indexable: connector.is_indexable, + last_indexed_at: connector.last_indexed_at, }); toast.success("Connector updated successfully!"); @@ -223,17 +227,21 @@ export default function EditConnectorPage() { ? "Slack Bot Token" : connector?.connector_type === "NOTION_CONNECTOR" ? "Notion Integration Token" - : "API Key"} + : connector?.connector_type === "GITHUB_CONNECTOR" + ? "GitHub Personal Access Token (PAT)" + : "API Key"} @@ -243,7 +251,9 @@ export default function EditConnectorPage() { ? "Enter a new Slack Bot Token or leave blank to keep your existing token." : connector?.connector_type === "NOTION_CONNECTOR" ? "Enter a new Notion Integration Token or leave blank to keep your existing token." - : "Enter a new API key or leave blank to keep your existing key."} + : connector?.connector_type === "GITHUB_CONNECTOR" + ? "Enter a new GitHub PAT or leave blank to keep your existing token." 
+ : "Enter a new API key or leave blank to keep your existing key."} @@ -276,4 +286,4 @@ export default function EditConnectorPage() { ); -} \ No newline at end of file +} diff --git a/surfsense_web/app/dashboard/[search_space_id]/connectors/add/github-connector/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/github-connector/page.tsx new file mode 100644 index 0000000..45534d6 --- /dev/null +++ b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/github-connector/page.tsx @@ -0,0 +1,298 @@ +"use client"; + +import { useState } from "react"; +import { useRouter, useParams } from "next/navigation"; +import { motion } from "framer-motion"; +import { zodResolver } from "@hookform/resolvers/zod"; +import { useForm } from "react-hook-form"; +import * as z from "zod"; +import { toast } from "sonner"; +import { ArrowLeft, Check, Info, Loader2, Github } from "lucide-react"; + +// Assuming useSearchSourceConnectors hook exists and works similarly +import { useSearchSourceConnectors } from "@/hooks/useSearchSourceConnectors"; +import { + Form, + FormControl, + FormDescription, + FormField, + FormItem, + FormLabel, + FormMessage, +} from "@/components/ui/form"; +import { Input } from "@/components/ui/input"; +import { Button } from "@/components/ui/button"; +import { + Card, + CardContent, + CardDescription, + CardFooter, + CardHeader, + CardTitle, +} from "@/components/ui/card"; +import { + Alert, + AlertDescription, + AlertTitle, +} from "@/components/ui/alert"; +import { + Accordion, + AccordionContent, + AccordionItem, + AccordionTrigger, +} from "@/components/ui/accordion"; +import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/tabs"; + +// Define the form schema with Zod for GitHub +const githubConnectorFormSchema = z.object({ + name: z.string().min(3, { + message: "Connector name must be at least 3 characters.", + }), + github_pat: z.string() + .min(20, { // Apply min length first + message: "GitHub Personal Access Token seems too short.", + }) + .refine(pat => pat.startsWith('ghp_') || pat.startsWith('github_pat_'), { // Then refine the pattern + message: "GitHub PAT should start with 'ghp_' or 'github_pat_'", + }), +}); + +// Define the type for the form values +type GithubConnectorFormValues = z.infer; + +export default function GithubConnectorPage() { + const router = useRouter(); + const params = useParams(); + const searchSpaceId = params.search_space_id as string; + const [isSubmitting, setIsSubmitting] = useState(false); + const { createConnector } = useSearchSourceConnectors(); // Assuming this hook exists + + // Initialize the form + const form = useForm({ + resolver: zodResolver(githubConnectorFormSchema), + defaultValues: { + name: "GitHub Connector", + github_pat: "", + }, + }); + + // Handle form submission + const onSubmit = async (values: GithubConnectorFormValues) => { + setIsSubmitting(true); + try { + await createConnector({ + name: values.name, + connector_type: "GITHUB_CONNECTOR", + config: { + GITHUB_PAT: values.github_pat, + }, + is_indexable: true, // GitHub connector is indexable + last_indexed_at: null, // New connector hasn't been indexed + }); + + toast.success("GitHub connector created successfully!"); + + // Navigate back to connectors management page (or the add page) + router.push(`/dashboard/${searchSpaceId}/connectors`); + } catch (error) { // Added type check for error + console.error("Error creating GitHub connector:", error); + // Display specific backend error message if available + const errorMessage = 
error instanceof Error ? error.message : "Failed to create GitHub connector. Please check the PAT and permissions."; + toast.error(errorMessage); + } finally { + setIsSubmitting(false); + } + }; + + return ( +
+ + + + + + Connect GitHub + Setup Guide + + + + + + Connect GitHub Account + + Integrate with GitHub using a Personal Access Token (PAT) to search and retrieve information from accessible repositories. This connector can index your code and documentation. + + + + + + GitHub Personal Access Token (PAT) Required + + You'll need a GitHub PAT with the appropriate scopes (e.g., 'repo') to use this connector. You can create one from your + + GitHub Developer Settings + . + + + +
+ + ( + + Connector Name + + + + + A friendly name to identify this GitHub connection. + + + + )} + /> + + ( + + GitHub Personal Access Token (PAT) + + + + + Your GitHub PAT will be encrypted and stored securely. Ensure it has the necessary 'repo' scopes. + + + + )} + /> + +
+ What you get with GitHub integration:
+ • Search through code and documentation in your repositories
+ • Access READMEs, Markdown files, and common code files
+ • Connect your project knowledge directly to your search space
+ • Index your repositories for enhanced search capabilities
+ GitHub Connector Setup Guide
+ Learn how to generate a Personal Access Token (PAT) and connect your GitHub account.
+ How it works
+ The GitHub connector uses a Personal Access Token (PAT) to authenticate with the GitHub API. It fetches information about repositories accessible to the token and indexes relevant files (code, markdown, text).
+ • The connector indexes files based on common code and documentation extensions.
+ • Large files (over 1MB) are skipped during indexing.
+ • Indexing runs periodically (check connector settings for frequency) to keep content up-to-date.
+ Step 1: Create a GitHub PAT
+ Token Security
+ Treat your PAT like a password. Store it securely and consider using fine-grained tokens if possible.
+ Generating a Token:
+ 1. Go to your GitHub Developer settings.
+ 2. Click on Personal access tokens, then choose Tokens (classic) or Fine-grained tokens (recommended if available and suitable).
+ 3. Click Generate new token (and choose the appropriate type).
+ 4. Give your token a descriptive name (e.g., "SurfSense Connector").
+ 5. Set an expiration date for the token (recommended for security).
+ 6. Under Select scopes (for classic tokens) or Repository access (for fine-grained), grant the necessary permissions. At minimum, the `repo` scope (or equivalent read access to repositories for fine-grained tokens) is required to read repository content.
+ 7. Click Generate token.
+ 8. Important: Copy your new PAT immediately. You won't be able to see it again after leaving the page.
+ Step 2: Connect in SurfSense
+ 1. Paste the copied GitHub PAT into the "GitHub Personal Access Token (PAT)" field on the "Connect GitHub" tab.
+ 2. Optionally, give the connector a custom name.
+ 3. Click the Connect GitHub button.
+ 4. If the connection is successful, you will be redirected and can start indexing from the Connectors page.
+ ); +} diff --git a/surfsense_web/app/dashboard/[search_space_id]/connectors/add/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/page.tsx index d68de69..f70bb62 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/connectors/add/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/page.tsx @@ -14,6 +14,7 @@ import { IconMail, IconBrandZoom, IconChevronRight, + IconWorldWww, } from "@tabler/icons-react"; import { motion, AnimatePresence } from "framer-motion"; import { useState } from "react"; @@ -22,36 +23,43 @@ import Link from "next/link"; import { Button } from "@/components/ui/button"; import { Separator } from "@/components/ui/separator"; import { Collapsible, CollapsibleContent, CollapsibleTrigger } from "@/components/ui/collapsible"; +import { useForm } from "react-hook-form"; + +// Define the Connector type +interface Connector { + id: string; + title: string; + description: string; + icon: React.ReactNode; + status: "available" | "coming-soon" | "connected"; // Added connected status example +} + +interface ConnectorCategory { + id: string; + title: string; + connectors: Connector[]; +} // Define connector categories and their connectors -const connectorCategories = [ +const connectorCategories: ConnectorCategory[] = [ { id: "search-engines", title: "Search Engines", - description: "Connect to search engines to enhance your research capabilities.", - icon: , connectors: [ { - id: "tavily-api", - title: "Tavily Search API", - description: "Connect to Tavily Search API to search the web.", - icon: , - status: "available", - }, - { - id: "serper-api", - title: "Serper API", - description: "Connect to Serper API to search the web.", - icon: , - status: "coming-soon", + id: "web-search", + title: "Web Search", + description: "Enable web search capabilities for broader context.", + icon: , + status: "available", // Example status + // Potentially add config form here if needed (e.g., choosing provider) }, + // Add other search engine connectors like Tavily, Serper if they have UI config ], }, { id: "team-chats", title: "Team Chats", - description: "Connect to your team communication platforms.", - icon: , connectors: [ { id: "slack-connector", @@ -79,8 +87,6 @@ const connectorCategories = [ { id: "knowledge-bases", title: "Knowledge Bases", - description: "Connect to your knowledge bases and documentation.", - icon: , connectors: [ { id: "notion-connector", @@ -88,21 +94,20 @@ const connectorCategories = [ description: "Connect to your Notion workspace to access pages and databases.", icon: , status: "available", + // No form here, assumes it links to its own page }, { - id: "github", + id: "github-connector", // Keep the id simple title: "GitHub", - description: "Connect to GitHub repositories to access code and documentation.", + description: "Connect a GitHub PAT to index code and docs from accessible repositories.", icon: , - status: "coming-soon", + status: "available", }, ], }, { id: "communication", title: "Communication", - description: "Connect to your email and meeting platforms.", - icon: , connectors: [ { id: "gmail", @@ -125,7 +130,7 @@ const connectorCategories = [ export default function ConnectorsPage() { const params = useParams(); const searchSpaceId = params.search_space_id as string; - const [expandedCategories, setExpandedCategories] = useState(["search-engines"]); + const [expandedCategories, setExpandedCategories] = useState(["search-engines", "knowledge-bases"]); const toggleCategory = (categoryId: string) => 
{ setExpandedCategories(prev =>
@@ -150,104 +155,68 @@ export default function ConnectorsPage() {
- {connectorCategories.map((category, categoryIndex) => ( + {connectorCategories.map((category) => ( toggleCategory(category.id)} - className="border rounded-lg overflow-hidden bg-card" + className="space-y-2" > - - -
-
- {category.icon} -
-
-

{category.title}

-

{category.description}

-
-
- -
-
+
+

{category.title}

+ + {/* Replace with your preferred expand/collapse icon/button */} + + +
- -
- - {category.connectors.map((connector, index) => ( - -
- -
- {connector.icon} +
+ {category.connectors.map((connector) => ( +
+
+
+
+ {connector.icon} +

+ {connector.title} +

+ {connector.status === "coming-soon" && ( + + Coming soon + + )} + {/* TODO: Add 'Connected' badge based on actual state */} +
+

+ {connector.description} +

- -
-

- {connector.title} -

- {connector.status === "coming-soon" && ( - Coming soon - )} -
- -

- {connector.description} -

- - {connector.status === "available" ? ( - -
+ {/* Always render Link button if available */} + {connector.status === 'available' && ( +
+ + - ) : ( -
+ )} + {connector.status === 'coming-soon' && ( +
+ - )} - - ))} - +
+ )} + {/* TODO: Add logic for 'connected' status */} +
+ ))}
+ ))}
From a26fac435b8bcee0e189ffa422729e2c9ca9ac7c Mon Sep 17 00:00:00 2001 From: Adamsmith6300 Date: Sun, 13 Apr 2025 21:23:05 -0700 Subject: [PATCH 2/4] documents table migration, fix/update github indexing --- ...1_add_github_connector_to_documenttype_.py | 70 +++++++++++++++++++ .../app/connectors/github_connector.py | 57 +++++++++++---- .../tasks/stream_connector_search_results.py | 27 +++++++ .../app/utils/connector_service.py | 55 ++++++++++++++- .../documents/(manage)/page.tsx | 5 +- 5 files changed, 197 insertions(+), 17 deletions(-) create mode 100644 surfsense_backend/alembic/versions/e55302644c51_add_github_connector_to_documenttype_.py diff --git a/surfsense_backend/alembic/versions/e55302644c51_add_github_connector_to_documenttype_.py b/surfsense_backend/alembic/versions/e55302644c51_add_github_connector_to_documenttype_.py new file mode 100644 index 0000000..1f15912 --- /dev/null +++ b/surfsense_backend/alembic/versions/e55302644c51_add_github_connector_to_documenttype_.py @@ -0,0 +1,70 @@ +"""Add GITHUB_CONNECTOR to DocumentType enum + +Revision ID: e55302644c51 +Revises: 1 +Create Date: 2025-04-13 19:56:00.059921 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision: str = 'e55302644c51' +down_revision: Union[str, None] = '1' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + +# Define the ENUM type name and the new value +ENUM_NAME = 'documenttype' # Make sure this matches the name in your DB (usually lowercase class name) +NEW_VALUE = 'GITHUB_CONNECTOR' + +def upgrade() -> None: + """Upgrade schema.""" + op.execute(f"ALTER TYPE {ENUM_NAME} ADD VALUE '{NEW_VALUE}'") + + +# Warning: This will delete all rows with the new value +def downgrade() -> None: + """Downgrade schema - remove GITHUB_CONNECTOR from enum.""" + + # The old type name + old_enum_name = f"{ENUM_NAME}_old" + + # Enum values *before* GITHUB_CONNECTOR was added + old_values = ( + 'EXTENSION', + 'CRAWLED_URL', + 'FILE', + 'SLACK_CONNECTOR', + 'NOTION_CONNECTOR', + 'YOUTUBE_VIDEO' + ) + old_values_sql = ", ".join([f"'{v}'" for v in old_values]) + + # Table and column names (adjust if different) + table_name = 'documents' + column_name = 'document_type' + + # 1. Rename the current enum type + op.execute(f"ALTER TYPE {ENUM_NAME} RENAME TO {old_enum_name}") + + # 2. Create the new enum type with the old values + op.execute(f"CREATE TYPE {ENUM_NAME} AS ENUM({old_values_sql})") + + # 3. Update the table: + op.execute( + f"DELETE FROM {table_name} WHERE {column_name}::text = '{NEW_VALUE}'" + ) + + # 4. Alter the column to use the new enum type (casting old values) + op.execute( + f"ALTER TABLE {table_name} ALTER COLUMN {column_name} " + f"TYPE {ENUM_NAME} USING {column_name}::text::{ENUM_NAME}" + ) + + # 5. 
Drop the old enum type + op.execute(f"DROP TYPE {old_enum_name}") + # ### end Alembic commands ### diff --git a/surfsense_backend/app/connectors/github_connector.py b/surfsense_backend/app/connectors/github_connector.py index d827dac..265f89b 100644 --- a/surfsense_backend/app/connectors/github_connector.py +++ b/surfsense_backend/app/connectors/github_connector.py @@ -2,7 +2,6 @@ import base64 import logging from typing import List, Optional, Dict, Any, Tuple from github3 import login as github_login, exceptions as github_exceptions -from github3.repos.repo import Repository from github3.repos.contents import Contents from github3.exceptions import ForbiddenError, NotFoundError @@ -26,6 +25,33 @@ MAX_FILE_SIZE = 1 * 1024 * 1024 class GitHubConnector: """Connector for interacting with the GitHub API.""" + # Directories to skip during file traversal + SKIPPED_DIRS = { + # Version control + '.git', + # Dependencies + 'node_modules', + 'vendor', + # Build artifacts / Caches + 'build', + 'dist', + 'target', + '__pycache__', + # Virtual environments + 'venv', + '.venv', + 'env', + # IDE/Editor config + '.vscode', + '.idea', + '.project', + '.settings', + # Temporary / Logs + 'tmp', + 'logs', + # Add other project-specific irrelevant directories if needed + } + def __init__(self, token: str): """ Initializes the GitHub connector. @@ -54,17 +80,16 @@ class GitHubConnector: # type='owner' fetches repos owned by the user # type='member' fetches repos the user is a collaborator on (including orgs) # type='all' fetches both - for repo in self.gh.repositories(type='all', sort='updated'): - if isinstance(repo, Repository): - repos_data.append({ - "id": repo.id, - "name": repo.name, - "full_name": repo.full_name, - "private": repo.private, - "url": repo.html_url, - "description": repo.description or "", - "last_updated": repo.updated_at.isoformat() if repo.updated_at else None, - }) + for repo in self.gh.repositories(type='owner', sort='updated'): + repos_data.append({ + "id": repo.id, + "name": repo.name, + "full_name": repo.full_name, + "private": repo.private, + "url": repo.html_url, + "description": repo.description or "", + "last_updated": repo.updated_at if repo.updated_at else None, + }) logger.info(f"Fetched {len(repos_data)} repositories.") return repos_data except Exception as e: @@ -90,8 +115,7 @@ class GitHubConnector: if not repo: logger.warning(f"Repository '{repo_full_name}' not found.") return [] - - contents = repo.directory_contents(path=path) # Use directory_contents for clarity + contents = repo.directory_contents(directory_path=path) # Use directory_contents for clarity # contents returns a list of tuples (name, content_obj) for item_name, content_item in contents: @@ -99,6 +123,11 @@ class GitHubConnector: continue if content_item.type == 'dir': + # Check if the directory name is in the skipped list + if content_item.name in self.SKIPPED_DIRS: + logger.debug(f"Skipping directory: {content_item.path}") + continue # Skip recursion for this directory + # Recursively fetch contents of subdirectory files_list.extend(self.get_repository_files(repo_full_name, path=content_item.path)) elif content_item.type == 'file': diff --git a/surfsense_backend/app/tasks/stream_connector_search_results.py b/surfsense_backend/app/tasks/stream_connector_search_results.py index 5c563dc..b9a703a 100644 --- a/surfsense_backend/app/tasks/stream_connector_search_results.py +++ b/surfsense_backend/app/tasks/stream_connector_search_results.py @@ -244,6 +244,33 @@ async def stream_connector_search_results( 
all_raw_documents.extend(notion_chunks) + # Github Connector + if connector == "GITHUB_CONNECTOR": + # Send terminal message about starting search + yield streaming_service.add_terminal_message("Starting to search for GitHub connector...") + print("Starting to search for GitHub connector...") + # Search using Github API with reformulated query + result_object, github_chunks = await connector_service.search_github( + user_query=reformulated_query, + user_id=user_id, + search_space_id=search_space_id, + top_k=TOP_K + ) + + # Send terminal message about search results + yield streaming_service.add_terminal_message( + f"Found {len(result_object['sources'])} relevant results from Github", + "success" + ) + + # Update sources + all_sources.append(result_object) + yield streaming_service.update_sources(all_sources) + + # Add documents to collection + all_raw_documents.extend(github_chunks) + + # If we have documents to research diff --git a/surfsense_backend/app/utils/connector_service.py b/surfsense_backend/app/utils/connector_service.py index 9e676e5..8d7a551 100644 --- a/surfsense_backend/app/utils/connector_service.py +++ b/surfsense_backend/app/utils/connector_service.py @@ -558,4 +558,57 @@ class ConnectorService: "sources": sources_list, } - return result_object, youtube_chunks \ No newline at end of file + return result_object, youtube_chunks + + async def search_github(self, user_query: str, user_id: int, search_space_id: int, top_k: int = 20) -> tuple: + """ + Search for GitHub documents and return both the source information and langchain documents + + Returns: + tuple: (sources_info, langchain_documents) + """ + github_chunks = await self.retriever.hybrid_search( + query_text=user_query, + top_k=top_k, + user_id=user_id, + search_space_id=search_space_id, + document_type="GITHUB_CONNECTOR" + ) + + # Map github_chunks to the required format + mapped_sources = {} + for i, chunk in enumerate(github_chunks): + # Fix for UI - assign a unique ID for citation/source tracking + github_chunks[i]['document']['id'] = self.source_id_counter + + # Extract document metadata + document = chunk.get('document', {}) + metadata = document.get('metadata', {}) + + # Create a mapped source entry + source = { + "id": self.source_id_counter, + "title": document.get('title', 'GitHub Document'), # Use specific title if available + "description": metadata.get('description', chunk.get('content', '')[:100]), # Use description or content preview + "url": metadata.get('url', '') # Use URL if available in metadata + } + + self.source_id_counter += 1 + + # Use a unique identifier for tracking unique sources (URL preferred) + source_key = source.get("url") or source.get("title") + if source_key and source_key not in mapped_sources: + mapped_sources[source_key] = source + + # Convert to list of sources + sources_list = list(mapped_sources.values()) + + # Create result object + result_object = { + "id": 7, # Assuming 7 is the next available ID + "name": "GitHub", + "type": "GITHUB_CONNECTOR", + "sources": sources_list, + } + + return result_object, github_chunks diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx index 66f8b08..18b4357 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx @@ -94,7 +94,7 @@ import rehypeSanitize from "rehype-sanitize"; import remarkGfm from "remark-gfm"; import { 
DocumentViewer } from "@/components/document-viewer"; import { JsonMetadataViewer } from "@/components/json-metadata-viewer"; -import { IconBrandNotion, IconBrandSlack, IconBrandYoutube } from "@tabler/icons-react"; +import { IconBrandGithub, IconBrandNotion, IconBrandSlack, IconBrandYoutube } from "@tabler/icons-react"; // Define animation variants for reuse const fadeInScale = { @@ -142,6 +142,7 @@ const documentTypeIcons = { NOTION_CONNECTOR: IconBrandNotion, FILE: File, YOUTUBE_VIDEO: IconBrandYoutube, + GITHUB_CONNECTOR: IconBrandGithub, } as const; const columns: ColumnDef[] = [ @@ -1028,4 +1029,4 @@ function RowActions({ row }: { row: Row }) { ); } -export { DocumentsTable } \ No newline at end of file +export { DocumentsTable } From 396c070b28b0c2d0f7a8f54a104c64f22fcd03c0 Mon Sep 17 00:00:00 2001 From: Adamsmith6300 Date: Sun, 13 Apr 2025 21:33:10 -0700 Subject: [PATCH 3/4] addressing coderabbit PR comment --- surfsense_backend/app/schemas/search_source_connector.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/surfsense_backend/app/schemas/search_source_connector.py b/surfsense_backend/app/schemas/search_source_connector.py index 5386658..41e1086 100644 --- a/surfsense_backend/app/schemas/search_source_connector.py +++ b/surfsense_backend/app/schemas/search_source_connector.py @@ -59,14 +59,14 @@ class SearchSourceConnectorBase(BaseModel): raise ValueError("NOTION_INTEGRATION_TOKEN cannot be empty") elif connector_type == SearchSourceConnectorType.GITHUB_CONNECTOR: - # For GITHUB_CONNECTOR, only allow GITHUB_TOKEN + # For GITHUB_CONNECTOR, only allow GITHUB_PAT allowed_keys = ["GITHUB_PAT"] if set(config.keys()) != set(allowed_keys): raise ValueError(f"For GITHUB_CONNECTOR connector type, config must only contain these keys: {allowed_keys}") # Ensure the token is not empty if not config.get("GITHUB_PAT"): - raise ValueError("GITHUB_TOKEN cannot be empty") + raise ValueError("GITHUB_PAT cannot be empty") return config From 3e472c124fe323ca10ea8230b2bb20c3d0a857db Mon Sep 17 00:00:00 2001 From: Adamsmith6300 Date: Mon, 14 Apr 2025 17:04:43 -0700 Subject: [PATCH 4/4] sync with main and address comments --- .../app/utils/connector_service.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/surfsense_backend/app/utils/connector_service.py b/surfsense_backend/app/utils/connector_service.py index e0d6322..fe08572 100644 --- a/surfsense_backend/app/utils/connector_service.py +++ b/surfsense_backend/app/utils/connector_service.py @@ -529,8 +529,8 @@ class ConnectorService: document_type="GITHUB_CONNECTOR" ) - # Map github_chunks to the required format - mapped_sources = {} + # Process each chunk and create sources directly without deduplication + sources_list = [] for i, chunk in enumerate(github_chunks): # Fix for UI - assign a unique ID for citation/source tracking github_chunks[i]['document']['id'] = self.source_id_counter @@ -539,7 +539,7 @@ class ConnectorService: document = chunk.get('document', {}) metadata = document.get('metadata', {}) - # Create a mapped source entry + # Create a source entry source = { "id": self.source_id_counter, "title": document.get('title', 'GitHub Document'), # Use specific title if available @@ -548,18 +548,11 @@ class ConnectorService: } self.source_id_counter += 1 - - # Use a unique identifier for tracking unique sources (URL preferred) - source_key = source.get("url") or source.get("title") - if source_key and source_key not in mapped_sources: - mapped_sources[source_key] = source - - # 
Convert to list of sources - sources_list = list(mapped_sources.values()) + sources_list.append(source) # Create result object result_object = { - "id": 7, # Assuming 7 is the next available ID + "id": 8, "name": "GitHub", "type": "GITHUB_CONNECTOR", "sources": sources_list,
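
Note (not part of the patch): a minimal sketch of how the GitHubConnector introduced in this series might be exercised, assuming the module path app.connectors.github_connector (i.e. run from surfsense_backend) and the signatures visible in the hunks above — GitHubConnector(token=...), get_user_repositories(), and get_repository_files(repo_full_name, path=""). The per-file dict layout returned by get_repository_files is not shown in the diff, so the final count line is illustrative only.

import os

from app.connectors.github_connector import GitHubConnector

# Same config key the SearchSourceConnector schema validates for GITHUB_CONNECTOR.
pat = os.environ["GITHUB_PAT"]
connector = GitHubConnector(token=pat)

# Repositories owned by the token's user (type='owner' after this patch); each entry
# is a dict with id, name, full_name, private, url, description, last_updated.
repos = connector.get_user_repositories()
for repo in repos[:3]:
    print(repo["full_name"], "-", repo["description"] or "(no description)")

# Recursively list files for one repository; directories in SKIPPED_DIRS
# (.git, node_modules, venv, ...) are pruned during traversal.
if repos:
    files = connector.get_repository_files(repos[0]["full_name"])
    print(f"{len(files)} candidate files for indexing")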