feat(youtube): integrate YouTube video processing connector

- Added support for processing YouTube videos, including transcript extraction and document creation.
- Implemented a new background task for adding YouTube video documents.
- Enhanced the connector service to search for YouTube videos and return relevant results.
- Updated frontend components to include YouTube video options in the dashboard and connector sources.
- Added the necessary dependency for the YouTube transcript API (`youtube-transcript-api`).
2025-09-16 17:19:46 +00:00 · 2025-04-11 15:05:17 -07:00 · 2025-04-11 15:05:17 -07:00 · b43272a115
commit b43272a115
parent 753f40dfea
13 changed files with 608 additions and 18 deletions
--- a/surfsense_backend/app/routes/documents_routes.py
+++ b/surfsense_backend/app/routes/documents_routes.py
@ -6,7 +6,7 @@ from app.db import get_async_session, User, SearchSpace, Document, DocumentType
 from app.schemas import DocumentsCreate, DocumentUpdate, DocumentRead
 from app.users import current_active_user
 from app.utils.check_ownership import check_ownership
-from app.tasks.background_tasks import add_extension_received_document, add_received_file_document, add_crawled_url_document
+from app.tasks.background_tasks import add_extension_received_document, add_received_file_document, add_crawled_url_document, add_youtube_video_document
 # Force asyncio to use standard event loop before unstructured imports
 import asyncio
 try:
@ -368,8 +368,7 @@ async def process_youtube_video_with_new_session(
    
    async with async_session_maker() as session:
        try:
-            # TODO: Implement YouTube video processing
-            print("Processing YouTube video with new session")
+            await add_youtube_video_document(session, url, search_space_id)
        except Exception as e:
            import logging
            logging.error(f"Error processing YouTube video: {str(e)}")
--- a/surfsense_backend/app/tasks/background_tasks.py
+++ b/surfsense_backend/app/tasks/background_tasks.py
@ -244,3 +244,142 @@ async def add_received_file_document(
    except Exception as e:
        await session.rollback()
        raise RuntimeError(f"Failed to process file document: {str(e)}")
+
+
+async def add_youtube_video_document(
+    session: AsyncSession,
+    url: str,
+    search_space_id: int
+):
+    """
+    Process a YouTube video URL, extract transcripts, and add as document.
+    """
+    try:
+        from youtube_transcript_api import YouTubeTranscriptApi
+        
+        # Extract video ID from URL
+        def get_youtube_video_id(url: str):
+            from urllib.parse import urlparse, parse_qs
+            
+            parsed_url = urlparse(url)
+            hostname = parsed_url.hostname
+            
+            if hostname == "youtu.be":
+                return parsed_url.path[1:]
+            if hostname in ("www.youtube.com", "youtube.com"):
+                if parsed_url.path == "/watch":
+                    query_params = parse_qs(parsed_url.query)
+                    return query_params.get("v", [None])[0]
+                if parsed_url.path.startswith("/embed/"):
+                    return parsed_url.path.split("/")[2]
+                if parsed_url.path.startswith("/v/"):
+                    return parsed_url.path.split("/")[2]
+            return None
+        
+        # Get video ID
+        video_id = get_youtube_video_id(url)
+        if not video_id:
+            raise ValueError(f"Could not extract video ID from URL: {url}")
+            
+        # Get video metadata
+        import json
+        from urllib.parse import urlencode
+        from urllib.request import urlopen
+        
+        params = {"format": "json", "url": f"https://www.youtube.com/watch?v={video_id}"}
+        oembed_url = "https://www.youtube.com/oembed"
+        query_string = urlencode(params)
+        full_url = oembed_url + "?" + query_string
+        
+        with urlopen(full_url) as response:
+            response_text = response.read()
+            video_data = json.loads(response_text.decode())
+            
+        # Get video transcript
+        try:
+            captions = YouTubeTranscriptApi.get_transcript(video_id)
+            # Include complete caption information with timestamps
+            transcript_segments = []
+            for line in captions:
+                start_time = line.get("start", 0)
+                duration = line.get("duration", 0)
+                text = line.get("text", "")
+                timestamp = f"[{start_time:.2f}s-{start_time + duration:.2f}s]"
+                transcript_segments.append(f"{timestamp} {text}")
+            transcript_text = "\n".join(transcript_segments)
+        except Exception as e:
+            transcript_text = f"No captions available for this video. Error: {str(e)}"
+            
+        # Format document metadata in a more maintainable way
+        metadata_sections = [
+            ("METADATA", [
+                f"TITLE: {video_data.get('title', 'YouTube Video')}",
+                f"URL: {url}",
+                f"VIDEO_ID: {video_id}",
+                f"AUTHOR: {video_data.get('author_name', 'Unknown')}",
+                f"THUMBNAIL: {video_data.get('thumbnail_url', '')}"
+            ]),
+            ("CONTENT", [
+                "FORMAT: transcript",
+                "TEXT_START",
+                transcript_text,
+                "TEXT_END"
+            ])
+        ]
+
+        # Build the document string more efficiently
+        document_parts = []
+        document_parts.append("<DOCUMENT>")
+
+        for section_title, section_content in metadata_sections:
+            document_parts.append(f"<{section_title}>")
+            document_parts.extend(section_content)
+            document_parts.append(f"</{section_title}>")
+
+        document_parts.append("</DOCUMENT>")
+        combined_document_string = '\n'.join(document_parts)
+
+        # Generate summary
+        summary_chain = SUMMARY_PROMPT_TEMPLATE | config.long_context_llm_instance
+        summary_result = await summary_chain.ainvoke({"document": combined_document_string})
+        summary_content = summary_result.content
+        summary_embedding = config.embedding_model_instance.embed(summary_content)
+
+        # Process chunks
+        chunks = [
+            Chunk(content=chunk.text, embedding=chunk.embedding)
+            for chunk in config.chunker_instance.chunk(transcript_text)
+        ]
+            
+        # Create document
+        from app.db import Document, DocumentType
+        
+        document = Document(
+            title=video_data.get("title", "YouTube Video"),
+            document_type=DocumentType.YOUTUBE_VIDEO,
+            document_metadata={
+                "url": url,
+                "video_id": video_id,
+                "video_title": video_data.get("title", "YouTube Video"),
+                "author": video_data.get("author_name", "Unknown"),
+                "thumbnail": video_data.get("thumbnail_url", "")
+            },
+            content=summary_content,
+            embedding=summary_embedding,
+            chunks=chunks,
+            search_space_id=search_space_id
+        )
+        
+        session.add(document)
+        await session.commit()
+        await session.refresh(document)
+        
+        return document
+    except SQLAlchemyError as db_error:
+        await session.rollback()
+        raise db_error
+    except Exception as e:
+        await session.rollback()
+        import logging
+        logging.error(f"Failed to process YouTube video: {str(e)}")
+        raise
--- a/surfsense_backend/app/tasks/stream_connector_search_results.py
+++ b/surfsense_backend/app/tasks/stream_connector_search_results.py
@ -59,6 +59,32 @@ async def stream_connector_search_results(

    # Process each selected connector
    for connector in selected_connectors:
+        if connector == "YOUTUBE_VIDEO":
+            # Send terminal message about starting search
+            yield streaming_service.add_terminal_message("Starting to search for youtube videos...")
+            
+            # Search for YouTube videos using reformulated query
+            result_object, youtube_chunks = await connector_service.search_youtube(
+                user_query=reformulated_query,
+                user_id=user_id,
+                search_space_id=search_space_id,
+                top_k=TOP_K
+            )
+            
+            # Send terminal message about search results
+            yield streaming_service.add_terminal_message(
+                f"Found {len(result_object['sources'])} relevant YouTube videos",
+                "success"
+            )
+            
+            # Update sources
+            all_sources.append(result_object)
+            yield streaming_service.update_sources(all_sources)
+            
+            # Add documents to collection
+            all_raw_documents.extend(youtube_chunks)
+            
+            
        # Extension Docs
        if connector == "EXTENSION":
            # Send terminal message about starting search
--- a/surfsense_backend/app/utils/connector_service.py
+++ b/surfsense_backend/app/utils/connector_service.py
@ -480,3 +480,82 @@ class ConnectorService:
        }
        
        return result_object, extension_chunks
+    
+    async def search_youtube(self, user_query: str, user_id: int, search_space_id: int, top_k: int = 20) -> tuple:
+        """
+        Search for YouTube videos and return both the source information and langchain documents
+        
+        Args:
+            user_query: The user's query
+            user_id: The user's ID
+            search_space_id: The search space ID to search in
+            top_k: Maximum number of results to return
+            
+        Returns:
+            tuple: (sources_info, langchain_documents)
+        """
+        youtube_chunks = await self.retriever.hybrid_search(
+            query_text=user_query,
+            top_k=top_k,
+            user_id=user_id,
+            search_space_id=search_space_id,
+            document_type="YOUTUBE_VIDEO"
+        )
+
+        # Map youtube_chunks to the required format
+        mapped_sources = {}
+        for i, chunk in enumerate(youtube_chunks):
+            # Fix for UI
+            youtube_chunks[i]['document']['id'] = self.source_id_counter
+            
+            # Extract document metadata
+            document = chunk.get('document', {})
+            metadata = document.get('metadata', {})
+
+            # Extract YouTube-specific metadata
+            video_title = metadata.get('video_title', 'Untitled Video')
+            video_id = metadata.get('video_id', '')
+            channel_name = metadata.get('channel_name', '')
+            published_date = metadata.get('published_date', '')
+            
+            # Create a more descriptive title for YouTube videos
+            title = video_title
+            if channel_name:
+                title += f" - {channel_name}"
+                
+            # Create a more descriptive description for YouTube videos
+            description = metadata.get('description', chunk.get('content', '')[:100])
+            if len(description) == 100:
+                description += "..."
+                
+            # For URL, construct a URL to the YouTube video
+            url = f"https://www.youtube.com/watch?v={video_id}" if video_id else ""
+
+            source = {
+                "id": self.source_id_counter,
+                "title": title,
+                "description": description,
+                "url": url,
+                "video_id": video_id,  # Additional field for YouTube videos
+                "channel_name": channel_name  # Additional field for YouTube videos
+            }
+
+            self.source_id_counter += 1
+
+            # Use video_id as a unique identifier for tracking unique sources
+            source_key = video_id or f"youtube_{i}"
+            if source_key and source_key not in mapped_sources:
+                mapped_sources[source_key] = source
+        
+        # Convert to list of sources
+        sources_list = list(mapped_sources.values())
+        
+        # Create result object
+        result_object = {
+            "id": 6,  # Assign a unique ID for the YouTube connector
+            "name": "YouTube Videos",
+            "type": "YOUTUBE_VIDEO",
+            "sources": sources_list,
+        }
+        
+        return result_object, youtube_chunks
--- a/surfsense_backend/pyproject.toml
+++ b/surfsense_backend/pyproject.toml
@ -26,4 +26,5 @@ dependencies = [
    "unstructured[all-docs]>=0.16.25",
    "uvicorn[standard]>=0.34.0",
    "validators>=0.34.0",
+    "youtube-transcript-api>=1.0.3",
 ]
--- a/surfsense_backend/uv.lock
+++ b/surfsense_backend/uv.lock
@ -580,6 +580,15 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/c3/be/d0d44e092656fe7a06b55e6103cbce807cdbdee17884a5367c68c9860853/dataclasses_json-0.6.7-py3-none-any.whl", hash = "sha256:0dbf33f26c8d5305befd61b39d2b3414e8a407bedc2834dea9b8d642666fb40a", size = 28686 },
 ]

+[[package]]
+name = "defusedxml"
+version = "0.7.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/0f/d5/c66da9b79e5bdb124974bfe172b4daf3c984ebd9c2a06e2b8a4dc7331c72/defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69", size = 75520 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61", size = 25604 },
+]
+
 [[package]]
 name = "deprecated"
 version = "1.2.18"
@ -3240,6 +3249,7 @@ dependencies = [
    { name = "unstructured-client" },
    { name = "uvicorn", extra = ["standard"] },
    { name = "validators" },
+    { name = "youtube-transcript-api" },
 ]

 [package.metadata]
@ -3265,6 +3275,7 @@ requires-dist = [
    { name = "unstructured-client", specifier = ">=0.30.0" },
    { name = "uvicorn", extras = ["standard"], specifier = ">=0.34.0" },
    { name = "validators", specifier = ">=0.34.0" },
+    { name = "youtube-transcript-api", specifier = ">=1.0.3" },
 ]

 [[package]]
@ -3919,6 +3930,19 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/f5/4b/a06e0ec3d155924f77835ed2d167ebd3b211a7b0853da1cf8d8414d784ef/yarl-1.18.3-py3-none-any.whl", hash = "sha256:b57f4f58099328dfb26c6a771d09fb20dbbae81d20cfb66141251ea063bd101b", size = 45109 },
 ]

+[[package]]
+name = "youtube-transcript-api"
+version = "1.0.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "defusedxml" },
+    { name = "requests" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/b0/32/f60d87a99c05a53604c58f20f670c7ea6262b55e0bbeb836ffe4550b248b/youtube_transcript_api-1.0.3.tar.gz", hash = "sha256:902baf90e7840a42e1e148335e09fe5575dbff64c81414957aea7038e8a4db46", size = 2153252 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/f0/44/40c03bb0f8bddfb9d2beff2ed31641f52d96c287ba881d20e0c074784ac2/youtube_transcript_api-1.0.3-py3-none-any.whl", hash = "sha256:d1874e57de65cf14c9d7d09b2b37c814d6287fa0e770d4922c4cd32a5b3f6c47", size = 2169911 },
+]
+
 [[package]]
 name = "zipp"
 version = "3.21.0"
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/page.tsx
@ -94,7 +94,7 @@ import rehypeSanitize from "rehype-sanitize";
 import remarkGfm from "remark-gfm";
 import { DocumentViewer } from "@/components/document-viewer";
 import { JsonMetadataViewer } from "@/components/json-metadata-viewer";
-import { IconBrandNotion, IconBrandSlack } from "@tabler/icons-react";
+import { IconBrandNotion, IconBrandSlack, IconBrandYoutube } from "@tabler/icons-react";

 // Define animation variants for reuse
 const fadeInScale = {
@ -114,7 +114,7 @@ const fadeInScale = {
 type Document = {
    id: number;
    title: string;
-    document_type: "EXTENSION" | "CRAWLED_URL" | "SLACK_CONNECTOR" | "NOTION_CONNECTOR" | "FILE";
+    document_type: "EXTENSION" | "CRAWLED_URL" | "SLACK_CONNECTOR" | "NOTION_CONNECTOR" | "FILE" | "YOUTUBE_VIDEO";
    document_metadata: any;
    content: string;
    created_at: string;
@ -141,6 +141,7 @@ const documentTypeIcons = {
    SLACK_CONNECTOR: IconBrandSlack,
    NOTION_CONNECTOR: IconBrandNotion,
    FILE: File,
+    YOUTUBE_VIDEO: IconBrandYoutube,
 } as const;

 const columns: ColumnDef<Document>[] = [
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/youtube/page.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/youtube/page.tsx
@ -0,0 +1,302 @@
+"use client";
+
+import { useState } from 'react';
+import { useParams, useRouter } from 'next/navigation';
+import { Tag, TagInput } from "emblor";
+import { Button } from "@/components/ui/button";
+import { Label } from "@/components/ui/label";
+import { Card, CardContent, CardDescription, CardFooter, CardHeader, CardTitle } from "@/components/ui/card";
+import { toast } from "sonner";
+import { Youtube, Loader2 } from "lucide-react";
+import { motion } from "framer-motion";
+
+// YouTube video URL validation regex.
+// Accepts an optional http(s):// scheme and optional "www.", followed by either
+// youtube.com/watch?v=<11-char id> or youtu.be/<11-char id>. Trailing parameters
+// (e.g. "&t=30s" on watch URLs, "?si=..." on share links) or a #fragment are allowed.
+const youtubeRegex = /^(https?:\/\/)?(www\.)?(youtube\.com\/watch\?v=[a-zA-Z0-9_-]{11}([&#].*)?|youtu\.be\/[a-zA-Z0-9_-]{11}([?#].*)?)$/;
+
+export default function YouTubeVideoAdder() {
+  const params = useParams();
+  const router = useRouter();
+  const search_space_id = params.search_space_id as string;
+  
+  const [videoTags, setVideoTags] = useState<Tag[]>([]);
+  const [activeTagIndex, setActiveTagIndex] = useState<number | null>(null);
+  const [isSubmitting, setIsSubmitting] = useState(false);
+  const [error, setError] = useState<string | null>(null);
+
+  // True when the given text matches the supported YouTube URL formats
+  const isValidYoutubeUrl = (url: string): boolean => youtubeRegex.test(url);
+
+  // Pulls the 11-character video ID out of a watch or youtu.be URL (null when absent)
+  const extractVideoId = (url: string): string | null => {
+    const idPattern = /(?:youtube\.com\/watch\?v=|youtu\.be\/)([a-zA-Z0-9_-]{11})/;
+    const found = idPattern.exec(url);
+    if (found === null) {
+      return null;
+    }
+    return found[1];
+  };
+
+  // Validates the collected URLs and submits them to the backend for processing.
+  // On success the user is redirected to the documents page; on failure the
+  // error is shown inline and as a toast.
+  const handleSubmit = async () => {
+    // Validate that we have at least one video URL
+    if (videoTags.length === 0) {
+      setError("Please add at least one YouTube video URL");
+      return;
+    }
+
+    // Validate all URLs
+    const invalidUrls = videoTags.filter(tag => !isValidYoutubeUrl(tag.text));
+    if (invalidUrls.length > 0) {
+      setError(`Invalid YouTube URLs detected: ${invalidUrls.map(tag => tag.text).join(', ')}`);
+      return;
+    }
+
+    setError(null);
+    setIsSubmitting(true);
+
+    try {
+      toast("YouTube Video Processing", {
+        description: "Starting YouTube video processing...",
+      });
+
+      // Send canonical watch URLs: validation accepts input without a scheme,
+      // which the backend URL parser cannot resolve to a host. The Set drops
+      // the same video entered in two different URL forms.
+      const videoUrls = Array.from(
+        new Set(
+          videoTags.map(tag => {
+            const videoId = extractVideoId(tag.text);
+            return videoId ? `https://www.youtube.com/watch?v=${videoId}` : tag.text;
+          })
+        )
+      );
+
+      // Make API call to backend
+      const response = await fetch(`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/documents/`, {
+        method: "POST",
+        headers: {
+          'Content-Type': 'application/json',
+          'Authorization': `Bearer ${localStorage.getItem("surfsense_bearer_token")}`
+        },
+        body: JSON.stringify({
+          "document_type": "YOUTUBE_VIDEO",
+          "content": videoUrls,
+          "search_space_id": parseInt(search_space_id, 10)
+        }),
+      });
+
+      if (!response.ok) {
+        throw new Error("Failed to process YouTube videos");
+      }
+
+      await response.json();
+
+      toast("Processing Successful", {
+        description: "YouTube videos have been submitted for processing",
+      });
+
+      // Redirect to documents page
+      router.push(`/dashboard/${search_space_id}/documents`);
+    } catch (error: unknown) {
+      // Narrow before reading .message so non-Error throwables do not render "undefined"
+      const message =
+        error instanceof Error && error.message
+          ? error.message
+          : "An error occurred while processing YouTube videos";
+      setError(message);
+      toast("Processing Error", {
+        description: `Error processing YouTube videos: ${message}`,
+      });
+    } finally {
+      setIsSubmitting(false);
+    }
+  };
+
+  // Adds a new video URL tag after validating it and rejecting duplicates.
+  // Two URLs count as duplicates when their text is identical or when they
+  // point at the same video ID (e.g. a youtu.be link and a watch link).
+  const handleAddTag = (text: string) => {
+    const candidate = text.trim();
+
+    // Basic URL validation
+    if (!isValidYoutubeUrl(candidate)) {
+      toast("Invalid YouTube URL", {
+        description: "Please enter a valid YouTube video URL",
+      });
+      return;
+    }
+
+    // Check for duplicates
+    const candidateId = extractVideoId(candidate);
+    const alreadyAdded = videoTags.some(
+      tag =>
+        tag.text === candidate ||
+        (candidateId !== null && extractVideoId(tag.text) === candidateId)
+    );
+    if (alreadyAdded) {
+      toast("Duplicate URL", {
+        description: "This YouTube video has already been added",
+      });
+      return;
+    }
+
+    // Add the new tag
+    const newTag: Tag = {
+      id: Date.now().toString(),
+      text: candidate,
+    };
+
+    // Functional update so rapid additions never overwrite each other
+    setVideoTags(previous => [...previous, newTag]);
+  };
+
+  // Animation variants
+  const containerVariants = {
+    hidden: { opacity: 0 },
+    visible: { 
+      opacity: 1,
+      transition: { 
+        staggerChildren: 0.1
+      }
+    }
+  };
+  
+  const itemVariants = {
+    hidden: { y: 20, opacity: 0 },
+    visible: { 
+      y: 0, 
+      opacity: 1,
+      transition: {
+        type: "spring",
+        stiffness: 300,
+        damping: 24
+      }
+    }
+  };
+
+  return (
+    <div className="container mx-auto py-8">
+      <motion.div
+        initial="hidden"
+        animate="visible"
+        variants={containerVariants}
+      >
+        <Card className="max-w-2xl mx-auto">
+          <motion.div variants={itemVariants}>
+            <CardHeader>
+              <CardTitle className="flex items-center gap-2">
+                <Youtube className="h-5 w-5" />
+                Add YouTube Videos
+              </CardTitle>
+              <CardDescription>
+                Enter YouTube video URLs to add to your document collection
+              </CardDescription>
+            </CardHeader>
+          </motion.div>
+          
+          <motion.div variants={itemVariants}>
+            <CardContent>
+              <div className="space-y-4">
+                <div className="space-y-2">
+                  <Label htmlFor="video-input">Enter YouTube Video URLs</Label>
+                  <TagInput
+                    id="video-input"
+                    tags={videoTags}
+                    setTags={setVideoTags}
+                    placeholder="Enter a YouTube URL and press Enter"
+                    onAddTag={handleAddTag}
+                    styleClasses={{
+                      inlineTagsContainer:
+                        "border-input rounded-lg bg-background shadow-sm shadow-black/5 transition-shadow focus-within:border-ring focus-within:outline-none focus-within:ring-[3px] focus-within:ring-ring/20 p-1 gap-1",
+                      input: "w-full min-w-[80px] focus-visible:outline-none shadow-none px-2 h-7",
+                      tag: {
+                        body: "h-7 relative bg-background border border-input hover:bg-background rounded-md font-medium text-xs ps-2 pe-7 flex",
+                        closeButton:
+                          "absolute -inset-y-px -end-px p-0 rounded-e-lg flex size-7 transition-colors outline-0 focus-visible:outline focus-visible:outline-2 focus-visible:outline-ring/70 text-muted-foreground/80 hover:text-foreground",
+                      },
+                    }}
+                    activeTagIndex={activeTagIndex}
+                    setActiveTagIndex={setActiveTagIndex}
+                  />
+                  <p className="text-xs text-muted-foreground mt-1">
+                    Add multiple YouTube URLs by pressing Enter after each one
+                  </p>
+                </div>
+
+                {error && (
+                  <motion.div 
+                    className="text-sm text-red-500 mt-2"
+                    initial={{ opacity: 0, scale: 0.9 }}
+                    animate={{ opacity: 1, scale: 1 }}
+                    transition={{ type: "spring", stiffness: 500, damping: 30 }}
+                  >
+                    {error}
+                  </motion.div>
+                )}
+
+                <motion.div 
+                  variants={itemVariants}
+                  className="bg-muted/50 rounded-lg p-4 text-sm"
+                >
+                  <h4 className="font-medium mb-2">Tips for adding YouTube videos:</h4>
+                  <ul className="list-disc pl-5 space-y-1 text-muted-foreground">
+                    <li>Use standard YouTube URLs (youtube.com/watch?v= or youtu.be/)</li>
+                    <li>Make sure videos are publicly accessible</li>
+                    <li>Supported formats: youtube.com/watch?v=VIDEO_ID or youtu.be/VIDEO_ID</li>
+                    <li>Processing may take some time depending on video length</li>
+                  </ul>
+                </motion.div>
+
+                {videoTags.length > 0 && (
+                  <motion.div
+                    variants={itemVariants}
+                    className="mt-4 space-y-2"
+                  >
+                    <h4 className="font-medium">Preview:</h4>
+                    <div className="grid grid-cols-1 gap-3">
+                      {videoTags.map((tag, index) => {
+                        const videoId = extractVideoId(tag.text);
+                        return videoId ? (
+                          <motion.div
+                            key={tag.id}
+                            initial={{ opacity: 0, y: 10 }}
+                            animate={{ opacity: 1, y: 0 }}
+                            transition={{ delay: index * 0.1 }}
+                            className="relative aspect-video rounded-lg overflow-hidden border"
+                          >
+                            <iframe
+                              width="100%"
+                              height="100%"
+                              src={`https://www.youtube.com/embed/${videoId}`}
+                              title="YouTube video player"
+                              allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture"
+                              allowFullScreen
+                            ></iframe>
+                          </motion.div>
+                        ) : null;
+                      })}
+                    </div>
+                  </motion.div>
+                )}
+              </div>
+            </CardContent>
+          </motion.div>
+          
+          <motion.div variants={itemVariants}>
+            <CardFooter className="flex justify-between">
+              <Button 
+                variant="outline" 
+                onClick={() => router.push(`/dashboard/${search_space_id}/documents`)}
+              >
+                Cancel
+              </Button>
+              <Button 
+                onClick={handleSubmit} 
+                disabled={isSubmitting || videoTags.length === 0}
+                className="relative overflow-hidden"
+              >
+                {isSubmitting ? (
+                  <>
+                    <Loader2 className="mr-2 h-4 w-4 animate-spin" />
+                    Processing...
+                  </>
+                ) : (
+                  <>
+                    <motion.span
+                      initial={{ x: -5, opacity: 0 }}
+                      animate={{ x: 0, opacity: 1 }}
+                      transition={{ delay: 0.2 }}
+                      className="mr-2"
+                    >
+                      <Youtube className="h-4 w-4" />
+                    </motion.span>
+                    Submit YouTube Videos
+                  </>
+                )}
+                <motion.div
+                  className="absolute inset-0 bg-primary/10"
+                  initial={{ x: "-100%" }}
+                  animate={isSubmitting ? { x: "0%" } : { x: "-100%" }}
+                  transition={{ duration: 0.5, ease: "easeInOut" }}
+                />
+              </Button>
+            </CardFooter>
+          </motion.div>
+        </Card>
+      </motion.div>
+    </div>
+  );
+}
--- a/surfsense_web/app/dashboard/[search_space_id]/layout.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/layout.tsx
@ -48,6 +48,10 @@ export default function DashboardLayout({
          title: "Add Webpages",
          url: `/dashboard/${search_space_id}/documents/webpage`,
        },
+        {
+          title: "Add Youtube Videos",
+          url: `/dashboard/${search_space_id}/documents/youtube`,
+        },
        {
          title: "Manage Documents",
          url: `/dashboard/${search_space_id}/documents`,
--- a/surfsense_web/app/dashboard/[search_space_id]/researcher/[chat_id]/page.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/researcher/[chat_id]/page.tsx
@ -239,12 +239,10 @@ const SourcesDialogContent = ({

 const ChatPage = () => {
  const [token, setToken] = React.useState<string | null>(null);
-  const [showAnswer, setShowAnswer] = useState(true);
  const [activeTab, setActiveTab] = useState("");
  const [dialogOpen, setDialogOpen] = useState(false);
  const [sourcesPage, setSourcesPage] = useState(1);
  const [expandedSources, setExpandedSources] = useState(false);
-  const [isLoadingMore, setIsLoadingMore] = useState(false);
  const [canScrollLeft, setCanScrollLeft] = useState(false);
  const [canScrollRight, setCanScrollRight] = useState(true);
  const [sourceFilter, setSourceFilter] = useState("");
@ -258,7 +256,6 @@ const ChatPage = () => {
  const terminalMessagesRef = useRef<HTMLDivElement>(null);
  const { connectorSourceItems, isLoading: isLoadingConnectors } = useSearchSourceConnectors();

-  const SOURCES_PER_PAGE = 5;
  const INITIAL_SOURCES_DISPLAY = 3;

  const { search_space_id, chat_id } = useParams();
@ -836,7 +833,7 @@ const ChatPage = () => {
                        {connectorSources.map(connector => (
                          <TabsContent key={connector.id} value={connector.type} className="mt-0">
                            <div className="space-y-3">
-                              {getMainViewSources(connector).map((source: any) => (
+                              {getMainViewSources(connector)?.map((source: any) => (
                                <Card key={source.id} className="p-3 hover:bg-gray-50 dark:hover:bg-gray-800 cursor-pointer">
                                  <div className="flex items-start gap-3">
                                    <div className="flex-shrink-0 w-6 h-6 flex items-center justify-center">
@ -874,7 +871,7 @@ const ChatPage = () => {
                                      setSourcesPage={setSourcesPage}
                                      setSourceFilter={setSourceFilter}
                                      setExpandedSources={setExpandedSources}
-                                      isLoadingMore={isLoadingMore}
+                                      isLoadingMore={false}
                                    />
                                  </DialogContent>
                                </Dialog>
@ -887,7 +884,7 @@ const ChatPage = () => {

                    {/* Answer Section */}
                    <div className="mb-6">
-                      {showAnswer && (
+                      {
                        <div className="prose dark:prose-invert max-w-none">
                          {message.annotations && (() => {
                            // Get all ANSWER annotations
@ -913,7 +910,7 @@ const ChatPage = () => {
                            return <MarkdownViewer content={message.content} getCitationSource={getCitationSource} />;
                          })()}
                        </div>
-                      )}
+                      }
                    </div>
                    {/* Scroll to bottom button */}
                    <div className="fixed bottom-8 right-8">
--- a/surfsense_web/components/chat/ConnectorComponents.tsx
+++ b/surfsense_web/components/chat/ConnectorComponents.tsx
@ -4,15 +4,14 @@ import {
  Plus,
  Search,
  Globe,
-  BookOpen,
  Sparkles,
  Microscope,
  Telescope,
  File,
  Link,
-  Slack,
-  Webhook
+  Webhook,
 } from 'lucide-react';
+import { IconBrandNotion, IconBrandSlack, IconBrandYoutube } from "@tabler/icons-react";
 import { Button } from '@/components/ui/button';
 import { Connector, ResearchMode } from './types';

@ -21,6 +20,8 @@ export const getConnectorIcon = (connectorType: string) => {
  const iconProps = { className: "h-4 w-4" };
  
  switch(connectorType) {
+    case 'YOUTUBE_VIDEO':
+      return <IconBrandYoutube {...iconProps} />;
    case 'CRAWLED_URL':
      return <Globe {...iconProps} />;
    case 'FILE':
@ -31,9 +32,9 @@ export const getConnectorIcon = (connectorType: string) => {
    case 'TAVILY_API':
      return <Link {...iconProps} />;
    case 'SLACK_CONNECTOR':
-      return <Slack {...iconProps} />;
+      return <IconBrandSlack {...iconProps} />;
    case 'NOTION_CONNECTOR':
-      return <BookOpen {...iconProps} />;
+      return <IconBrandNotion {...iconProps} />;
    case 'DEEP':
      return <Sparkles {...iconProps} />;
    case 'DEEPER':
--- a/surfsense_web/components/chat/connector-sources.ts
+++ b/surfsense_web/components/chat/connector-sources.ts
@ -15,4 +15,9 @@ export const connectorSourcesMenu = [
      name: "Extension",
      type: "EXTENSION",
    },
+    {
+      id: 4,
+      name: "Youtube Video",
+      type: "YOUTUBE_VIDEO",
+    }
  ];
--- a/surfsense_web/hooks/useSearchSourceConnectors.ts
+++ b/surfsense_web/hooks/useSearchSourceConnectors.ts
@ -43,6 +43,12 @@ export const useSearchSourceConnectors = () => {
      name: "Extension",
      type: "EXTENSION",
      sources: [],
+    },
+    {
+      id: 4,
+      name: "Youtube Video",
+      type: "YOUTUBE_VIDEO",
+      sources: [],
    }
  ]);

@ -108,6 +114,12 @@ export const useSearchSourceConnectors = () => {
        name: "Extension",
        type: "EXTENSION",
        sources: [],
+      },
+      {
+        id: 4,
+        name: "Youtube Video",
+        type: "YOUTUBE_VIDEO",
+        sources: [],
      }
    ];