mirror of
https://github.com/eigent-ai/eigent.git
synced 2026-05-24 22:04:09 +00:00
85 lines
3 KiB
Python
85 lines
3 KiB
Python
# ========= Copyright 2023-2026 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
# ========= Copyright 2023-2026 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
from abc import ABC, abstractmethod
|
|
from pathlib import Path
|
|
from typing import Any, Dict, List, Union
|
|
|
|
|
|
class BaseLoader(ABC):
    r"""Abstract base class for all data loaders in CAMEL.

    Subclasses implement :meth:`_load_single` (how one source is read) and
    :attr:`supported_formats`. :meth:`load` builds on ``_load_single`` to
    accept either a single source or a collection of sources.
    """

    @abstractmethod
    def _load_single(self, source: Union[str, Path]) -> Dict[str, Any]:
        r"""Load data from a single source.

        Args:
            source (Union[str, Path]): The data source to load from.

        Returns:
            Dict[str, Any]: A dictionary containing the loaded data. It is
                recommended that the dictionary includes a "content" key with
                the primary data and optional metadata keys.
        """
        pass

    def load(
        self,
        source: Union[str, Path, List[Union[str, Path]]],
    ) -> Dict[str, List[Dict[str, Any]]]:
        r"""Load data from one or multiple sources.

        Args:
            source (Union[str, Path, List[Union[str, Path]]]): The data source
                (s) to load from. Can be:
                - A single path/URL (str or Path)
                - A list of paths/URLs

        Returns:
            Dict[str, List[Dict[str, Any]]]: A dictionary with a single key
                "contents" containing a list of loaded data, in the same
                order as the given sources. If a single source is provided,
                the list will contain a single item.

        Raises:
            ValueError: If no sources are provided (a falsy ``source`` such
                as ``None``, ``""`` or ``[]``, or an iterable that yields
                nothing).
            RuntimeError: If loading fails for any source. The original
                exception is attached as ``__cause__``.
        """
        if not source:
            raise ValueError("At least one source must be provided")

        # Convert single source to list for uniform processing
        sources = [source] if isinstance(source, (str, Path)) else list(source)

        # Iterables without a meaningful truth value (e.g. generators) pass
        # the first check even when empty, so re-check after materializing.
        if not sources:
            raise ValueError("At least one source must be provided")

        total = len(sources)
        results = []
        for i, src in enumerate(sources, 1):
            try:
                results.append(self._load_single(src))
            except Exception as e:
                # Report which source failed and keep the root cause chained.
                raise RuntimeError(
                    f"Error loading source {i}/{total}: {src}"
                ) from e

        return {"contents": results}

    @property
    @abstractmethod
    def supported_formats(self) -> set[str]:
        r"""Get the set of supported file formats or data sources.

        Returns:
            set[str]: A set of strings representing the supported formats/
                sources.
        """
        pass
|