mirror of
https://github.com/eigent-ai/eigent.git
synced 2026-05-25 14:46:19 +00:00
79 lines
4.1 KiB
Python
79 lines
4.1 KiB
Python
# ========= Copyright 2023-2026 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
# ========= Copyright 2023-2026 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
from typing import Any
|
|
|
|
from camel.prompts import TextPrompt, TextPromptDict
|
|
from camel.types import RoleType
|
|
|
|
|
|
# flake8: noqa: E501
|
|
class GenerateTextEmbeddingDataPromptTemplateDict(TextPromptDict):
    r"""Prompt templates for synthesizing text embedding training data.

    The two templates cover retrieval-task brainstorming and the generation
    of a query with its positive and hard negative documents, following the
    `"Improving Text Embeddings with Large Language Models"
    <https://arxiv.org/abs/2401.00368>`_ paper.

    Attributes:
        GENERATE_TASKS (TextPrompt): A prompt asking for a list of
            :obj:`num_tasks` text retrieval tasks. Stored under the
            ``"generate_tasks"`` key.
        ASSISTANT_PROMPT (TextPrompt): A prompt asking for one JSON example
            for a given :obj:`task`, holding a :obj:`user_query`, a
            :obj:`positive_document` and a :obj:`hard_negative_document`.
            It is parameterized by :obj:`query_type`, :obj:`query_length`,
            :obj:`clarity`, :obj:`num_words`, :obj:`language` and
            :obj:`difficulty`. Stored under the :obj:`RoleType.ASSISTANT`
            key.
    """

    # NOTE: the continuation lines of both literals are deliberately kept
    # flush-left; indenting them would change the text of the prompts.
    GENERATE_TASKS = TextPrompt(
        """You are an expert to brainstorm a list of {num_tasks} potentially useful text retrieval tasks
Here are a few examples for your reference:
- Provided a scientific claim as query, retrieve documents that help verify or refute the claim.
- Search for documents that answers a FAQ-style query on children's nutrition.
Please adhere to the following guidelines:
- Specify what the query is, and what the desired documents are.
- Each retrieval task should cover a wide range of queries, and should not be too specific.
Your output should always be a python list of strings starting with `1.`, `2.` etc.
And each element corresponds to a distinct retrieval task in one sentence.
Do not explain yourself or output anything else.
Be creative!"""
    )

    ASSISTANT_PROMPT = TextPrompt(
        """You have been assigned a retrieval task: {task}
Your mission is to write one text retrieval example for this task in JSON format. The JSON object must
contain the following keys:
- "user_query": a string, a random user search query specified by the retrieval task.
- "positive_document": a string, a relevant document for the user query.
- "hard_negative_document": a string, a hard negative document that only appears relevant to the query.
Please adhere to the following guidelines:
- The "user_query" should be {query_type}, {query_length}, {clarity}, and diverse in topic.
- All documents must be created independent of the query. Avoid copying the query verbatim.
It's acceptable if some parts of the "positive_document" are not topically related to the query.
- All documents should be at least {num_words} words long.
- The "hard_negative_document" contains some useful information, but it should be less useful or comprehensive compared to the "positive_document".
- Both the query and documents should be in {language}.
- Do not provide any explanation in any document on why it is relevant or not relevant to the query.
- Both the query and documents require {difficulty} level education to understand.
Your output must always be a JSON object only (starting and ending with curly brackets), do not explain yourself or output anything else. Be creative!"""
    )

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        r"""Initialize the dictionary and register both prompt templates.

        Args:
            *args (Any): Positional arguments passed through unchanged to
                the parent constructor.
            **kwargs (Any): Keyword arguments passed through unchanged to
                the parent constructor.
        """
        super().__init__(*args, **kwargs)

        # The templates are added only after the parent constructor has run.
        templates = {
            "generate_tasks": self.GENERATE_TASKS,
            RoleType.ASSISTANT: self.ASSISTANT_PROMPT,
        }
        self.update(templates)
|