Generate

TestsetGenerator dataclass

TestsetGenerator(llm: BaseRagasLLM, embedding_model: BaseRagasEmbeddings, knowledge_graph: KnowledgeGraph = KnowledgeGraph(), persona_list: Optional[List[Persona]] = None)

Generates an evaluation dataset based on given scenarios and parameters.

Attributes

| Name | Type | Description |
| --- | --- | --- |
| `llm` | `BaseRagasLLM` | The language model to use for the generation process. |
| `knowledge_graph` | `KnowledgeGraph`, default empty | The knowledge graph to use for the generation process. |
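
For reference, a minimal construction sketch, assuming the OpenAI clients from `langchain-openai` (the wrapper classes live in `ragas.llms` and `ragas.embeddings`; the model names are illustrative):

```python
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

from ragas.embeddings import LangchainEmbeddingsWrapper
from ragas.llms import LangchainLLMWrapper
from ragas.testset import TestsetGenerator
from ragas.testset.graph import KnowledgeGraph

# Langchain clients must be wrapped before being passed to the dataclass.
generator = TestsetGenerator(
    llm=LangchainLLMWrapper(ChatOpenAI(model="gpt-4o")),  # model name is illustrative
    embedding_model=LangchainEmbeddingsWrapper(OpenAIEmbeddings()),
    knowledge_graph=KnowledgeGraph(),  # empty graph; populated later by transforms
)
```

In practice the `from_langchain` and `from_llama_index` classmethods below do this wrapping for you.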

from_langchain classmethod

from_langchain(llm: BaseLanguageModel, embedding_model: Embeddings, knowledge_graph: Optional[KnowledgeGraph] = None) -> TestsetGenerator

Creates a `TestsetGenerator` from a Langchain LLM.

Source code in src/ragas/testset/synthesizers/generate.py
@classmethod
def from_langchain(
    cls,
    llm: LangchainLLM,
    embedding_model: LangchainEmbeddings,
    knowledge_graph: t.Optional[KnowledgeGraph] = None,
) -> TestsetGenerator:
    """
    Creates a `TestsetGenerator` from a Langchain LLM and embedding model.
    """
    knowledge_graph = knowledge_graph or KnowledgeGraph()
    return cls(
        LangchainLLMWrapper(llm),
        LangchainEmbeddingsWrapper(embedding_model),
        knowledge_graph,
    )
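
A minimal usage sketch, assuming the `langchain-openai` package and an `OPENAI_API_KEY` in the environment (model names are illustrative):

```python
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

from ragas.testset import TestsetGenerator

# from_langchain wraps both clients in the Ragas wrapper classes internally.
generator = TestsetGenerator.from_langchain(
    llm=ChatOpenAI(model="gpt-4o"),
    embedding_model=OpenAIEmbeddings(model="text-embedding-3-small"),
)
```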

from_llama_index classmethod

from_llama_index(llm: BaseLLM, embedding_model: BaseEmbedding, knowledge_graph: Optional[KnowledgeGraph] = None) -> TestsetGenerator

Creates a `TestsetGenerator` from a LlamaIndex LLM and embedding model.

Source code in src/ragas/testset/synthesizers/generate.py
@classmethod
def from_llama_index(
    cls,
    llm: LlamaIndexLLM,
    embedding_model: LlamaIndexEmbedding,
    knowledge_graph: t.Optional[KnowledgeGraph] = None,
) -> TestsetGenerator:
    """
    Creates a `TestsetGenerator` from a LlamaIndex LLM and embedding model.
    """
    knowledge_graph = knowledge_graph or KnowledgeGraph()
    return cls(
        LlamaIndexLLMWrapper(llm),
        LlamaIndexEmbeddingsWrapper(embedding_model),
        knowledge_graph,
    )
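
An analogous sketch for LlamaIndex, assuming the `llama-index-llms-openai` and `llama-index-embeddings-openai` packages (model names are illustrative):

```python
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI

from ragas.testset import TestsetGenerator

generator = TestsetGenerator.from_llama_index(
    llm=OpenAI(model="gpt-4o"),
    embedding_model=OpenAIEmbedding(model="text-embedding-3-small"),
)
```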

generate_with_langchain_docs

generate_with_langchain_docs(documents: Sequence[Document], testset_size: int, transforms: Optional[Transforms] = None, transforms_llm: Optional[BaseRagasLLM] = None, transforms_embedding_model: Optional[BaseRagasEmbeddings] = None, query_distribution: Optional[QueryDistribution] = None, run_config: Optional[RunConfig] = None, callbacks: Optional[Callbacks] = None, with_debugging_logs=False, raise_exceptions: bool = True) -> Testset

Generates an evaluation dataset based on given Langchain documents and parameters.

Parameters

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `documents` | `Sequence[Document]` | A sequence of Langchain documents to use as source material | *required* |
| `testset_size` | `int` | The number of test samples to generate | *required* |
| `transforms` | `Optional[Transforms]` | Custom transforms to apply to the documents, by default None | `None` |
| `transforms_llm` | `Optional[BaseRagasLLM]` | LLM to use for transforms if different from the instance LLM, by default None | `None` |
| `transforms_embedding_model` | `Optional[BaseRagasEmbeddings]` | Embedding model to use for transforms if different from the instance model, by default None | `None` |
| `query_distribution` | `Optional[QueryDistribution]` | Distribution of query types to generate, by default None | `None` |
| `run_config` | `Optional[RunConfig]` | Configuration for the generation run, by default None | `None` |
| `callbacks` | `Optional[Callbacks]` | Callbacks to use during generation, by default None | `None` |
| `with_debugging_logs` | `bool` | Whether to include debug logs, by default False | `False` |
| `raise_exceptions` | `bool` | Whether to raise exceptions during generation, by default True | `True` |

Returns

| Type | Description |
| --- | --- |
| `Testset` | The generated evaluation dataset |

Raises

| Type | Description |
| --- | --- |
| `ValueError` | If no LLM or embedding model is provided either during initialization or as arguments |

Source code in src/ragas/testset/synthesizers/generate.py
def generate_with_langchain_docs(
    self,
    documents: t.Sequence[LCDocument],
    testset_size: int,
    transforms: t.Optional[Transforms] = None,
    transforms_llm: t.Optional[BaseRagasLLM] = None,
    transforms_embedding_model: t.Optional[BaseRagasEmbeddings] = None,
    query_distribution: t.Optional[QueryDistribution] = None,
    run_config: t.Optional[RunConfig] = None,
    callbacks: t.Optional[Callbacks] = None,
    with_debugging_logs=False,
    raise_exceptions: bool = True,
) -> Testset:
    """
    Generates an evaluation dataset based on given Langchain documents and parameters.

    Parameters
    ----------
    documents : Sequence[LCDocument]
        A sequence of Langchain documents to use as source material
    testset_size : int
        The number of test samples to generate
    transforms : Optional[Transforms], optional
        Custom transforms to apply to the documents, by default None
    transforms_llm : Optional[BaseRagasLLM], optional
        LLM to use for transforms if different from instance LLM, by default None
    transforms_embedding_model : Optional[BaseRagasEmbeddings], optional
        Embedding model to use for transforms if different from instance model, by default None
    query_distribution : Optional[QueryDistribution], optional
        Distribution of query types to generate, by default None
    run_config : Optional[RunConfig], optional
        Configuration for the generation run, by default None
    callbacks : Optional[Callbacks], optional
        Callbacks to use during generation, by default None
    with_debugging_logs : bool, optional
        Whether to include debug logs, by default False
    raise_exceptions : bool, optional
        Whether to raise exceptions during generation, by default True

    Returns
    -------
    Testset
        The generated evaluation dataset

    Raises
    ------
    ValueError
        If no LLM or embedding model is provided either during initialization or as arguments
    """

    # force the user to provide an llm and embedding client to prevent use of default LLMs
    if not self.llm and not transforms_llm:
        raise ValueError(
            """An llm client was not provided.
                   Provide an LLM on TestsetGenerator instantiation or as an argument for transforms_llm parameter.
                   Alternatively you can provide your own transforms through the `transforms` parameter."""
        )
    if not self.embedding_model and not transforms_embedding_model:
        raise ValueError(
            """An embedding client was not provided. Provide an embedding through the transforms_embedding_model parameter. Alternatively you can provide your own transforms through the `transforms` parameter."""
        )

    if not transforms:
        transforms = default_transforms(
            documents=list(documents),
            llm=transforms_llm or self.llm,
            embedding_model=transforms_embedding_model or self.embedding_model,
        )

    # convert the documents to Ragas nodes
    nodes = []
    for doc in documents:
        node = Node(
            type=NodeType.DOCUMENT,
            properties={
                "page_content": doc.page_content,
                "document_metadata": doc.metadata,
            },
        )
        nodes.append(node)

    kg = KnowledgeGraph(nodes=nodes)

    # apply transforms and update the knowledge graph
    apply_transforms(kg, transforms)
    self.knowledge_graph = kg

    return self.generate(
        testset_size=testset_size,
        query_distribution=query_distribution,
        run_config=run_config,
        callbacks=callbacks,
        with_debugging_logs=with_debugging_logs,
        raise_exceptions=raise_exceptions,
    )
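
A hedged end-to-end sketch, assuming a generator built with `from_langchain` and a corpus loaded with any Langchain document loader (the `data/docs/` path is a placeholder):

```python
from langchain_community.document_loaders import DirectoryLoader

docs = DirectoryLoader("data/docs/").load()  # placeholder corpus path

testset = generator.generate_with_langchain_docs(
    documents=docs,
    testset_size=10,
)
print(testset.to_pandas().head())
```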

generate_with_llamaindex_docs

generate_with_llamaindex_docs(documents: Sequence[Document], testset_size: int, transforms: Optional[Transforms] = None, transforms_llm: Optional[BaseLLM] = None, transforms_embedding_model: Optional[BaseEmbedding] = None, query_distribution: Optional[QueryDistribution] = None, run_config: Optional[RunConfig] = None, callbacks: Optional[Callbacks] = None, with_debugging_logs=False, raise_exceptions: bool = True)

Generates an evaluation dataset based on given scenarios and parameters.

Source code in src/ragas/testset/synthesizers/generate.py
def generate_with_llamaindex_docs(
    self,
    documents: t.Sequence[LlamaIndexDocument],
    testset_size: int,
    transforms: t.Optional[Transforms] = None,
    transforms_llm: t.Optional[LlamaIndexLLM] = None,
    transforms_embedding_model: t.Optional[LlamaIndexEmbedding] = None,
    query_distribution: t.Optional[QueryDistribution] = None,
    run_config: t.Optional[RunConfig] = None,
    callbacks: t.Optional[Callbacks] = None,
    with_debugging_logs=False,
    raise_exceptions: bool = True,
):
    """
    Generates an evaluation dataset based on given scenarios and parameters.
    """

    run_config = run_config or RunConfig()

    # force the user to provide an llm and embedding client to prevent use of default LLMs
    if not self.llm and not transforms_llm:
        raise ValueError(
            "An llm client was not provided. Provide an LLM on TestsetGenerator instantiation or as an argument for transforms_llm parameter. Alternatively you can provide your own transforms through the `transforms` parameter."
        )
    if not self.embedding_model and not transforms_embedding_model:
        raise ValueError(
            "An embedding client was not provided. Provide an embedding through the transforms_embedding_model parameter. Alternatively you can provide your own transforms through the `transforms` parameter."
        )

    if not transforms:
        # use TestsetGenerator's LLM and embedding model if no transforms_llm or transforms_embedding_model is provided
        if transforms_llm is None:
            llm_for_transforms = self.llm
        else:
            llm_for_transforms = LlamaIndexLLMWrapper(transforms_llm)
        if transforms_embedding_model is None:
            embedding_model_for_transforms = self.embedding_model
        else:
            embedding_model_for_transforms = LlamaIndexEmbeddingsWrapper(
                transforms_embedding_model
            )

        # create the transforms
        transforms = default_transforms(
            documents=[LCDocument(page_content=doc.text) for doc in documents],
            llm=llm_for_transforms,
            embedding_model=embedding_model_for_transforms,
        )

    # convert the documents to Ragas nodes
    nodes = []
    for doc in documents:
        if doc.text is not None and doc.text.strip() != "":
            node = Node(
                type=NodeType.DOCUMENT,
                properties={
                    "page_content": doc.text,
                    "document_metadata": doc.metadata,
                },
            )
            nodes.append(node)

    kg = KnowledgeGraph(nodes=nodes)

    # apply transforms and update the knowledge graph
    apply_transforms(kg, transforms, run_config)
    self.knowledge_graph = kg

    return self.generate(
        testset_size=testset_size,
        query_distribution=query_distribution,
        run_config=run_config,
        callbacks=callbacks,
        with_debugging_logs=with_debugging_logs,
        raise_exceptions=raise_exceptions,
    )
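
A matching sketch for LlamaIndex documents, assuming a generator built with `from_llama_index` (the reader path is a placeholder):

```python
from llama_index.core import SimpleDirectoryReader

docs = SimpleDirectoryReader("data/docs/").load_data()  # placeholder corpus path

testset = generator.generate_with_llamaindex_docs(
    documents=docs,
    testset_size=10,
)
```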

generate

generate(testset_size: int, query_distribution: Optional[QueryDistribution] = None, num_personas: int = 3, run_config: Optional[RunConfig] = None, batch_size: Optional[int] = None, callbacks: Optional[Callbacks] = None, token_usage_parser: Optional[TokenUsageParser] = None, with_debugging_logs=False, raise_exceptions: bool = True) -> Testset

Generates an evaluation dataset based on given scenarios and parameters.

Parameters

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `testset_size` | `int` | The number of samples to generate. | *required* |
| `query_distribution` | `Optional[QueryDistribution]` | A list of tuples containing scenario simulators and their probabilities. If None, default simulators will be used. | `None` |
| `num_personas` | `int` | The number of personas to generate or use from the persona_list. | `3` |
| `run_config` | `Optional[RunConfig]` | Configuration for running the generation process. | `None` |
| `batch_size` | `Optional[int]` | How large batches should be. If set to None (default), no batching is done. | `None` |
| `callbacks` | `Optional[Callbacks]` | Langchain-style callbacks to use for the generation process. You can use this to log the generation process or add other metadata. | `None` |
| `token_usage_parser` | `Optional[TokenUsageParser]` | Parses the LLMResult object and returns a TokenUsage object. This is used to calculate the cost of the generation process. | `None` |
| `with_debugging_logs` | `bool` | If True, enable debug logging for various components. | `False` |
| `raise_exceptions` | `bool` | If True, raise exceptions during the generation process. | `True` |

Returns

| Type | Description |
| --- | --- |
| `Testset` | A dataset containing the generated TestsetSamples. |

Notes

This function performs the following steps:

1. Set up scenarios and debug logging if required.
2. Generate scenarios using an Executor.
3. Calculate split values for different scenario types.
4. Generate samples for each scenario.
5. Compile the results into an EvaluationDataset.

Source code in src/ragas/testset/synthesizers/generate.py
def generate(
    self,
    testset_size: int,
    query_distribution: t.Optional[QueryDistribution] = None,
    num_personas: int = 3,
    run_config: t.Optional[RunConfig] = None,
    batch_size: t.Optional[int] = None,
    callbacks: t.Optional[Callbacks] = None,
    token_usage_parser: t.Optional[TokenUsageParser] = None,
    with_debugging_logs=False,
    raise_exceptions: bool = True,
) -> Testset:
    """
    Generate an evaluation dataset based on given scenarios and parameters.

    Parameters
    ----------
    testset_size : int
        The number of samples to generate.
    query_distribution : Optional[QueryDistribution], optional
        A list of tuples containing scenario simulators and their probabilities.
        If None, default simulators will be used.
    num_personas : int, default 3
        The number of personas to generate or use from the persona_list.
    run_config : Optional[RunConfig], optional
        Configuration for running the generation process.
    batch_size : Optional[int], optional
        How large batches should be. If set to None (default), no batching is done.
    callbacks : Optional[Callbacks], optional
        Langchain style callbacks to use for the generation process. You can use
        this to log the generation process or add other metadata.
    token_usage_parser : Optional[TokenUsageParser], optional
        Parse the LLMResult object and return a TokenUsage object. This is used to
        calculate the cost of the generation process.
    with_debugging_logs : bool, default False
        If True, enable debug logging for various components.
    raise_exceptions : bool, default True
        If True, raise exceptions during the generation process.

    Returns
    -------
    Testset
        A dataset containing the generated TestsetSamples.

    Notes
    -----
    This function performs the following steps:
    1. Set up scenarios and debug logging if required.
    2. Generate scenarios using an Executor.
    3. Calculate split values for different scenario types.
    4. Generate samples for each scenario.
    5. Compile the results into an EvaluationDataset.
    """
    if run_config is not None:
        self.llm.set_run_config(run_config)

    query_distribution = query_distribution or default_query_distribution(
        self.llm, self.knowledge_graph
    )
    callbacks = callbacks or []

    # dict to store any callbacks we define
    ragas_callbacks = {}
    # set the token usage parser
    if token_usage_parser is not None:
        from ragas.cost import CostCallbackHandler

        cost_cb = CostCallbackHandler(token_usage_parser=token_usage_parser)
        ragas_callbacks["cost_cb"] = cost_cb
    else:
        cost_cb = None

    # append all the ragas_callbacks to the callbacks
    for cb in ragas_callbacks.values():
        if isinstance(callbacks, BaseCallbackManager):
            callbacks.add_handler(cb)
        else:
            callbacks.append(cb)

    # new group for Testset Generation
    testset_generation_rm, testset_generation_grp = new_group(
        name=RAGAS_TESTSET_GENERATION_GROUP_NAME,
        inputs={"testset_size": testset_size},
        callbacks=callbacks,
    )

    if with_debugging_logs:
        # TODO: Edit this before pre-release
        from ragas.utils import patch_logger

        patch_logger("ragas.experimental.testset.synthesizers", logging.DEBUG)
        patch_logger("ragas.experimental.testset.graph", logging.DEBUG)
        patch_logger("ragas.experimental.testset.transforms", logging.DEBUG)

    if self.persona_list is None:
        self.persona_list = generate_personas_from_kg(
            llm=self.llm,
            kg=self.knowledge_graph,
            num_personas=num_personas,
            callbacks=callbacks,
        )
    else:
        random.shuffle(self.persona_list)

    splits, _ = calculate_split_values(
        [prob for _, prob in query_distribution], testset_size
    )
    # new group for Generation of Scenarios
    scenario_generation_rm, scenario_generation_grp = new_group(
        name="Scenario Generation",
        inputs={"splits": splits},
        callbacks=testset_generation_grp,
    )

    # generate scenarios
    exec = Executor(
        desc="Generating Scenarios",
        raise_exceptions=raise_exceptions,
        run_config=run_config,
        keep_progress_bar=False,
        batch_size=batch_size,
    )
    # generate samples
    splits, _ = calculate_split_values(
        [prob for _, prob in query_distribution], testset_size
    )
    for i, (scenario, _) in enumerate(query_distribution):
        exec.submit(
            scenario.generate_scenarios,
            n=splits[i],
            knowledge_graph=self.knowledge_graph,
            persona_list=self.persona_list[:num_personas],
            callbacks=scenario_generation_grp,
        )

    try:
        scenario_sample_list: t.List[t.List[BaseScenario]] = exec.results()
    except Exception as e:
        scenario_generation_rm.on_chain_error(e)
        raise e
    else:
        scenario_generation_rm.on_chain_end(
            outputs={"scenario_sample_list": scenario_sample_list}
        )

    # new group for Generation of Samples
    sample_generation_rm, sample_generation_grp = new_group(
        name="Sample Generation",
        inputs={"scenario_sample_list": scenario_sample_list},
        callbacks=testset_generation_grp,
    )
    exec = Executor(
        "Generating Samples",
        raise_exceptions=raise_exceptions,
        run_config=run_config,
        keep_progress_bar=True,
        batch_size=batch_size,
    )
    additional_testset_info: t.List[t.Dict] = []
    for i, (synthesizer, _) in enumerate(query_distribution):
        for sample in scenario_sample_list[i]:
            exec.submit(
                synthesizer.generate_sample,
                scenario=sample,
                callbacks=sample_generation_grp,
            )
            # fill out the additional info for the TestsetSample
            additional_testset_info.append(
                {
                    "synthesizer_name": synthesizer.name,
                }
            )

    try:
        eval_samples = exec.results()
    except Exception as e:
        sample_generation_rm.on_chain_error(e)
        raise e
    else:
        sample_generation_rm.on_chain_end(outputs={"eval_samples": eval_samples})

    # build the testset
    testsets = []
    for sample, additional_info in zip(eval_samples, additional_testset_info):
        testsets.append(TestsetSample(eval_sample=sample, **additional_info))
    testset = Testset(samples=testsets, cost_cb=cost_cb)
    testset_generation_rm.on_chain_end({"testset": testset})

    # tracking how many samples were generated
    track(
        TestsetGenerationEvent(
            event_type="testset_generation",
            evolution_names=[
                e.__class__.__name__.lower() for e, _ in query_distribution
            ],
            evolution_percentages=[p for _, p in query_distribution],
            num_rows=testset_size,
            language="english",
        )
    )
    return testset
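
Finally, a hedged sketch of calling `generate` directly against an already-populated knowledge graph, with a token usage parser for cost tracking (`get_token_usage_for_openai` is the parser Ragas ships for OpenAI-style responses; the per-token prices are placeholders):

```python
from ragas.cost import get_token_usage_for_openai
from ragas.testset.synthesizers import default_query_distribution

# Assumes generator.knowledge_graph was populated earlier, e.g. by a
# generate_with_*_docs call or by loading a saved graph.
distribution = default_query_distribution(generator.llm, generator.knowledge_graph)

testset = generator.generate(
    testset_size=10,
    query_distribution=distribution,
    token_usage_parser=get_token_usage_for_openai,
)
print(testset.total_cost(cost_per_input_token=5e-6, cost_per_output_token=15e-6))
```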