Source code for langchain.chains.openai_functions.citation_fuzzy_match

from typing import Iterator, List, Tuple

from langchain_core._api import deprecated
from langchain_core.language_models import BaseChatModel, BaseLanguageModel
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.output_parsers.openai_functions import PydanticOutputFunctionsParser
from langchain_core.prompts.chat import ChatPromptTemplate, HumanMessagePromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.runnables import Runnable

from langchain.chains.llm import LLMChain
from langchain.chains.openai_functions.utils import get_llm_kwargs


class FactWithEvidence(BaseModel):
    """Class representing a single statement.

    Each fact has a body and a list of sources. If there are multiple facts,
    make sure to break them apart such that each one only uses a set of
    sources that are relevant to it.
    """

    fact: str = Field(..., description="Body of the sentence, as part of a response")
    substring_quote: List[str] = Field(
        ...,
        description=(
            "Each source should be a direct quote from the context, "
            "as a substring of the original content"
        ),
    )

    def _get_span(
        self, quote: str, context: str, errs: int = 100
    ) -> Iterator[Tuple[int, int]]:
        import regex

        minor = quote
        major = context

        # ``{e<=n}`` is the ``regex`` module's fuzzy-matching syntax: the quote
        # may differ from the context by at most ``n`` edits. Start strict and
        # relax one edit at a time until a match is found or the error budget
        # ``errs`` is exhausted.
        errs_ = 0
        s = regex.search(f"({minor}){{e<={errs_}}}", major)
        while s is None and errs_ <= errs:
            errs_ += 1
            s = regex.search(f"({minor}){{e<={errs_}}}", major)

        if s is not None:
            # ``Match.spans()`` returns (start, end) offset tuples for the match.
            yield from s.spans()

    def get_spans(self, context: str) -> Iterator[Tuple[int, int]]:
        """Yield (start, end) spans in ``context`` for every quoted source."""
        for quote in self.substring_quote:
            yield from self._get_span(quote, context)
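

# A minimal usage sketch for the fuzzy matching above (hypothetical values, not
# part of the original module). Because ``{e<=n}`` tolerates up to ``n`` edits,
# a slightly misquoted source can still be located in the context:
#
#     fact = FactWithEvidence(
#         fact="Bob's eyes are brown.",
#         substring_quote=["Bob has brwn eyes"],  # typo still matches fuzzily
#     )
#     context = "Alice has blue eyes. Bob has brown eyes."
#     print(list(fact.get_spans(context)))  # e.g. [(21, 39)], (start, end) offsets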


class QuestionAnswer(BaseModel):
    """A question and its answer as a list of facts.

    Each fact should have a source, and each sentence contains a body and a
    list of sources.
    """

    question: str = Field(..., description="Question that was asked")
    answer: List[FactWithEvidence] = Field(
        ...,
        description=(
            "Body of the answer, each fact should be "
            "its separate object with a body and a list of sources"
        ),
    )
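

# A sketch of the structured payload the model is prompted to return (values
# are hypothetical):
#
#     qa = QuestionAnswer(
#         question="What color are Bob's eyes?",
#         answer=[
#             FactWithEvidence(
#                 fact="Bob's eyes are brown.",
#                 substring_quote=["Bob has brown eyes"],
#             )
#         ],
#     )
#     qa.dict()
#     # -> {'question': "What color are Bob's eyes?",
#     #     'answer': [{'fact': "Bob's eyes are brown.",
#     #                 'substring_quote': ['Bob has brown eyes']}]}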


def create_citation_fuzzy_match_runnable(llm: BaseChatModel) -> Runnable:
    """Create a citation fuzzy match Runnable.

    Example usage:

        .. code-block:: python

            from langchain.chains import create_citation_fuzzy_match_runnable
            from langchain_openai import ChatOpenAI

            llm = ChatOpenAI(model="gpt-4o-mini")

            context = "Alice has blue eyes. Bob has brown eyes. Charlie has green eyes."
            question = "What color are Bob's eyes?"

            chain = create_citation_fuzzy_match_runnable(llm)
            chain.invoke({"question": question, "context": context})

    Args:
        llm: Language model to use for the chain. Must implement bind_tools.

    Returns:
        Runnable that can be used to answer questions with citations.
    """
    # Check on the class, not the instance: ``llm.bind_tools`` returns a bound
    # method, which is never ``is``-identical to the plain class function.
    if type(llm).bind_tools is BaseChatModel.bind_tools:
        raise ValueError(
            "Language model must implement bind_tools to use this function."
        )
    prompt = ChatPromptTemplate(
        [
            SystemMessage(
                "You are a world class algorithm to answer "
                "questions with correct and exact citations."
            ),
            HumanMessagePromptTemplate.from_template(
                "Answer question using the following context."
                "\n\n{context}"
                "\n\nQuestion: {question}"
                "\n\nTips: Make sure to cite your sources, "
                "and use the exact words from the context."
            ),
        ]
    )
    return prompt | llm.with_structured_output(QuestionAnswer)
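

# A sketch of consuming the runnable's output (assumes the ``question`` and
# ``context`` from the docstring example above). ``invoke`` returns a
# ``QuestionAnswer``, whose spans can be mapped back onto the context to
# highlight the cited passages:
#
#     result = chain.invoke({"question": question, "context": context})
#     for fact in result.answer:
#         for start, end in fact.get_spans(context):
#             print(f"{fact.fact!r} <- {context[start:end]!r}")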


@deprecated(
    since="0.2.13",
    removal="1.0",
    alternative="create_citation_fuzzy_match_runnable",
)
def create_citation_fuzzy_match_chain(llm: BaseLanguageModel) -> LLMChain:
    """Create a citation fuzzy match chain.

    Args:
        llm: Language model to use for the chain.

    Returns:
        Chain (LLMChain) that can be used to answer questions with citations.
    """
    output_parser = PydanticOutputFunctionsParser(pydantic_schema=QuestionAnswer)
    schema = QuestionAnswer.schema()
    function = {
        "name": schema["title"],
        "description": schema["description"],
        "parameters": schema,
    }
    llm_kwargs = get_llm_kwargs(function)
    messages = [
        SystemMessage(
            content=(
                "You are a world class algorithm to answer "
                "questions with correct and exact citations."
            )
        ),
        HumanMessage(content="Answer question using the following context"),
        HumanMessagePromptTemplate.from_template("{context}"),
        HumanMessagePromptTemplate.from_template("Question: {question}"),
        HumanMessage(
            content=(
                "Tips: Make sure to cite your sources, "
                "and use the exact words from the context."
            )
        ),
    ]
    prompt = ChatPromptTemplate(messages=messages)  # type: ignore[arg-type, call-arg]
    chain = LLMChain(
        llm=llm,
        prompt=prompt,
        llm_kwargs=llm_kwargs,
        output_parser=output_parser,
    )
    return chain
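

# Legacy usage sketch for the deprecated chain (assumes an OpenAI
# functions-capable chat model; ``run`` is the classic ``LLMChain`` entry point
# and returns the parsed ``QuestionAnswer``):
#
#     from langchain_openai import ChatOpenAI
#
#     chain = create_citation_fuzzy_match_chain(ChatOpenAI(model="gpt-4o-mini"))
#     result = chain.run(
#         question="What color are Bob's eyes?",
#         context="Alice has blue eyes. Bob has brown eyes.",
#     )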