Skip to content


Empower your Q&A processes with ScribbleData's Q&A Agent. Dive into detailed documentation for effective question and answer capabilities in your applications

LLMIndexQuerier(name, cred={}, platform='openai', searchapi='serpapi', statestore='redis', memory_size=1000)

Bases: BaseLLMAgent

Class to do querying using LLMs. A query can be run against a specified set of documents that act as context to constrain the answers, or against all the stored knowledge of the LLM model.

Initialize the LLM query agent. name: name of the agent. cred: credentials object. platform: name of the platform backend to use; defaults to the OpenAI GPT model for now (Azure is supported as well) and will be extended in the future to support other models. memory_size: how many tokens of memory to use when chatting with the LLM.

Source code in llmsdk/agents/
def __init__(self,
    # NOTE(review): the parameter list is cut off in this listing. The
    # documented signature is (name, cred={}, platform='openai',
    # searchapi='serpapi', statestore='redis', memory_size=1000).
    # NOTE(review): the lines below read as docstring text whose quote
    # delimiters are missing from this listing.
    init the LLM query agent
    name: name of the agent
    cred: credentials object
    platform: name of the platform backend to use
            default to OpenAI GPT model for now, and Azure as well
            will be extended in the future to suuport other models
    memory_size: how many tokens of memory to use when chatting with the LLM

    # used at the end to report how long agent setup took
    start_time = time.time()

    # init the base class
    # NOTE(review): no base-class call follows in this listing. Attributes read
    # later (self.llm, self.cred, self.agent_name, self.agent_type,
    # self.agent_id) are not assigned anywhere in the visible code --
    # presumably they come from the base class; confirm.

    # defaults
    self.chunk_size = 1000
    self.chunk_overlap = 200
    self.index = None
    self.latest_context = []
    self.context_topK = 1
    self.current_kg = []
    self.metadata = {}
    self.vdb_client = None
    self.index_name = None
    self.index_store = None
    # NOTE(review): as written, this chained assignment binds both
    # self.state_store and the local `statestore` to a new empty dict, so the
    # `statestore` argument value is not kept -- confirm this is intended.
    self.state_store = statestore = {}

    # LLM params
    self.platform = platform
    self.searchapi = searchapi
    self.chaintype = "stuff"
    self.memory_size = memory_size

    # NOTE(review): many of the calls below end at an open parenthesis or a
    # trailing comma; their continuation lines are missing from this listing,
    # so the remaining arguments cannot be told from here.

    # init the memories for the llm
    conv_memory = ConversationTokenBufferMemory(llm=self.llm,
    kg_memory = ConversationKGMemory(llm=self.llm,
    # NOTE(review): as written, this chained assignment binds both self.memory
    # and conv_memory to kg_memory, which would discard the token-buffer
    # memory created above -- verify against the real source.
    self.memory = conv_memory = kg_memory

    # init the QnA chain for internal queries
    prompt = self._get_query_prompt_internal()
    self.llm_chain_int = load_qa_chain(llm=self.llm,
    # init the chain for external queries
    prompt = self._get_query_prompt_external()
    self.llm_chain_ext = LLMChain(llm=self.llm,
    # init the chain for kwords
    prompt = self._get_query_prompt_kwords()
    self.llm_chain_kw = LLMChain(llm=self.llm,
    # init the chain for suggestions
    prompt = self._get_query_prompt_suggest()
    self.llm_chain_sug = LLMChain(llm=self.llm,
    # init the agent for searches
    self.llm_agent_srch, self.searchengine = self._load_search_agent(cred=self.cred,
    # note metadata for this agent
    # NOTE(review): the closing braces of the nested "agent" dict and of the
    # outer dict are missing from this listing.
    self.metadata = {
        "agent": {
            "name": self.agent_name,
            "type": self.agent_type,
            "id": self.agent_id,
            "platform": self.platform,
            "searchapi": self.searchapi,
            "statestore": self.state_store,
            "memory_size": self.memory_size,
            "chaintype": self.chaintype,
        "events": []
    # log that the agent is ready
    duration = time.time() - start_time
    # `event` is not used again in the visible code
    event = self._log_event(agent_events._EVNT_READY, duration)


extract all KG entities, format as {entity: relation} and add to the current set of tracked KG entities

Source code in llmsdk/agents/
def extract_add_kg_entities(self, answer):
    # NOTE(review): the next two lines read as docstring text whose quote
    # delimiters are missing from this listing.
    extract all KG entities, format as {entity: relation}
    and add to the current set of tracked KG entities
    # get the KG entities for this answer
    # NOTE(review): the right-hand side of this assignment is missing from
    # this listing, so where the entities come from cannot be told from here.
    kg_entities =
    # format the entities as we need them
    # each entity is flattened into a (subject, predicate, object_) tuple
    kge = [(e.subject, e.predicate, e.object_) for e in kg_entities]

    ## -- TO-DO --
    # at this point
    # we will have to do some post-processing on the KG entities
    # one option is to look at useful relations
    # defined by the predicate
    # e.g. {"is defined as", "shall mean", "is"} -> "="
    ## -- END TO-DO --

    # add to running list of all KG entities
    # (the list is extended on every call; nothing here de-duplicates it)
    self.current_kg += kge

    # format for output
    # only the triples from this call are converted and returned, not the
    # whole running list
    kge = self._list_to_nested_dict(kge)

    return kge


return all the KG entities as a dict {entity: (relation, object)}

Source code in llmsdk/agents/
def get_kg_entities(self):
    """
    Return all the KG entities tracked so far as a dict.

    The running list of triples in ``self.current_kg`` is handed to
    ``self._list_to_nested_dict`` and that helper's result is returned
    unchanged. The documented shape is ``{entity: (relation, object)}``.
    """
    # convert the running list of KG entities collected so far
    kg_entities = self._list_to_nested_dict(self.current_kg)

    return kg_entities

query(query, mode='internal', policy={})

Run a query on an index using an LLM chain object. query: query string. mode: 'internal' for querying over the docset, 'external' for a general query, 'suggest' for asking the LLM for alternate ways to pose the question. policy: any extra params needed by the agent.

Source code in llmsdk/agents/
def query(self, query, mode="internal", policy={}):
    # NOTE(review): `policy={}` is a mutable default argument; the visible code
    # only reads it (policy.get) and passes it to the event log.
    # NOTE(review): the lines below read as docstring text whose quote
    # delimiters are missing from this listing.
    run a query on an index using an llm chain object
    query: query string
    mode: 'internal' for querying over docset,
          'external' for general query,
          'suggest' for asking the LLM for alternate ways to pose the question
    policy: any extra params needed by the agent

    start_time = time.time()

    # check to see if we need to reset agent memory
    reset_state = policy.get("reset_state", False)
    # NOTE(review): only comments remain under this `if`; the statements that
    # clear or set the agent state (and any `else:` line) are missing from
    # this listing.
    if reset_state:
        # clear the agent state
        # set the state of the agent
        # this happens only if we are not doing a reset-state

    # dispatch by name: any method called run_query_<mode> is accepted, which
    # in this file also covers 'search' and 'kwords' in addition to the modes
    # named in the description above
    method = getattr(self, f"run_query_{mode}", None)
    if method is None:
        raise Exception(f"Unsupported mode: {mode}")

    # for these platforms the call runs inside get_openai_callback() so that
    # token counts and cost can be read from `cb` afterwards
    if self.platform in ['openai', 'azure']:
        with get_openai_callback() as cb:
            result = method(query)
        # NOTE(review): the closing brace of this dict and what looks like an
        # `else:` line before the second `result = method(query)` are missing
        # from this listing.
        stats = {
            "total_tokens": cb.total_tokens,
            "prompt_tokens": cb.prompt_tokens,
            "completion_tokens": cb.completion_tokens,
            "total_cost": round(cb.total_cost, 4)
        result = method(query)
        stats = {}

    if result:

        # direct indexing: a truthy result without an 'answer' key raises
        # KeyError here
        answer = result['answer']

        if answer:
            # add keywords identified to the result
            result['keywords'] = self.run_query_kwords(context=answer)

            # store the latest context
            # this is useful to guide the external agent
            # since it is memory-less
            # only store the top-n keywords
            # storing more will make the LLM overfit responses
            self.latest_context = result['keywords'][0:self.context_topK]

            # add KG elements to the result
            result['kg'] = self.extract_add_kg_entities(answer)

    # store the agent's state
    # NOTE(review): no statement follows this comment in this listing.

    # log the event
    # result.copy() is a shallow copy, taken before 'metadata' is added below
    # NOTE(review): the closing brace of `params` is missing from this listing.
    params = {
        "query": query,
        "mode": mode,
        "policy": policy,
        "result": result.copy() if result is not None else None,
        "stats": stats
    duration = time.time() - start_time
    event = self._log_event(agent_events._EVNT_QUERY, duration, params=params)

    # add the event to the result
    # NOTE(review): the closing brace of the 'metadata' dict is missing from
    # this listing.
    if result:
        result['metadata'] = {
            "timestamp": event['timestamp'],
            "duration": event['duration'],
            "stats": stats

    return result


Run a query using the LLM. This is useful when looking for answers that a generic LLM can provide.

Source code in llmsdk/agents/
def run_query_external(self, query):
    # NOTE(review): the next two lines read as docstring text whose quote
    # delimiters are missing from this listing.
    run a query using llm
    this is useful when looking for answers that generic llm can provide
    # augment the query with some context to guide the LLM
    # (self.latest_context is set by query() to the top keywords of the
    # previous answer; it is an empty list until then, giving context == "")
    context = ", ".join(self.latest_context)
    # NOTE(review): this call ends at a trailing comma; its remaining
    # arguments and closing parenthesis are missing from this listing.
    result = self.llm_chain_ext({"context": context, "input":query},
    # wrap the chain output in the common result shape used by the other
    # run_query_* methods ("question", "answer", "sources")
    # NOTE(review): the closing brace of this dict is missing from this listing.
    result = {
        "question": query,
        "answer": result,
        "sources": [{"source": f"llm-{self.platform}"}]

    return result


Run a query using the LLM on an internal docset indexed in the index. This is useful when looking for answers using a private source of data.

Source code in llmsdk/agents/
def run_query_internal(self, query):
    # NOTE(review): the next two lines read as docstring text whose quote
    # delimiters are missing from this listing.
    run a query using llm on an internal docset indexed in index
    this is useful when looking for answers using a private source of data
    # get the similar docs
    docs = self.get_similar_docs(query)

    # early exit: this result has no "sources" key, unlike the normal path
    # NOTE(review): the closing brace of the returned dict is missing from
    # this listing.
    if docs is None:
        return {
            "question": query,
            "answer": "Could not find document chunks to process"

    # setup the QnA chain object
    # NOTE(review): this call ends at a trailing comma; its remaining
    # arguments and closing parenthesis are missing from this listing.
    response = self.llm_chain_int({"input_documents":docs, "input":query},

    # run the query against the similar docs
    # d.metadata.pop('distance') removes the key from each doc's metadata dict
    # in place, so the "metadata" value (the same dict object) no longer holds
    # 'distance'; pop has no default here, so a doc without that key raises
    # KeyError
    # NOTE(review): the closing brace of this dict is missing from this listing.
    result = {
        "question": query,
        "answer": response.get('output_text', self._err_msg('field')).strip(),
        "sources": [{"content": d.page_content, "metadata": d.metadata, "distance": d.metadata.pop('distance')} for d in docs],

    # check if suggest call is needed
    # (either the chain produced no 'output_text', or the answer text contains
    # "i am not sure", compared case-insensitively)
    if ('output_text' not in response) or ("i am not sure" in result['answer'].lower()):
        response = self.run_query_suggest(query)
        result['suggest'] = response['suggest']
        # we don't have a usable answer, so no need for sources
        result['sources'] = []

    return result


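Run the keyword chain of the LLM over the given context and return the result as a list. If the LLM output cannot be used directly, a few attempts are made to extract a list from it; anything that is still not a list is returned as an empty list.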

Source code in llmsdk/agents/
def run_query_kwords(self, context=""):
    # NOTE(review): the next two lines read as docstring text whose quote
    # delimiters are missing from this listing; the wording matches the text
    # of other run_query_* methods rather than what this method does with
    # `context`.
    run a query using llm on an internal docset indexed in index
    this is useful when looking for answers that generic llm can provide
    # NOTE(review): this call ends at a trailing comma; its remaining
    # arguments and closing parenthesis are missing from this listing.
    result = self.llm_chain_kw({"context": context},
    # take the 'text' field if present, otherwise keep the raw chain output
    result = result.get('text', result)
    # a few tries to extract the response
    # sometimes, the LLM messes up
    # NOTE(review): the stepped indentation below suggests nested try/except
    # blocks whose `try:` / `except` lines are missing from this listing. The
    # visible attempts are: parse the whole text as JSON; parse the text after
    # the last '[' as a JSON list; split on newlines and drop "- " markers.
        result = json.loads(result)
            result = json.loads(f"[{result.split('[')[-1]}")
                result = result.split("\n")
                result = [r.replace("- ", "") for r in result]

    # force a list
    # (any non-list value, e.g. a dict parsed from JSON, becomes [])
    result = [] if not isinstance(result, list) else result

    return result

Run a query using the search agent. This is useful when looking for answers using a search engine.

Source code in llmsdk/agents/
def run_query_search(self, query):
    # NOTE(review): the next two lines read as docstring text whose quote
    # delimiters are missing from this listing.
    run a query using the search agent
    this is useful when looking for answers using a search engine
    # helper: turn the 'organic_results' entries of the search payload into
    # {"content", "source"} dicts; returns None when there are no such entries
    def extract_content_sources(sourcedata):
        docs = sourcedata.get('organic_results')
        if not docs:
            return None
        sources = [{"content": d.get('snippet', ""), "source": d.get('link')} for d in docs]
        return sources

    # modify the query using context history
    # (with an empty self.latest_context the prefix becomes
    # "In the context of , ...")
    context = ", ".join(self.latest_context)
    query_mod = f"In the context of {context}, {query}"

    # get the human-readable result
    # NOTE(review): the right-hand side of this assignment is missing from
    # this listing, so how the answer is produced cannot be told from here.
    result =

    # get the sources
    sourcedata = self.searchengine.results(query_mod)
    sources = extract_content_sources(sourcedata)
    # fall back to a single placeholder source naming the search API
    if not sources:
        sources = [{"content": "", "source": f"search-{self.searchapi}"}]

    # construct result
    # "question" carries the caller's original query, not query_mod;
    # "suggest" is de-duplicated through a set, so its order is not preserved
    # NOTE(review): the closing brace of this dict is missing from this listing.
    result = {
        "question": query,
        "answer": result,
        "suggest": list(set([q.get('question', '') for q in sourcedata.get('related_questions', [])])),
        "sources": sources

    return result


run a query using llm to suggest other ways of asking the query, in the context of the chat history

Source code in llmsdk/agents/
def run_query_suggest(self, query):
    # NOTE(review): the next two lines read as docstring text whose quote
    # delimiters are missing from this listing.
    run a query using llm to suggest other ways of asking the query,
    in the context of the chat history
    # augment the query with some context to guide the LLM
    # NOTE(review): this call ends at a trailing comma; its remaining
    # arguments and closing parenthesis are missing from this listing.
    response = self.llm_chain_sug({"input":query},
    # take the 'text' field if present, otherwise the raw chain output
    result = response.get('text', response).strip()
    # NOTE(review): the indentation below suggests a try/except whose `try:`
    # and `except` lines are missing from this listing; `suggest = []` looks
    # like the fallback when the JSON parsing fails -- confirm.
        # we asked the LLM to give us json
        suggest = json.loads(result)
        # in case the LLM gave us a list of dicts instead of list of strs
        # (for a dict entry, its first value is taken)
        suggest = [list(s.values())[0] if isinstance(s, dict) else s for s in suggest]
        suggest = []

    # "answer" keeps the stripped LLM text; "suggest" holds the parsed list
    # NOTE(review): the closing brace of this dict is missing from this listing.
    result = {
        "question": query,
        "answer": result,
        "suggest": suggest,
        "sources": [{"source": f"llm-{self.platform}"}]

    return result