Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pdf-qa not working: #947

Open
miriam-z opened this issue Apr 28, 2024 · 0 comments
Open

pdf-qa not working: #947

miriam-z opened this issue Apr 28, 2024 · 0 comments
Labels
cookbook Issues related to the chainlit-cookbook repository

Comments

@miriam-z
Copy link

miriam-z commented Apr 28, 2024

Describe the bug

Upload a PDF, in this case Apple SEC 10K

Screenshot 2024-04-28 at 10 23 06

(400)
Reason: Bad Request
pinecone.core.client.exceptions.PineconeApiException: (400)
Reason: Bad Request
HTTP response headers: HTTPHeaderDict({'Date': 'Sun, 28 Apr 2024 02:20:51 GMT', 'Content-Type': 'application/json', 'Content-Length': '101', 'Connection': 'keep-alive', 'x-pinecone-request-latency-ms': '1560', 'x-pinecone-request-id': '7850253967941358202', 'x-envoy-upstream-service-time': '384', 'server': 'envoy'})
HTTP response body: {"code":3,"message":"Vector dimension 1536 does not match the dimension of the index 8","details":[]}

2024-04-28 10:20:50 - HTTP Request: POST https://cloud.getliteral.ai/api/graphql "HTTP/1.1 200 OK"
2024-04-28 10:20:50 - Failed to send steps: [{'message': 'Unknown type "FeedbackPayloadInput". Did you mean "ThreadPayloadInput", "GenerationPayloadInput", or "ScorePayloadInput"?', 'locations': [{'line': 14, 'column': 22}]}, {'message': 'Unknown argument "feedback" on field "Mutation.ingestStep".', 'locations': [{'line': 31, 'column': 9}]}]

To Reproduce
Steps to reproduce the behavior:

pdf-qa.py

import os
from typing import List
from langchain.document_loaders import PyPDFLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores.pinecone import Pinecone
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.memory import ChatMessageHistory, ConversationBufferMemory
from langchain.docstore.document import Document

import pinecone

import chainlit as cl
from chainlit.types import AskFileResponse

# Pinecone client configured from the environment.
# NOTE(review): whether the installed pinecone client still honours
# `environment` depends on its version -- confirm against the pinned release.
pinecone_client = pinecone.Pinecone(
    api_key=os.environ.get("PINECONE_API_KEY"),
    environment=os.environ.get("PINECONE_ENV"),
)

# Name of the Pinecone index that stores the document chunks. It can be
# overridden with PINECONE_INDEX_NAME and defaults to the previous hard-coded
# value. The index dimension must equal the size of the vectors produced by
# `embeddings`: in the reported run the vectors had 1536 dimensions while the
# "quickstart" index had dimension 8, and Pinecone rejected the upsert with a
# 400 "Vector dimension 1536 does not match the dimension of the index 8".
index_name = os.environ.get("PINECONE_INDEX_NAME", "quickstart")

# Splits loaded documents into chunks of up to 1000 characters that overlap
# by 100 characters.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
embeddings = OpenAIEmbeddings()

# Namespaces (one per uploaded file id) already indexed by this process.
namespaces = set()

welcome_message = """Welcome to the Chainlit PDF QA demo! To get started:
1. Upload a PDF or text file
2. Ask a question about the file
"""


def process_file(file: AskFileResponse):
    """Load an uploaded file and split it into chunks tagged with a source id.

    Args:
        file: The uploaded file. Its ``type`` selects the loader and its
            ``path`` is passed to that loader.

    Returns:
        The chunks produced by ``text_splitter``, each with
        ``metadata["source"]`` set to ``source_<index>``.

    Raises:
        ValueError: If ``file.type`` is neither ``text/plain`` nor
            ``application/pdf``.
    """
    if file.type == "text/plain":
        Loader = TextLoader
    elif file.type == "application/pdf":
        Loader = PyPDFLoader
    else:
        # Fail loudly instead of silently returning None to the caller.
        raise ValueError(f"Unsupported file type: {file.type}")

    # Loading and splitting are shared by every supported type. Previously
    # these statements sat inside the PDF branch, so text files returned None.
    loader = Loader(file.path)
    documents = loader.load()
    docs = text_splitter.split_documents(documents)
    for i, doc in enumerate(docs):
        doc.metadata["source"] = f"source_{i}"
    return docs


def get_docsearch(file: AskFileResponse):
    """Return a Pinecone vector store scoped to the uploaded file.

    The file is split with ``process_file`` and the resulting chunks are
    stored in the user session under ``"docs"``. The file id is used as the
    Pinecone namespace: the first time an id is seen its chunks are embedded
    and written to the index, afterwards the existing namespace is reused.

    Args:
        file: The uploaded file to index.

    Returns:
        The vector store bound to ``index_name`` and the file's namespace.
    """
    chunks = process_file(file)

    # Keep the chunks available to the rest of the user session.
    cl.user_session.set("docs", chunks)

    # One namespace per uploaded file.
    ns = file.id

    if ns not in namespaces:
        # First time this file is seen: embed and upload its chunks.
        store = Pinecone.from_documents(
            chunks, embeddings, index_name=index_name, namespace=ns
        )
        namespaces.add(ns)
        return store

    # Already indexed by this process: attach to the existing namespace.
    return Pinecone.from_existing_index(
        index_name=index_name, embedding=embeddings, namespace=ns
    )


@cl.on_chat_start
async def start():
    """Set up a chat session: ask for a file, index it and build the QA chain.

    Sends the chatbot avatar, asks for a text or PDF upload until one is
    received, indexes the first uploaded file via ``get_docsearch`` and
    stores a ``ConversationalRetrievalChain`` in the user session under
    ``"chain"``.
    """
    avatar = cl.Avatar(
        name="Chatbot",
        url="https://avatars.githubusercontent.com/u/128686189?s=400&u=a1d1553023f8ea0921fba0debbe92a8c5f840dd9&v=4",
    )
    await avatar.send()

    # Keep asking until the prompt yields something other than None.
    while True:
        files = await cl.AskFileMessage(
            content=welcome_message,
            accept=["text/plain", "application/pdf"],
            max_size_mb=20,
            timeout=180,
        ).send()
        if files is not None:
            break

    # Only the first uploaded file is used.
    file = files[0]

    status = cl.Message(content=f"Processing `{file.name}`...", disable_feedback=True)
    await status.send()

    # get_docsearch is synchronous, so it is wrapped with cl.make_async
    # before being awaited.
    build_docsearch = cl.make_async(get_docsearch)
    docsearch = await build_docsearch(file)

    # Conversation memory backed by a fresh message history.
    history = ChatMessageHistory()
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        output_key="answer",
        chat_memory=history,
        return_messages=True,
    )

    llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, streaming=True)
    chain = ConversationalRetrievalChain.from_llm(
        llm,
        chain_type="stuff",
        retriever=docsearch.as_retriever(),
        memory=memory,
        return_source_documents=True,
    )

    # Tell the user the file is ready for questions.
    status.content = f"`{file.name}` processed. You can now ask questions!"
    await status.update()

    cl.user_session.set("chain", chain)


@cl.on_message
async def main(message: cl.Message):
    """Answer a user message with the session's retrieval chain.

    Runs the chain stored in the user session under ``"chain"`` on the
    message content, attaches each returned source document as a text
    element named ``source_<index>``, and sends the answer followed by the
    list of source names, or by "No sources found" when the chain returned
    no source documents.

    Args:
        message: The incoming user message; its ``content`` is the question.
    """
    chain = cl.user_session.get("chain")  # type: ConversationalRetrievalChain
    cb = cl.AsyncLangchainCallbackHandler()
    res = await chain.acall(message.content, callbacks=[cb])
    answer = res["answer"]
    source_documents = res["source_documents"]  # type: List[Document]

    # One text element per source document, referenced by name in the answer.
    text_elements = [
        cl.Text(content=source_doc.page_content, name=f"source_{source_idx}")
        for source_idx, source_doc in enumerate(source_documents or [])
    ]

    # The "no sources" branch used to be nested under `if source_documents:`
    # and could never run; it is now reached when there are no documents.
    if text_elements:
        source_names = [text_el.name for text_el in text_elements]
        answer += f"\nSources: {', '.join(source_names)}"
    else:
        answer += "\nNo sources found"

    await cl.Message(content=answer, elements=text_elements).send()

Expected behavior
The app should answer questions about the uploaded PDF and cite its sources.

Screenshots
If applicable, add screenshots to help explain your problem.

Desktop (please complete the following information):

  • OS: [e.g. iOS]
  • Browser: chrome
  • Version : 123.0.6312.122
  • Chainlit: 1.0.401

python --version
Python 3.10.7

Additional context

Full log:

2024-04-28 10:30:52 - Your app is available at http://localhost:8000
2024-04-28 10:31:03 - 2 changes detected
2024-04-28 10:31:04 - HTTP Request: POST https://cloud.getliteral.ai/api/graphql "HTTP/1.1 200 OK"
2024-04-28 10:31:05 - HTTP Request: POST https://cloud.getliteral.ai/api/upload/file "HTTP/1.1 200 OK"
2024-04-28 10:31:06 - HTTP Request: POST https://storage.googleapis.com/literal-bucket/ "HTTP/1.1 204 No Content"
2024-04-28 10:31:07 - HTTP Request: POST https://cloud.getliteral.ai/api/graphql "HTTP/1.1 200 OK"
2024-04-28 10:31:07 - Failed to send steps: [{'message': 'Unknown type "FeedbackPayloadInput". Did you mean "ThreadPayloadInput", "GenerationPayloadInput", or "ScorePayloadInput"?', 'locations': [{'line': 14, 'column': 22}]}, {'message': 'Unknown argument "feedback" on field "Mutation.ingestStep".', 'locations': [{'line': 31, 'column': 9}]}]
2024-04-28 10:31:07 - Error while flushing create_element: [{'message': 'Unknown type "FeedbackPayloadInput". Did you mean "ThreadPayloadInput", "GenerationPayloadInput", or "ScorePayloadInput"?', 'locations': [{'line': 14, 'column': 22}]}, {'message': 'Unknown argument "feedback" on field "Mutation.ingestStep".', 'locations': [{'line': 31, 'column': 9}]}]
2024-04-28 10:31:07 - HTTP Request: POST https://cloud.getliteral.ai/api/graphql "HTTP/1.1 200 OK"
2024-04-28 10:31:07 - Failed to send steps: [{'message': 'Unknown type "FeedbackPayloadInput". Did you mean "ThreadPayloadInput", "GenerationPayloadInput", or "ScorePayloadInput"?', 'locations': [{'line': 14, 'column': 22}]}, {'message': 'Unknown argument "feedback" on field "Mutation.ingestStep".', 'locations': [{'line': 31, 'column': 9}]}]
2024-04-28 10:31:07 - Error while flushing create_step: [{'message': 'Unknown type "FeedbackPayloadInput". Did you mean "ThreadPayloadInput", "GenerationPayloadInput", or "ScorePayloadInput"?', 'locations': [{'line': 14, 'column': 22}]}, {'message': 'Unknown argument "feedback" on field "Mutation.ingestStep".', 'locations': [{'line': 31, 'column': 9}]}]
2024-04-28 10:31:08 - HTTP Request: POST https://cloud.getliteral.ai/api/graphql "HTTP/1.1 200 OK"
2024-04-28 10:31:08 - Failed to send steps: [{'message': 'Unknown type "FeedbackPayloadInput". Did you mean "ThreadPayloadInput", "GenerationPayloadInput", or "ScorePayloadInput"?', 'locations': [{'line': 14, 'column': 22}]}, {'message': 'Unknown argument "feedback" on field "Mutation.ingestStep".', 'locations': [{'line': 31, 'column': 9}]}]
2024-04-28 10:31:10 - Task exception was never retrieved
future: <Task finished name='Task-181' coro=<ChainlitDataLayer.create_step() done, defined at /Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/chainlit/data/init.py:31> exception=Exception([{'message': 'Unknown type "FeedbackPayloadInput". Did you mean "ThreadPayloadInput", "GenerationPayloadInput", or "ScorePayloadInput"?', 'locations': [{'line': 14, 'column': 22}]}, {'message': 'Unknown argument "feedback" on field "Mutation.ingestStep".', 'locations': [{'line': 31, 'column': 9}]}])>
Traceback (most recent call last):
File "/Users/mincheung/.asdf/installs/python/3.10.7/lib/python3.10/asyncio/tasks.py", line 232, in __step
result = coro.send(None)
File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/chainlit/data/__init__.py", line 46, in wrapper
return await method(self, *args, **kwargs)
File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/chainlit/data/__init__.py", line 326, in create_step
await self.client.api.send_steps([step])
File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/literalai/api.py", line 1147, in send_steps
return await self.make_api_call("send steps", query, variables)
File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/literalai/api.py", line 251, in make_api_call
raise_error(json["errors"])
File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/literalai/api.py", line 235, in raise_error
raise Exception(error)
Exception: [{'message': 'Unknown type "FeedbackPayloadInput". Did you mean "ThreadPayloadInput", "GenerationPayloadInput", or "ScorePayloadInput"?', 'locations': [{'line': 14, 'column': 22}]}, {'message': 'Unknown argument "feedback" on field "Mutation.ingestStep".', 'locations': [{'line': 31, 'column': 9}]}]
2024-04-28 10:31:22 - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-04-28 10:31:27 - (400)
Reason: Bad Request
HTTP response headers: HTTPHeaderDict({'Date': 'Sun, 28 Apr 2024 02:31:29 GMT', 'Content-Type': 'application/json', 'Content-Length': '101', 'Connection': 'keep-alive', 'x-pinecone-request-latency-ms': '1498', 'x-pinecone-request-id': '6494146641853823921', 'x-envoy-upstream-service-time': '339', 'server': 'envoy'})
HTTP response body: {"code":3,"message":"Vector dimension 1536 does not match the dimension of the index 8","details":[]}
Traceback (most recent call last):
File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/chainlit/utils.py", line 39, in wrapper
return await user_function(**params_values)
File "/Users/mincheung/Documents/chainlit-cookbook/pdf-qa/app.py", line 96, in start
docsearch = await cl.make_async(get_docsearch)(file)
File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/asyncer/_main.py", line 358, in wrapper
return await anyio.to_thread.run_sync(
File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/anyio/to_thread.py", line 33, in run_sync
return await get_asynclib().run_sync_in_worker_thread(
File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 877, in run_sync_in_worker_thread
return await future
File "/Users/mincheung/.asdf/installs/python/3.10.7/lib/python3.10/asyncio/futures.py", line 285, in __await__
yield self # This tells Task to wait for completion.
File "/Users/mincheung/.asdf/installs/python/3.10.7/lib/python3.10/asyncio/tasks.py", line 304, in __wakeup
future.result()
File "/Users/mincheung/.asdf/installs/python/3.10.7/lib/python3.10/asyncio/futures.py", line 201, in result
raise self._exception.with_traceback(self._exception_tb)
File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 807, in run
result = context.run(func, *args)
File "/Users/mincheung/Documents/chainlit-cookbook/pdf-qa/app.py", line 67, in get_docsearch
docsearch = Pinecone.from_documents(
File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/langchain_core/vectorstores.py", line 508, in from_documents
return cls.from_texts(texts, embedding, metadatas=metadatas, **kwargs)
File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/langchain_community/vectorstores/pinecone.py", line 434, in from_texts
pinecone.add_texts(
File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/langchain_community/vectorstores/pinecone.py", line 157, in add_texts
[res.get() for res in async_res]
File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/langchain_community/vectorstores/pinecone.py", line 157, in <listcomp>
[res.get() for res in async_res]
File "/Users/mincheung/.asdf/installs/python/3.10.7/lib/python3.10/multiprocessing/pool.py", line 774, in get
raise self._value
File "/Users/mincheung/.asdf/installs/python/3.10.7/lib/python3.10/multiprocessing/pool.py", line 125, in worker
result = (True, func(*args, **kwds))
File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/pinecone/core/client/api_client.py", line 203, in __call_api
raise e
File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/pinecone/core/client/api_client.py", line 196, in __call_api
response_data = self.request(
File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/pinecone/core/client/api_client.py", line 455, in request
return self.rest_client.POST(url,
File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/pinecone/core/client/rest.py", line 302, in POST
return self.request("POST", url,
File "/Users/mincheung/Documents/chainlit-cookbook/.venv/lib/python3.10/site-packages/pinecone/core/client/rest.py", line 261, in request
raise PineconeApiException(http_resp=r)
pinecone.core.client.exceptions.PineconeApiException: (400)
Reason: Bad Request
HTTP response headers: HTTPHeaderDict({'Date': 'Sun, 28 Apr 2024 02:31:29 GMT', 'Content-Type': 'application/json', 'Content-Length': '101', 'Connection': 'keep-alive', 'x-pinecone-request-latency-ms': '1498', 'x-pinecone-request-id': '6494146641853823921', 'x-envoy-upstream-service-time': '339', 'server': 'envoy'})
HTTP response body: {"code":3,"message":"Vector dimension 1536 does not match the dimension of the index 8","details":[]}

2024-04-28 10:31:28 - HTTP Request: POST https://cloud.getliteral.ai/api/graphql "HTTP/1.1 200 OK"
2024-04-28 10:31:28 - Failed to send steps: [{'message': 'Unknown type "FeedbackPayloadInput". Did you mean "ThreadPayloadInput", "GenerationPayloadInput", or "ScorePayloadInput"?', 'locations': [{'line': 14, 'column': 22}]}, {'message': 'Unknown argument "feedback" on field "Mutation.ingestStep".', 'locations': [{'line': 31, 'column': 9}]}]
Add any other context about the problem here.

EDIT:

After running

pip install -U pinecone-client langchain

the app fails with a different error:

type object 'Pinecone' has no attribute 'from_documents'

@tpatel tpatel added cookbook Issues related to the chainlit-cookbook repository and removed needs-triage labels Apr 29, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
cookbook Issues related to the chainlit-cookbook repository
Projects
None yet
Development

No branches or pull requests

2 participants