fhirpath does not support pydantic v2 #37

Open

DereAbdulhameed opened this issue Oct 22, 2024 · 0 comments

  • fhirpath version: 0.10.3
  • Python version: 3.12.1
  • Operating System: Windows 10

Description

I am trying to build a RAG-enabled chatbot that can take in several types of data, including FHIR data, extract the important information from the JSON document, and then use an LLM to return it as natural language. However, there seems to be a version incompatibility: it keeps throwing errors between langchain and fhir.
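
A quick way to confirm which versions are actually installed (a diagnostic sketch added for clarity; the package list is an assumption about what is relevant here):

```python
# Diagnostic sketch: print the installed versions of the packages involved
# so the incompatibility can be pinned down precisely.
import importlib.metadata as md

for pkg in ("pydantic", "langchain", "langchain-community", "fhirpath", "fhir.resources"):
    try:
        print(pkg, md.version(pkg))
    except md.PackageNotFoundError:
        print(pkg, "not installed")
```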

What I Did

```python
import streamlit as st
import openai
from openai import OpenAI
from brain import get_index_for_documents
from langchain.chains import RetrievalQA
from langchain_community.chat_models import ChatOpenAI
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from pydantic import BaseModel, ConfigDict
from dotenv import load_dotenv
import os
import json
from fhir.resources import IPS
#from fhir.resources.ips import IPS
#from fhirpathpy import evaluate
from fhirpath import evaluate

# Set the title for the Streamlit app
st.title("DocuChat")

# Load environment variables before creating the OpenAI client,
# so the client can pick up OPENAI_API_KEY
load_dotenv()  # Load variables from .env
openai.api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI()

# Function to load FHIR/IPS document
def load_fhir_document(file):
    return json.loads(file.getvalue().decode("utf-8"))

# Function to evaluate FHIRPath queries
def evaluate_fhirpath(data, fhirpath_expression):
    return evaluate(data, fhirpath_expression)

# Function to generate FHIRPath queries using GPT
def generate_fhirpath_query(question):
    # openai>=1.0 removed openai.Completion, and gpt-4 is a chat model,
    # so use the chat completions API on the client created above.
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": f"Convert the following question into a FHIRPath query: {question}"}],
        max_tokens=50
    )
    return response.choices[0].message.content.strip()

# Function to convert FHIRPath results to natural language using GPT
def convert_to_natural_language(fhir_result):
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": f"Convert the following data into natural language: {fhir_result}"}],
        max_tokens=150
    )
    return response.choices[0].message.content.strip()

# Function to create vector database from different file types
@st.cache_resource
def create_vectordb(files, filenames, raw_texts):
    with st.spinner("Creating vector database..."):
        vectordb = get_index_for_documents(
            [file.getvalue() for file in files if file.type == "application/pdf"],
            filenames,
            [raw_text for raw_text in raw_texts.splitlines() if raw_text.strip()],
            openai.api_key
        )
    return vectordb

# Upload files using Streamlit's file uploader
uploaded_files = st.file_uploader("Upload your documents (PDF, TXT, JSON/FHIR, IPS)", type=["pdf", "txt", "json"], accept_multiple_files=True, label_visibility="hidden")

# Text area for raw text input
raw_text = st.text_area("Or enter your raw text here:", height=150)

# If files are uploaded or raw text is provided, create the vectordb and store it in the session state
if uploaded_files or raw_text:
    file_names = [file.name for file in uploaded_files] if uploaded_files else []
    st.session_state["vectordb"] = create_vectordb(uploaded_files, file_names, raw_text)

# Define the template for the chatbot prompt
prompt_template = """
    You are a helpful Assistant who answers users' questions based on multiple contexts given to you.

    Keep your answer short and to the point.
    
    The evidence is the context of the document extract with metadata. 
    
    Carefully focus on the metadata, especially 'filename' and 'page' whenever answering.
    
    Make sure to add filename and page number at the end of the sentence you are citing to.

    Also be able to use your general knowledge to give an adequate summary based on the document extract given to you, but do not hallucinate.
        
    Reply "Not applicable" if text is irrelevant.
     
    The document content is:
    {doc_extract}
"""

# Get the current prompt from the session state or set a default value
prompt = st.session_state.get("prompt", [{"role": "system", "content": "none"}])

# Display previous chat messages
for message in prompt:
    if message["role"] != "system":
        with st.chat_message(message["role"]):
            st.write(message["content"])

# Get the user's question using Streamlit's chat input
question = st.chat_input("Ask anything")

# Handle the user's question
if question:
    vectordb = st.session_state.get("vectordb", None)
    if not vectordb:
        with st.chat_message("assistant"):
            st.write("You need to provide a PDF, TXT file, FHIR, or IPS document.")
            st.stop()

    doc_extract = None

    # If a JSON/FHIR/IPS file is uploaded, handle FHIRPath query
    for uploaded_file in uploaded_files:
        if uploaded_file.type == "application/json":
            fhir_data = load_fhir_document(uploaded_file)
            fhirpath_query = generate_fhirpath_query(question)
            fhirpath_result = evaluate_fhirpath(fhir_data, fhirpath_query)
            doc_extract = convert_to_natural_language(fhirpath_result)
            break

    # If handling PDF or TXT files, search the vectordb for similar content
    if not doc_extract and vectordb:
        search_results = vectordb.similarity_search(question, k=3)
        doc_extract = "\n".join([result.page_content for result in search_results])

    if doc_extract:
        # Update the prompt with the document extract
        prompt[0] = {
            "role": "system",
            "content": prompt_template.format(doc_extract=doc_extract),
        }

        # Add the user's question to the prompt and display it
        prompt.append({"role": "user", "content": question})
        with st.chat_message("user"):
            st.write(question)

        # Display an empty assistant message while waiting for the response
        with st.chat_message("assistant"):
            botmsg = st.empty()

        # Call ChatGPT with streaming and display the response as it comes
        response = []
        result = ""
        for chunk in client.chat.completions.create(
            model="gpt-3.5-turbo", messages=prompt, stream=True
        ):
            text = chunk.choices[0].delta.content
            if text is not None:
                response.append(text)
                result = "".join(response).strip()
                botmsg.write(result)

        # Add the assistant's response to the prompt
        prompt.append({"role": "assistant", "content": result})

        # Store the updated prompt in the session state
        st.session_state["prompt"] = prompt
    else:
        with st.chat_message("assistant"):
            st.write("No relevant data found in the document.")

```
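
As a possible workaround (the commented-out `fhirpathpy` import above hints at it), the FHIRPath expressions could be evaluated with `fhirpathpy`, which does not depend on pydantic v1. A minimal sketch, with a made-up Patient resource:

```python
# Sketch only: fhirpathpy is a pure-Python FHIRPath engine, so it avoids
# the fhirpath/pydantic version clash entirely. The Patient resource here
# is a hypothetical example.
from fhirpathpy import evaluate

patient = {
    "resourceType": "Patient",
    "name": [{"given": ["John"], "family": "Doe"}],
}

# evaluate() returns a list of matching values -- here ['Doe'].
print(evaluate(patient, "Patient.name.family", {}))
```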
The crash traceback:

```
The langchain_core.pydantic_v1 module was a compatibility shim for pydantic v1, and should no longer be used. Please update the code to import from Pydantic directly.

For example, replace imports like: `from langchain_core.pydantic_v1 import BaseModel`
with: `from pydantic import BaseModel`
or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet:
    from pydantic.v1 import BaseModel

  from langchain_community.embeddings.openai import (
2024-10-22 10:50:00.122 Uncaught app exception
Traceback (most recent call last):
  File "C:\Users\Dr. Abdulhamid\AppData\Roaming\Python\Python312\site-packages\streamlit\runtime\scriptrunner\script_runner.py", line 542, in _run_script
    exec(code, module.__dict__)
  File "C:\Users\Dr. Abdulhamid\Desktop\Health Wallet Project\memory.py", line 5, in <module>
    from langchain.chains import RetrievalQA
  File "C:\Users\Dr. Abdulhamid\AppData\Roaming\Python\Python312\site-packages\langchain\chains\__init__.py", line 20, in <module>
  File "C:\Users\Dr. Abdulhamid\AppData\Roaming\Python\Python312\site-packages\pydantic\_internal\_model_construction.py", line 224, in __new__
    complete_model_class(
  File "C:\Users\Dr. Abdulhamid\AppData\Roaming\Python\Python312\site-packages\pydantic\_internal\_model_construction.py", line 587, in complete_model_class     
    schema = gen_schema.clean_schema(schema)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Dr. Abdulhamid\AppData\Roaming\Python\Python312\site-packages\pydantic\_internal\_generate_schema.py", line 595, in clean_schema
    schema = validate_core_schema(schema)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Dr. Abdulhamid\AppData\Roaming\Python\Python312\site-packages\pydantic\_internal\_core_utils.py", line 570, in validate_core_schema
    return _validate_core_schema(schema)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
pydantic_core._pydantic_core.SchemaError: Invalid Schema:
model.config.extra_fields_behavior
  Input should be 'allow', 'forbid' or 'ignore' [type=literal_error, input_value=<Extra.forbid: 'forbid'>, input_type=Extra]
    For further information visit https://errors.pydantic.dev/2.9/v/literal_error
2024-10-22 10:54:38.930 Uncaught app exception
```
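
The `Invalid Schema` error above is characteristic of pydantic v1-style configuration reaching a pydantic v2 model: v1's `Extra.forbid` enum is being passed where v2 expects the literal string 'forbid'. A minimal sketch of the two config styles (illustrative only, not code from fhirpath or langchain):

```python
from pydantic import BaseModel, ConfigDict

# pydantic v1 style -- this is what triggers the SchemaError under v2:
#
#     from pydantic import Extra
#
#     class PatientV1(BaseModel):
#         name: str
#
#         class Config:
#             extra = Extra.forbid  # v2 rejects the Extra enum here
#
# pydantic v2 style -- a plain string: 'allow', 'forbid', or 'ignore':
class PatientV2(BaseModel):
    model_config = ConfigDict(extra="forbid")

    name: str
```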