RAG ChatBot
Last updated
Python Panel 라이브러리로 UI를 구성한 RAG Chatbot을 구현합니다.
# %pip install panel watchfiles
import panel as pn

pn.extension()


def callback(contents: str, user: str, instance: pn.chat.ChatInterface):
    """Echo the incoming chat message back, prefixed with the sender's name."""
    return f"Echoing {user}: {contents}"


# Minimal chat UI: every message the user sends is echoed by "System".
chat_interface = pn.chat.ChatInterface(callback=callback, callback_user="System")
chat_interface.send("Send a message to receive an echo!", user="System", respond=False)
chat_interface.servable()
import openai
import panel as pn

pn.extension()

openai.api_key = "YOUR_OpenAPI_Key"  # Enter your OpenAI API key


async def callback(contents: str, user: str, instance: pn.chat.ChatInterface):
    """Stream a ChatGPT reply for the user's message.

    Bug fix: the original issued a *blocking* synchronous request
    (``ChatCompletion.create`` + plain ``for``) inside an async callback,
    stalling Panel's event loop while the model streamed. Use the async
    ``acreate`` API and ``async for`` instead, yielding the growing message
    after every chunk so the UI updates progressively.
    """
    response = await openai.ChatCompletion.acreate(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": contents}],
        stream=True,
    )
    message = ""
    async for chunk in response:
        # Each streamed chunk carries an incremental "delta" of content.
        message += chunk["choices"][0]["delta"].get("content", "")
        yield message


chat_interface = pn.chat.ChatInterface(callback=callback, callback_user="ChatGPT")
chat_interface.send(
    "Send a message to get a reply from ChatGPT!", user="System", respond=False
)
chat_interface.servable()
import os

import panel as pn
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory

pn.extension()

os.environ["OPENAI_API_KEY"] = "YOUR_OpenAPI_Key"  # Enter your OpenAI API key


async def callback(contents: str, user: str, instance: pn.chat.ChatInterface):
    """Forward the user's message to the conversation chain.

    Nothing is returned here: the chain streams its answer into the chat
    through the Panel callback handler attached to the LLM below.
    """
    await chain.apredict(input=contents)


chat_interface = pn.chat.ChatInterface(callback=callback, callback_user="ChatGPT")
callback_handler = pn.chat.langchain.PanelCallbackHandler(chat_interface)
llm = ChatOpenAI(streaming=True, callbacks=[callback_handler])
memory = ConversationBufferMemory()
chain = ConversationChain(llm=llm, memory=memory)
chat_interface.send(
    "Send a message to get a reply from ChatGPT!", user="System", respond=False
)
chat_interface.servable()
# Consolidated imports: the original imported ``os`` and ``OpenAIEmbeddings``
# twice (two merged notebook cells).
import os

from dotenv import load_dotenv
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

# Read the OpenAI key from a .env file instead of hard-coding it.
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")

# Load the source document.
loader = PyPDFLoader("dataset/kb_23849_1_1.pdf")
documents = loader.load()

# Split the documents into non-overlapping 1000-character chunks.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(documents)

# Select which embeddings we want to use.
embeddings = OpenAIEmbeddings()

# Create the vector store to use as the index.
db = Chroma.from_documents(texts, embeddings)

# Expose the index as a retriever returning the 2 most similar chunks.
retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 2})

# Create a chain to answer questions, returning the source chunks too.
qa = RetrievalQA.from_chain_type(
    llm=OpenAI(),
    chain_type="map_reduce",
    retriever=retriever,
    return_source_documents=True,
    verbose=True,
)
# %pip install panel watchfiles
import panel as pn

pn.extension()

# Widgets that drive the RAG pipeline configuration.
pdf_input = pn.widgets.FileInput(accept=".pdf", value="", height=50)
key_input = pn.widgets.PasswordInput(name="OpenAI Key", placeholder="sk-...")
k_slider = pn.widgets.IntSlider(
    name="Number of Relevant Chunks", start=1, end=5, step=1, value=2
)
chain_select = pn.widgets.RadioButtonGroup(
    name="Chain Type", options=["stuff", "map_reduce", "refine", "map_rerank"]
)
chat_input = pn.widgets.TextInput(placeholder="First, upload a PDF!")

# Display the widgets (notebook cell output).
pdf_input      # document upload
key_input      # OpenAI key input
k_slider       # retrieved-chunk count
chain_select   # retrieval combine strategy
chat_input     # chat prompt
def initialize_chain():
    """Build (or fetch from cache) a RetrievalQA chain for the current widget state.

    Reads the uploaded PDF, the chunk count and the chain type from the
    widgets, and caches the resulting chain in ``pn.state.cache`` keyed on
    those selections so repeated questions reuse the same index.
    """
    # Local import: in the original notebook order, this cell runs before
    # ``tempfile`` is imported (only at the bottom of the file), so the first
    # call raised NameError.
    import os
    import tempfile

    if key_input.value:
        os.environ["OPENAI_API_KEY"] = key_input.value

    selections = (pdf_input.value, k_slider.value, chain_select.value)
    if selections in pn.state.cache:
        return pn.state.cache[selections]

    chat_input.placeholder = "Ask questions here!"

    # Persist the uploaded bytes so PyPDFLoader can read them from disk.
    with tempfile.NamedTemporaryFile("wb", delete=False) as f:
        f.write(pdf_input.value)
        file_name = f.name
    try:
        loader = PyPDFLoader(file_name)
        documents = loader.load()
    finally:
        os.remove(file_name)  # bug fix: the temp file was never deleted

    # Split the documents into chunks.
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    texts = text_splitter.split_documents(documents)

    # Select which embeddings we want to use.
    embeddings = OpenAIEmbeddings()

    # Create the vector store to use as the index.
    db = Chroma.from_documents(texts, embeddings)

    # Expose this index in a retriever interface.
    retriever = db.as_retriever(
        search_type="similarity", search_kwargs={"k": k_slider.value}
    )

    # Create a chain to answer questions.
    qa = RetrievalQA.from_chain_type(
        llm=OpenAI(),
        chain_type=chain_select.value,
        retriever=retriever,
        return_source_documents=True,
        verbose=True,
    )
    # Bug fix: the cache was checked above but never populated, so every
    # question rebuilt the whole index.
    pn.state.cache[selections] = qa
    return qa
# Display the configuration widgets again (notebook cell output).
pdf_input      # document upload
key_input      # OpenAI key input
k_slider       # retrieved-chunk count
chain_select   # retrieval combine strategy
chat_input     # chat prompt

# Build the chain and ask a sample question.
a = initialize_chain()
# Bug fix: the original called the undefined name ``ga`` instead of ``a``.
response = a({"query": "PaLM은 언제부터 시작했어?"})
response
PaLM, 즉 "Pathways Language Model"은 2022년 4월에 Google에 의해 출시되었습니다.
PaLM은 다양한 언어 작업에서의 성능으로 알려져 있으며 인공 지능 분야에서 몇 가지 혁신적인 훈련 방법과
아키텍처를 도입했습니다.
async def respond(contents, user, chat_interface):
    """Chat callback: answer questions about the uploaded PDF."""
    if not pdf_input.value:
        chat_interface.send(
            {"user": "System", "value": "Please first upload a PDF!"}, respond=False
        )
        return
    elif chat_interface.active == 0:
        # First interaction: switch from the file-upload widget to text input.
        chat_interface.active = 1
        chat_interface.active_widget.placeholder = "Ask questions here!"
        yield {"user": "OpenAI", "value": "Let's chat about the PDF!"}
        return

    qa = initialize_chain()
    response = qa({"query": contents})

    # Show the answer first, then each source chunk with its page number.
    answers = pn.Column(response["result"])
    answers.append(pn.layout.Divider())
    for doc in response["source_documents"][::-1]:
        answers.append(f"**Page {doc.metadata['page']}**:")
        answers.append(f"```\n{doc.page_content}\n```")
    yield {"user": "OpenAI", "value": answers}


chat_interface = pn.chat.ChatInterface(
    callback=respond, sizing_mode="stretch_width", widgets=[pdf_input, chat_input]
)
chat_interface.send(
    {"user": "System", "value": "Please first upload a PDF and click send!"},
    respond=False,
)

# Sidebar holds the configuration widgets; main area holds the chat.
template = pn.template.BootstrapTemplate(
    sidebar=[key_input, k_slider, chain_select], main=[chat_interface]
)
template.servable()
import tempfile
from pathlib import Path
import panel as pn
import param
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from panel_chat_examples import EnvironmentWidgetBase
# NOTE(review): EXAMPLE_PDF is not referenced anywhere in this chunk —
# presumably used by panel_chat_examples tooling; confirm before removing.
EXAMPLE_PDF = Path(__file__).parent / "example.pdf"
# Time-to-live for every pn.cache'd pipeline step below.
TTL = 1800  # 30 minutes
pn.extension()
# Define the Retrieval Question/ Answer Chain
# We use caching to speed things up
@pn.cache(ttl=TTL)
def _get_texts(pdf):
    """Load *pdf* (raw bytes) and split it into 1000-character chunks.

    The result is cached for TTL seconds, keyed on the PDF bytes.
    """
    # PyPDFLoader only reads from a path, so spill the bytes to a temp file.
    with tempfile.NamedTemporaryFile("wb", delete=False) as f:
        f.write(pdf)
        file_name = f.name
    try:
        loader = PyPDFLoader(file_name)
        documents = loader.load()
    finally:
        # Bug fix: the file was created with delete=False and never removed,
        # leaking one temp file per uploaded PDF.
        Path(file_name).unlink()
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    return text_splitter.split_documents(documents)
@pn.cache(ttl=TTL)
def _get_vector_db(pdf, openai_api_key):
    """Return a Chroma vector store indexing the chunks of *pdf*."""
    chunks = _get_texts(pdf)
    # Embed the chunks with OpenAI embeddings and index them in Chroma.
    embedder = OpenAIEmbeddings(openai_api_key=openai_api_key)
    return Chroma.from_documents(chunks, embedder)
@pn.cache(ttl=TTL)
def _get_retriever(pdf, openai_api_key: str, number_of_chunks: int):
    """Expose the PDF's vector store as a top-*number_of_chunks* similarity retriever."""
    vector_db = _get_vector_db(pdf, openai_api_key)
    search_kwargs = {"k": number_of_chunks}
    return vector_db.as_retriever(search_type="similarity", search_kwargs=search_kwargs)
@pn.cache(ttl=TTL)
def _get_retrieval_qa(
    pdf: bytes, number_of_chunks: int, chain_type: str, openai_api_key: str
):
    """Build the RetrievalQA chain that answers questions over *pdf*.

    Returns source documents alongside the answer so the UI can cite them.
    """
    return RetrievalQA.from_chain_type(
        llm=OpenAI(openai_api_key=openai_api_key),
        chain_type=chain_type,
        retriever=_get_retriever(pdf, openai_api_key, number_of_chunks),
        return_source_documents=True,
        verbose=True,
    )
def _get_response(contents):
    """Run the QA chain on *contents*.

    Returns ``(response, chunks)`` where ``chunks`` is a list of
    ``(label, page_content)`` pairs in the order the chain returned them.
    """
    qa = _get_retrieval_qa(
        state.pdf, state.number_of_chunks, state.chain_type, environ.OPENAI_API_KEY
    )
    response = qa({"query": contents})
    # Reversed iteration + insert-at-front in the original is equivalent to a
    # forward pass; label every source chunk by its page number.
    chunks = [
        (f"Chunk {doc.metadata['page']}", doc.page_content)
        for doc in response["source_documents"]
    ]
    return response, chunks
# Define the Application State
class EnvironmentWidget(EnvironmentWidgetBase):
    """Widget exposing the environment variables the app needs."""

    # OpenAI key, read from the environment or entered by the user.
    OPENAI_API_KEY: str = param.String()
class State(param.Parameterized):
    """Reactive application state backing the sidebar widgets."""

    # Raw bytes of the uploaded PDF.
    pdf: bytes = param.Bytes()
    # How many similar chunks the retriever returns (1-5).
    number_of_chunks: int = param.Integer(default=2, bounds=(1, 5), step=1)
    # LangChain combine-documents strategy.
    chain_type: str = param.Selector(
        objects=["stuff", "map_reduce", "refine", "map_rerank"]
    )
environ = EnvironmentWidget()
state = State()

# Define the widgets, each bound to the reactive state above.
pdf_input = pn.widgets.FileInput.from_param(state.param.pdf, accept=".pdf", height=50)
text_input = pn.widgets.TextInput(placeholder="First, upload a PDF!")
chain_type_input = pn.widgets.RadioButtonGroup.from_param(
    state.param.chain_type,
    orientation="vertical",
    sizing_mode="stretch_width",
    button_type="primary",
    button_style="outline",
)
# Define and configure the ChatInterface
def _get_validation_message():
    """Return a user-facing message naming what is still missing, or ``""``."""
    has_pdf = bool(state.pdf)
    has_key = bool(environ.OPENAI_API_KEY)
    if not has_pdf and not has_key:
        return "Please first enter an OpenAI Api key and upload a PDF!"
    if not has_pdf:
        return "Please first upload a PDF!"
    if not has_key:
        return "Please first enter an OpenAI Api key!"
    return ""
def _send_not_ready_message(chat_interface) -> bool:
    """Post the validation message (if any); report whether one was sent."""
    message = _get_validation_message()
    if not message:
        return False
    chat_interface.send({"user": "System", "object": message}, respond=False)
    return True
async def respond(contents, user, chat_interface):
    """Chat callback: validate readiness, then answer questions about the PDF."""
    if _send_not_ready_message(chat_interface):
        return
    if chat_interface.active == 0:
        # First message: switch from the upload widget to the question input.
        chat_interface.active = 1
        chat_interface.active_widget.placeholder = "Ask questions here!"
        yield {"user": "OpenAI", "object": "Let's chat about the PDF!"}
        return
    response, documents = _get_response(contents)
    # Answer on top, supporting chunks below in a collapsible accordion.
    sources = pn.Accordion(*documents, sizing_mode="stretch_width", max_width=800)
    yield {"user": "OpenAI", "object": pn.Column(response["result"], sources)}
# The chat starts disabled until both a PDF and an API key are provided
# (re-enabled by the watcher below).
chat_interface = pn.chat.ChatInterface(
    callback=respond,
    sizing_mode="stretch_width",
    widgets=[pdf_input, text_input],
    disabled=True,
)
@pn.depends(state.param.pdf, environ.param.OPENAI_API_KEY, watch=True)
def _enable_chat_interface(pdf, openai_api_key):
    """Enable the chat only once both a PDF and an API key are present."""
    chat_interface.disabled = not (pdf and openai_api_key)
    _send_not_ready_message(chat_interface)
## Wrap the app in a nice template
# Configuration widgets live in the sidebar; the chat fills the main area.
template = pn.template.BootstrapTemplate(
    sidebar=[
        environ,
        state.param.number_of_chunks,
        "Chain Type:",
        chain_type_input,
    ],
    main=[chat_interface],
)
template.servable()