# Hugging Face Space: faldeus0092/rg-intent-classification-demo
# (Space status when captured: Sleeping; file size: 11,517 bytes)

#@title Model Loading
from sentence_transformers import SentenceTransformer, util
from utils import get_history_from_prompt, get_latest_user_input_from_prompt, get_top_intents, create_embedding
from intents import intents, intents_sentence_similarity_en, chatbot_intents
from prompt import prompt_template
import flows
import os
import gradio as gr
import pandas as pd
import langchain
from langchain import PromptTemplate, LLMChain
from langchain.chat_models import ChatOpenAI
from datetime import date
import numpy as np
from openai import OpenAI

# Sentence-embedding model used to encode incoming chat text.
model_en = SentenceTransformer("intfloat/multilingual-e5-base")

# Embedding matrix compared against every query embedding
# (presumably one row per entry in `intents` -- confirm against how the file was built).
intents_embedding = np.load('embeddings2.npy')
# Disabled alternative that would load OpenAI-based embeddings instead:
# openai_intents_embedding = np.load('openai_embeddings.npy')

# Module-level placeholders; classify_intent creates its own local objects.
llm = llm_chain = None

def raw_inference(input, recv_state, n_samples, threshold):
  """Rank candidate intents for a chat message by embedding similarity.

  Args:
    input: Chat text to encode with ``model_en``.
    recv_state: Key into ``flows.STATE_FLOWS_MAP`` naming the current state.
    n_samples: Forwarded to ``get_top_intents`` as ``n``.
    threshold: Forwarded to ``get_top_intents`` as ``threshold``.

  Returns:
    A pair of (whatever ``get_top_intents`` returns, a visible
    "Ask intent with Language Model" button used to reveal the LLM step).

  Raises:
    KeyError: If ``recv_state`` is not a key of ``flows.STATE_FLOWS_MAP``.
  """
  # Resolve the flow first so an unknown state fails before any encoding work.
  active_flow = flows.STATE_FLOWS_MAP[recv_state]
  embedded_query = model_en.encode(input)
  scores = util.pytorch_cos_sim(embedded_query, intents_embedding)
  top_matches = get_top_intents(
      intents,
      scores,
      n=n_samples,
      threshold=threshold,
      flow=active_flow,
  )
  reveal_llm_button = gr.Button("Ask intent with Language Model", visible=True)
  return top_matches, reveal_llm_button

def process_csv(files):
  """Load an uploaded conversation CSV and reveal the row-inspection widgets.

  The CSV is filtered to rows whose ``chatbot_response`` is a known intent,
  reduced to the four columns the demo uses, stripped of rows with missing
  values, and stored in the module-level ``df`` that ``update_index`` and
  ``check_accuracy`` read.

  Args:
    files: The uploaded file as handed over by the upload button; it is
      passed straight to ``pandas.read_csv`` and echoed to the file output.

  Returns:
    A 7-tuple of component updates in the order expected by the upload
    handler: (upload button, file, index slider, input chat, state,
    ground truth, history). The text fields show the first data row.

  Raises:
    gr.Error: If no usable row remains after filtering.
    KeyError: If a required column is missing from the CSV.
  """
  global df
  frame = pd.read_csv(files, low_memory=False)
  frame = frame[frame['chatbot_response'].isin(intents)]
  frame = frame[["user_message", "prompt", "chatbot_response", "state"]]
  # drop=True discards the old index instead of keeping it as an 'index'
  # column (the previous separate drop() call threw its result away).
  frame = frame.dropna().reset_index(drop=True)
  if frame.empty:
    raise gr.Error("No usable rows found in the uploaded CSV.")

  # Publish only after validation so a bad upload does not replace good data.
  df = frame
  last_position = len(df.index) - 1

  first_row = df.iloc[0]
  chat = get_latest_user_input_from_prompt(first_row["prompt"])
  history = get_history_from_prompt(first_row["prompt"])
  # Show the raw state, exactly as update_index does: raw_inference looks the
  # textbox value up in flows.STATE_FLOWS_MAP itself, so it must stay a key.
  state = first_row['state']
  label = first_row['chatbot_response']

  # The slider is zero-based because update_index feeds it to df.iloc; a range
  # of 1..len(df) hid the first row and overran the frame at the last step.
  return (gr.UploadButton("Upload CSV...", file_types=["file"], file_count="single", visible=False),
          files,
          gr.Slider(0, last_position, value=0, step=1, visible=True, label="Index", info="Select which index of data to check the intents"),
          gr.Textbox(label="Input Chat", info="Input in index", visible=True, value=chat, interactive=False),
          gr.Textbox(label="State", info="State on which the chat currently on. Some state will exclude some intents", visible=True, value=state, interactive=False),
          gr.Textbox(label="Ground Truth", info="The label in which the IntentClassification predict in the CSV", visible=True, value=label, interactive=False),
          gr.Textbox(label="History or summary", info="Chat history or summary, if available", visible=True, value=history, interactive=False))

def update_index(index):
  """Refresh the read-only text fields for the row selected by the slider.

  Args:
    index: Slider value; converted to ``int`` and used positionally with
      ``df.iloc`` on the module-level ``df``.

  Returns:
    A 4-tuple of textboxes in the order (input chat, state, ground truth,
    history), matching the outputs of the slider's change handler.
  """
  selected = df.iloc[int(index)]
  prompt_text = selected["prompt"]

  latest_message = get_latest_user_input_from_prompt(prompt_text)
  past_messages = get_history_from_prompt(prompt_text)
  current_state = selected['state']
  expected_intent = selected['chatbot_response']

  chat_box = gr.Textbox(label="Input Chat", info="Input in index", visible=True, value=latest_message, interactive=False)
  state_box = gr.Textbox(label="State", info="State on which the chat currently on. Some state will exclude some intents", visible=True, value=current_state, interactive=False)
  truth_box = gr.Textbox(label="Ground Truth", info="The label in which the IntentClassification predict in the CSV", visible=True, value=expected_intent, interactive=False)
  history_box = gr.Textbox(label="History or summary", info="Chat history or summary, if available", visible=True, value=past_messages, interactive=False)
  return chat_box, state_box, truth_box, history_box

def check_accuracy(n_samples, threshold):
  """Measure how often the ground-truth intent appears among the top intents.

  Every row of the uploaded CSV (module-level ``df``) is encoded, compared
  against ``intents_embedding`` and ranked with ``get_top_intents``; a row
  counts as correct when its ``chatbot_response`` label is contained in any
  returned item.

  Args:
    n_samples: Forwarded to ``get_top_intents`` as ``n``.
    threshold: Forwarded to ``get_top_intents`` as ``threshold``.

  Returns:
    A pair ``(score, grouped_data)``: the overall percentage of correct rows
    and a DataFrame with per-label ``sum``, ``count`` and ``percentage``.

  Raises:
    gr.Error: If no CSV has been uploaded yet or it holds no usable rows.
  """
  # The accuracy button is visible before any upload, so `df` may not exist
  # yet; look it up defensively instead of failing with a NameError.
  data = globals().get('df')
  if data is None or data.empty:
    raise gr.Error("Upload a CSV file with at least one usable row before calculating accuracy.")

  res_list = []
  for _, row in data.iterrows():
    chat = get_latest_user_input_from_prompt(row["prompt"])
    query_embedding = model_en.encode(chat)
    flow = flows.STATE_FLOWS_MAP[row['state']]
    similarity = util.pytorch_cos_sim(query_embedding, intents_embedding)
    result = get_top_intents(intents, similarity, n=n_samples, threshold=threshold, flow=flow)

    label = row['chatbot_response']
    # 1 when the label is found in at least one returned item, else 0.
    is_hit = int(any(label in item for item in result))
    res_list.append({'state': row['state'], 'gt': label, 'isPredictedTrue': is_hit})

  res_df = pd.DataFrame(res_list)

  # Per-label breakdown: hits, total rows and hit percentage.
  grouped_data = res_df.groupby('gt')['isPredictedTrue'].agg(['sum', 'count']).reset_index()
  grouped_data['percentage'] = (grouped_data['sum'] / grouped_data['count']) * 100

  # Overall (unweighted) accuracy across all rows, in percent.
  score = (res_df['isPredictedTrue'] == 1).sum()/res_df['isPredictedTrue'].count() * 100

  print(score, grouped_data)
  return score, grouped_data

def classify_intent(input_text:str, history:str, answer, model_name, api_key):
  """Ask an OpenAI chat model to pick the intent among the retrieved candidates.

  Args:
    input_text: Latest user message, bound to the ``INPUT`` prompt variable.
    history: Chat history or summary, bound to ``chatHistory``.
    answer: Candidate list from the similarity step; the first element of
      each entry is used as the intent name and must be a key of
      ``chatbot_intents``.
    model_name: OpenAI chat model identifier passed to ``ChatOpenAI``.
    api_key: OpenAI API key used for this call.

  Returns:
    A pair ``(predicted_intent, prompt_result)``: the chain's output and the
    fully rendered prompt text that was built from the same inputs.

  Raises:
    KeyError: If a candidate intent has no entry in ``chatbot_intents``.
  """
  print(f"predicting with llm... date: {date.today()}")
  print(f"model name: {model_name}")
  # temperature is numeric; passing the string '0.1' relied on implicit coercion.
  llm = ChatOpenAI(model=model_name, temperature=0.1, openai_api_key=api_key)
  prompt = PromptTemplate(template=prompt_template, input_variables=["intents", "INPUT", "chatHistory"])
  llm_chain = LLMChain(prompt=prompt, llm=llm, verbose=False)

  # One "<intent>: <description>" line per candidate.
  inp_intents = "".join(
      candidate[0] + ": " + chatbot_intents[candidate[0]] + "\n"
      for candidate in answer
  )
  predicted_intent = llm_chain.run({"intents": inp_intents, "INPUT": input_text, "chatHistory": history})
  # Render the same prompt separately so the UI can display what was sent.
  prompt_result = prompt.format_prompt(intents=inp_intents, INPUT=input_text, chatHistory=history).to_string()
  return predicted_intent, prompt_result

# Colour palette applied to the whole demo interface.
theme = gr.themes.Default(primary_hue="indigo", secondary_hue="pink", neutral_hue="slate")

# UI definition. Left column: shared settings plus two input tabs (free text
# and uploaded CSV rows). Right column: similarity result and LLM output.
# Components are created in layout order, so statement order matters here.
with gr.Blocks(title="Intent Classification Demo", theme=theme) as interface:
  gr.Markdown("""# Demo for Intent Classification""")

  with gr.Row(equal_height=True):
    with gr.Column():
      # Settings shared by both input tabs.
      model_name = gr.Dropdown(["gpt-3.5-turbo",
                            "gpt-3.5-turbo-1106",
                            "gpt-4",
                            "gpt-4-1106-preview"],
                            label="Model name",
                            info="Select model name for GPT")
      api_key = gr.Textbox(label="OpenAI API Key", info="get it at https://platform.openai.com/account/api-keys",visible=True, lines=1, type="password")
      # The help text now states the slider's real default (value=10).
      n_samples = gr.Slider(1, 10, value=10, step=1, label="N samples", info="Number of samples to be retrieved. Default is 10")
      threshold = gr.Slider(0.0, 1.0, value=0.75, step=0.01, label="Threshold", info="Threshold of cosine similarity which intent will be considered similar to the input. The higher, the more similar the intent will be. Default is 0.75")
      with gr.Tab("Input from raw text"):
        raw_input_text = gr.Textbox(label="Input Chat", info="Input your chat here, the model will predict the intent")
        raw_state = gr.Dropdown(["GeneralState",
                            "HomeworkState",
                            "ExerciseState",
                            "UnderstandState",
                            "RecommendMaterialState",
                            "PersonalState",
                            "AssessKnowledgeState"],
                            label="State",
                            info="Select state on which the chat currently on. Some state will exclude some intents")
        raw_history = gr.Textbox(label="History or summary", info="Chat history or summary, if available", visible=True)
        raw_ask_button = gr.Button("Ask")
        # Hidden until raw_inference returns a visible replacement.
        ask_llm_button_raw = gr.Button("Ask intent with Language Model", visible=False)

      with gr.Tab("Input from Big Query data"):
        gr.Markdown("""
        ## Guide:

        Assuming have access to BigQuery, you can query the table `silicon-airlock-153323.chatbot_ai_dwp.fact_chatbot_ai_conversation_raw`, export result as CSV file, and upload here (make sure your query contains these columns: `prompt, user_message, chatbot_response, state`)

        ```SELECT prompt, user_message, chatbot_response, state FROM `silicon-airlock-153323.chatbot_ai_dwp.fact_chatbot_ai_conversation_raw` WHERE DATE(_PARTITIONTIME) BETWEEN DATE("2023-11-13") AND DATE("2023-11-19") AND service_name = 'learning_companion' LIMIT 1000```

        Adjust the date according to needs. After that, export as CSV and upload to this gradio

        example CSV files to use:

        https://drive.google.com/file/d/1iDLywKP5JxDJXaAzomSUYLZRWvoGqpt5/view?usp=sharing

        https://drive.google.com/file/d/1Jh_hP7U2JGQXsRo9OponyVSHL_s1Yx8w/view?usp=sharing

        """)

        file_output = gr.File()
        upload_button = gr.UploadButton("Upload CSV...", file_types=["file"], file_count="single")

        # Row-inspection widgets start hidden; process_csv returns visible
        # replacements for them once a CSV has been uploaded.
        index = gr.Slider(1, 1000, value=5, step=1, visible=False, label="Index", info="Select which index of data to check the intents")
        input_text = gr.Textbox(label="Input Chat", info="Input in index", visible=False)
        state = gr.Textbox(label="State", info="State on which the chat currently on. Some state will exclude some intents", visible=False)
        history = gr.Textbox(label="History or summary", info="Chat history or summary, if available", visible=False)
        gt = gr.Textbox(label="Ground Truth", info="The label in which the IntentClassification predict in the CSV", visible=False)
        ask_button = gr.Button("Ask With CSV")
        ask_llm_button = gr.Button("Ask intent with Language Model", visible=False)


        index.change(fn=update_index, inputs=index, outputs=[input_text, state, gt, history])
        upload_button.upload(process_csv, upload_button, [upload_button, file_output, index, input_text, state, gt, history])
        with gr.Column():
          with gr.Row():
            accuracy = gr.Markdown("""

            You can also check accuracy on how well the model predict the intents based on your provided CSV files. This might take 1-2 minutes.

            """, visible=True)
            accuracy_button = gr.Button("Calculate Accuracy", visible=True)
          accuracy_score = gr.Label(label="Accuracy result", visible=True)
          accuracy_table = gr.Dataframe(visible=True)

    with gr.Column():
      answer = gr.JSON(label="Sentence Similarity Prediction", show_label=True)
      LLM_prediction = gr.Label(label="LLM Prediction Result", visible=True)
      LLM_prompt = gr.Textbox(label="Prompt Used for Language Model", info="Showing prompt used in language model", visible=True)

  # Event wiring. Both "Ask" buttons share raw_inference and both LLM buttons
  # share classify_intent; only the source components differ per tab.
  accuracy_button.click(fn=check_accuracy, inputs=[n_samples, threshold], outputs=[accuracy_score, accuracy_table])
  raw_ask_button.click(fn=raw_inference, inputs=[raw_input_text, raw_state, n_samples, threshold], outputs=[answer, ask_llm_button_raw])
  ask_button.click(fn=raw_inference, inputs=[input_text, state, n_samples, threshold], outputs=[answer, ask_llm_button])
  ask_llm_button.click(fn=classify_intent, inputs=[input_text, history, answer, model_name, api_key], outputs=[LLM_prediction, LLM_prompt])
  ask_llm_button_raw.click(fn=classify_intent, inputs=[raw_input_text, raw_history, answer, model_name, api_key], outputs=[LLM_prediction, LLM_prompt])

# Launch the app as soon as this module runs (there is no __main__ guard, so
# importing the file also starts it).
# Alternative call without the share option, kept for local debugging:
# interface.launch(debug=True)
interface.launch(share=True, debug=True)