diff --git a/.github/workflows/send-slack-metrics.yml b/.github/workflows/send-slack-metrics.yml new file mode 100644 index 00000000000..0dad24afd46 --- /dev/null +++ b/.github/workflows/send-slack-metrics.yml @@ -0,0 +1,40 @@ +name: Send Slack Metrics + +on: + schedule: + - cron: "*/5 * * * *" + branches: + - prod + +jobs: + deploy: + runs-on: ubuntu-latest + environment: + name: production + steps: + - name: Checkout code + uses: actions/checkout@v2 + + - name: Configure SSH + run: | + mkdir -p ~/.ssh/ + echo "$PROD_SSH_KEY" > ~/.ssh/hubgpt_prod.key + chmod 600 ~/.ssh/hubgpt_prod.key + cat >>~/.ssh/config <", + "holiday calendar", + "what is 'hubgpt'", + "What are the list of holidays in India in 2024", + "Qualgpt", + "what was the temperature in Delhi on 25th February 2024?", + "mumbwa", +] + +CONTEXT_PROMPT = f""" + +You are a labeling bot. Your job is to give a label to a question asked by a\ + user and return a one-word label. + +The labels you can choose from are as follows: "PROJECTS", "POLICIES", "RESOURCES",\ + "TEAMS", or "MISCELLANEOUS". +I have provided you with some demonstrations as to how you MUST classify a question: + +Questions suitable for the "PROJECTS" label will involve past or current projects,\ + services, work, interventions, +or experiences at IDinsight. Here are some examples: {PROJECTS} + +Questions suitable for the "POLICIES" label are questions that involve eligibility\ + and steps of all global or +regional policies, processes, benefits, and all queries related to Unit4. Here are\ + some examples: {POLICIES} + +The "RESOURCES" label applies to questions about guidelines, tools, products, and\ + resources that support +organizational work, such as guidelines for project execution, service-related\ + resources, or questions about +tools like the GDH Dashboard, HubGPT, KM processes, etc. 
Here are some examples:\ + {RESOURCES} + +Questions suitable for the "TEAMS" label involve IDinsight personnel-related\ + questions around people's +designations, roles and responsibilities, regions, and who to contact for a\ + specific task, as well as organizational +structure. Here are some examples: {TEAMS} + +Questions suitable for the "MISCELLANEOUS" label include anything that doesn't\ + clearly fall into the above +four categories, such as random questions about IDinsight, or general non-IDinsight\ + questions that +can be asked to chatGPT. Here are some examples: {MISCELLANEOUS} +""" + +TASK_PROMPT = """ + +Example usage: + +Submitted question from user: "Who is the head of Human Resources?" +Answer: "TEAMS" + +Consider the lists and labels, then read the user text in triple backticks below, then\ + provide your one-word classification. + +Submitted question from user: ```{question}``` + +What is the most accurate label for the submitted question from the user? + +Output your response between the tags and , without any additional text. 
+
+"""
+
+
+def insert_question(user_question):
+    """Return the full classification prompt with the user's question filled in."""
+    full_prompt = CONTEXT_PROMPT + TASK_PROMPT.format(question=user_question)
+    return full_prompt
+
+
+def label_question(user_question, client):
+    """Ask the chat model to classify one question and return its label.
+
+    Falls back to "MISCELLANEOUS" when the reply is empty or contains no
+    label, so a single malformed reply cannot abort the caller's loop.
+    """
+    chat_completion = client.chat.completions.create(
+        messages=[
+            {
+                "role": "user",
+                "content": insert_question(user_question),
+            }
+        ],
+        model="gpt-3.5-turbo",
+    )
+    # content can be None; treat that the same as an empty reply.
+    output = chat_completion.choices[0].message.content or ""
+    # NOTE(review): as written the pattern has no delimiters around the
+    # capture group, so the lazy group always captures ""; confirm it is
+    # anchored on the tags that TASK_PROMPT asks the model to emit.
+    match = re.search(r"\s*([\s\S]*?)\s*", output)
+    label = match.group(1) if match else ""
+    return label or "MISCELLANEOUS"
diff --git a/backend/scripts/send_slack_report/queries.py b/backend/scripts/send_slack_report/queries.py
index fa21dc33943..3ad6cfbbf7c 100644
--- a/backend/scripts/send_slack_report/queries.py
+++ b/backend/scripts/send_slack_report/queries.py
@@ -22,3 +22,13 @@
         INTERVAL '7 days')
     AND user_id IS NOT NULL
 """
+INITIAL_MESSAGES_QUERY = """
+    SELECT message as initial_query FROM (
+        SELECT *,
+            ROW_NUMBER() OVER (PARTITION BY chat_session_id ORDER BY time_sent ASC) as rn
+        FROM chat_message
+        WHERE (time_sent >= (NOW() AT TIME ZONE 'UTC') - INTERVAL '7 days')
+        AND (message_type = 'USER')
+    ) sub
+    WHERE sub.rn = 1
+    ORDER BY sub.time_sent ASC;"""
diff --git a/backend/scripts/send_slack_report/send_slack_report.py b/backend/scripts/send_slack_report/send_slack_report.py
index 04819aa4882..6fc63bab2bd 100755
--- a/backend/scripts/send_slack_report/send_slack_report.py
+++ b/backend/scripts/send_slack_report/send_slack_report.py
@@ -1,6 +1,10 @@
+import json
 import os
-import yaml
 
+import pandas as pd
+from initial_query_classification import label_question
+from openai import OpenAI
+from queries import INITIAL_MESSAGES_QUERY
 from queries import SLACK_MESSAGES_QUERY
 from queries import WEB_MESSAGES_QUERY
 from queries import WEB_USERS_QUERY
@@ -8,6 +12,10 @@
 from sqlalchemy import create_engine
 from sqlalchemy import text
 
+from danswer.utils.logger import setup_logger
+
+logger = setup_logger()
+
 
 def get_engine():
     POSTGRES_USER = os.environ.get("POSTGRES_USER") or "postgres"
@@ -24,6 +32,7 @@
def get_engine():
 
 
 def get_counts():
+    logger.info("Connecting to SQL database")
     engine = get_engine()
 
     with engine.connect() as connection:
@@ -37,45 +46,79 @@ def get_counts():
     with engine.connect() as connection:
         unique_users = connection.execute(text(WEB_USERS_QUERY))
         web_users = unique_users.fetchone()[0]
-
+    logger.info("Counts retrieved")
     return slack_messages, web_messages, web_users
 
 
-def create_message(slack_messages, web_messages, web_users):
+def classify_initial_queries():
+    """Label each chat session's first user message within the last 7 days
+    and return one report line per label with its count and percentage."""
+    engine = get_engine()
+    with engine.connect() as connection:
+        df = pd.read_sql_query(INITIAL_MESSAGES_QUERY, connection)
+    logger.info("Initial queries received")
+    client = OpenAI(api_key=os.environ["GEN_AI_API_KEY"])
+    label_series = df["initial_query"].map(lambda x: label_question(x, client))
+    logger.info("Labelling complete")
+    tally_json = json.loads(label_series.value_counts().to_json())
+    classifications = ""
+    total_initial_queries = sum(tally_json.values())
+    for k, v in tally_json.items():
+        # One decimal place; the raw float renders as e.g. 33.33333333333333%.
+        classifications += f"""There were {v} queries (representing {v / total_initial_queries * 100:.1f}% \
+of all initial queries) about {k} \n"""
+    return classifications
+
+
+def create_message(slack_messages, web_messages, web_users, classifications):
     message = (
         f"Hello Users!\n\n"
         f"Here are some updates from HubGPT regarding the last 7 days:\n"
         f"- {slack_messages}: Slack messages in the last 7 days.\n"
         f"- {web_messages}: Web App messages in the last 7 days.\n"
-        f"- {web_users}: Unique users on the Web App."
+        f"- {web_users}: Unique users on the Web App.\n"
+        "Usage breakdown:\n"
+        f"{classifications}"
     )
     return message
 
 
 def send_message(user_id, message):
-    # Get Slack token from yaml
-    with open("secrets.yaml", "r") as file:
-        secrets = yaml.safe_load(file)
+    # .get(): an unset variable must reach the guard below, not raise KeyError.
+    SLACK_TOKEN = os.environ.get("SLACK_BOT_TOKEN")
+    if not SLACK_TOKEN:
+        logger.error(
+            "Slack OAuth token not provided. Check env prod template for guidance"
+        )
+        return None
+    logger.info("Initializing Slack client")
 
-    SLACK_TOKEN = secrets["SLACK_BOT_TOKEN"]
     slack_client = WebClient(token=SLACK_TOKEN)
-    print("Sending message")
+    logger.info("Sending Slack message")
     # Send a message to the user
     slack_client.chat_postMessage(channel=user_id, text=message)
-    print("Message sent")
+    logger.info("Message sent")
 
     return None
 
 
 def send_usage_report_to_slack(user_id):
     slack, web, web_users = get_counts()
-    message = create_message(slack, web, web_users)
+    classifications = classify_initial_queries()
+    message = create_message(slack, web, web_users, classifications)
     send_message(user_id, message)
 
     return None
 
 
-# if __name__ == "__main__":
-#     USER_ID = "C05K8F6RXU3"
-#     print("Starting...")
-#     send_usage_report_to_slack(USER_ID)
+if __name__ == "__main__":
+    # .get(): an unset variable must be reported below, not raise KeyError.
+    USER_ID = os.environ.get("METRICS_CHANNEL_ID")
+    if not USER_ID:
+        logger.error(
+            "Slack Metrics Channel ID token not provided. \
+Check env prod template for guidance"
+        )
+    else:
+        logger.info("Starting Slack usage report")
+        send_usage_report_to_slack(USER_ID)
diff --git a/deployment/docker_compose/env.prod.template b/deployment/docker_compose/env.prod.template
index d356995df93..42cd27af9f2 100644
--- a/deployment/docker_compose/env.prod.template
+++ b/deployment/docker_compose/env.prod.template
@@ -18,6 +18,11 @@ GEN_AI_MODEL_VERSION=gpt-4
 #GEN_AI_API_VERSION=
 
 
+# Metrics Slack bot OAuth token (from the Slack API "OAuth & Permissions" page; needs the chat:write scope)
+SLACK_BOT_TOKEN=
+# Slack channel to post metrics into
+METRICS_CHANNEL_ID=
+
 # If you want to setup a slack bot to answer questions automatically in Slack
 # channels it is added to, you must specify the two below.
 # More information in the guide here: https://docs.danswer.dev/slack_bot_setup