From 63d3e62d15031b6ab1d06192006f06c1250ddfa3 Mon Sep 17 00:00:00 2001 From: Mark Botterill Date: Fri, 3 May 2024 12:37:56 +0000 Subject: [PATCH 1/2] Improve logging for Slack message --- .../send_slack_report/send_slack_report.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/backend/scripts/send_slack_report/send_slack_report.py b/backend/scripts/send_slack_report/send_slack_report.py index 6fc63bab2bd..d25ce6a3bb2 100755 --- a/backend/scripts/send_slack_report/send_slack_report.py +++ b/backend/scripts/send_slack_report/send_slack_report.py @@ -108,12 +108,13 @@ def send_usage_report_to_slack(user_id): if __name__ == "__main__": - USER_ID = os.environ["METRICS_CHANNEL_ID"] - if not USER_ID: - logger.debug( - "Slack Metrics Channel ID token not provided. \ -Check env prod template for guidance" - ) - else: - logger.info("Starting Slack usage report") - send_usage_report_to_slack(USER_ID) + try: + USER_ID = os.environ.get("METRICS_CHANNEL_ID") + if USER_ID: + logger.info("Starting Slack usage report") + send_usage_report_to_slack(USER_ID) + else: + logger.warning("Slack Metrics Channel ID token not provided.") + logger.warning("Check env prod template for guidance.") + except Exception as e: + logger.exception("An error occurred while sending usage report to Slack: %s", e) From cd449dbb1eff5140ccd8632accc6c448f44be425 Mon Sep 17 00:00:00 2001 From: Mark Botterill Date: Fri, 3 May 2024 17:11:37 +0000 Subject: [PATCH 2/2] Add graphic ability to metric reporting --- backend/requirements/default.txt | 2 + backend/scripts/send_slack_report/queries.py | 88 +++++++++++--- .../send_slack_report/send_slack_report.py | 113 ++++++++++++------ 3 files changed, 150 insertions(+), 53 deletions(-) diff --git a/backend/requirements/default.txt b/backend/requirements/default.txt index eab0f89357a..8c376b4f276 100644 --- a/backend/requirements/default.txt +++ b/backend/requirements/default.txt @@ -23,6 +23,7 @@ httpx[http2]==0.23.3 httpx-oauth==0.11.2 huggingface-hub==0.20.1 jira==3.5.1 +kaleido==0.2.1 langchain==0.1.9 litellm==1.34.21 llama-index==0.9.45 @@ -35,6 +36,7 @@ oauthlib==3.2.2 openai==1.3.5 openpyxl==3.1.2 playwright==1.41.2 +plotly==5.22.0 psutil==5.9.5 psycopg2-binary==2.9.9 pycryptodome==3.19.1 diff --git a/backend/scripts/send_slack_report/queries.py b/backend/scripts/send_slack_report/queries.py index 3ad6cfbbf7c..f25b7b15e3f 100644 --- a/backend/scripts/send_slack_report/queries.py +++ b/backend/scripts/send_slack_report/queries.py @@ -1,27 +1,77 @@ # This script stores all SQL queries which are used # to get HubGPT usage stats -SLACK_MESSAGES_QUERY = """SELECT COUNT(*) - FROM chat_session - WHERE (time_created >= (NOW() AT TIME ZONE 'UTC') - - INTERVAL '7 days') - AND user_id IS NULL -""" +MEDIUMS = ["slack_messages", "web_messages", "distinct_web_users"] -WEB_MESSAGES_QUERY = """SELECT COUNT(*) - FROM chat_session - WHERE (time_created >= (NOW() AT TIME ZONE 'UTC') - - INTERVAL '7 days') - AND user_id IS NOT NULL -""" -WEB_USERS_QUERY = """ - SELECT COUNT(DISTINCT user_id) - FROM chat_session - WHERE (time_created >= (NOW() AT TIME ZONE 'UTC') - - INTERVAL '7 days') - AND user_id IS NOT NULL -""" +USAGE_QUERIES = { + "last_7_days": { + "slack_messages": """ + SELECT COUNT(*) + FROM chat_session + WHERE (time_created >= (NOW() AT TIME ZONE 'UTC') - INTERVAL '7 days') + AND user_id IS NULL + """, + "web_messages": """ + SELECT COUNT(*) + FROM chat_session + WHERE (time_created >= (NOW() AT TIME ZONE 'UTC') - INTERVAL '7 days') + AND user_id IS NOT NULL + """, + "distinct_web_users": """ + SELECT COUNT(DISTINCT user_id) + FROM chat_session + WHERE (time_created >= (NOW() AT TIME ZONE 'UTC') - INTERVAL '7 days') + AND user_id IS NOT NULL + """, + }, + "day_14_to_7": { + "slack_messages": """ + SELECT COUNT(*) + FROM chat_session + WHERE (time_created >= (NOW() AT TIME ZONE 'UTC') - INTERVAL '14 days') + AND time_created < (NOW() AT TIME ZONE 'UTC') - INTERVAL '7 days' + AND user_id IS NULL + """, + "web_messages": """ + SELECT COUNT(*) + FROM chat_session + WHERE (time_created >= (NOW() AT TIME ZONE 'UTC') - INTERVAL '14 days') + AND time_created < (NOW() AT TIME ZONE 'UTC') - INTERVAL '7 days' + AND user_id IS NOT NULL + """, + "distinct_web_users": """ + SELECT COUNT(DISTINCT user_id) + FROM chat_session + WHERE (time_created >= (NOW() AT TIME ZONE 'UTC') - INTERVAL '14 days') + AND time_created < (NOW() AT TIME ZONE 'UTC') - INTERVAL '7 days' + AND user_id IS NOT NULL + """, + }, + "day_35_to_28": { + "slack_messages": """ + SELECT COUNT(*) + FROM chat_session + WHERE (time_created >= (NOW() AT TIME ZONE 'UTC') - INTERVAL '35 days') + AND time_created < (NOW() AT TIME ZONE 'UTC') - INTERVAL '28 days' + AND user_id IS NULL + """, + "web_messages": """ + SELECT COUNT(*) + FROM chat_session + WHERE (time_created >= (NOW() AT TIME ZONE 'UTC') - INTERVAL '35 days') + AND time_created < (NOW() AT TIME ZONE 'UTC') - INTERVAL '28 days' + AND user_id IS NOT NULL + """, + "distinct_web_users": """ + SELECT COUNT(DISTINCT user_id) + FROM chat_session + WHERE (time_created >= (NOW() AT TIME ZONE 'UTC') - INTERVAL '35 days') + AND time_created < (NOW() AT TIME ZONE 'UTC') - INTERVAL '28 days' + AND user_id IS NOT NULL + """, + }, +} INITIAL_MESSAGES_QUERY = """ SELECT message as initial_query FROM ( SELECT *, diff --git a/backend/scripts/send_slack_report/send_slack_report.py b/backend/scripts/send_slack_report/send_slack_report.py index d25ce6a3bb2..796a058fd43 100755 --- a/backend/scripts/send_slack_report/send_slack_report.py +++ b/backend/scripts/send_slack_report/send_slack_report.py @@ -2,18 +2,20 @@ import os import pandas as pd +import plotly.express as px +import requests from initial_query_classification import label_question from openai import OpenAI from queries import INITIAL_MESSAGES_QUERY -from queries import SLACK_MESSAGES_QUERY -from queries import WEB_MESSAGES_QUERY -from queries import WEB_USERS_QUERY +from queries import MEDIUMS +from queries import USAGE_QUERIES from slack_sdk import WebClient from sqlalchemy import create_engine from sqlalchemy import text from danswer.utils.logger import setup_logger + logger = setup_logger() @@ -32,22 +34,54 @@ def get_engine(): def get_counts(): - logger.info("Connecting to SQL database") - engine = get_engine() - - with engine.connect() as connection: - num_messages = connection.execute(text(SLACK_MESSAGES_QUERY)) - slack_messages = num_messages.fetchone()[0] + """Fetches counts based on the specified period from the global queries dictionary.""" + results = {"medium": [], "time_period": [], "count": []} - with engine.connect() as connection: - unique_users = connection.execute(text(WEB_MESSAGES_QUERY)) - web_messages = unique_users.fetchone()[0] + engine = get_engine() with engine.connect() as connection: - unique_users = connection.execute(text(WEB_USERS_QUERY)) - web_users = unique_users.fetchone()[0] - logger.info("Counts retrieved") - return slack_messages, web_messages, web_users + for period in USAGE_QUERIES.keys(): + for key, query in USAGE_QUERIES[period].items(): + result = connection.execute(text(query.replace("\n", ""))) + results["count"].append(result.scalar()) + results["medium"].append(key) + results["time_period"].append(period) + + return pd.DataFrame(results) + + +def get_last_week_counts(df): + """Take a DataFrame and returns a dictionary of counts ofr users + from the last 7 days across Slack, Web and unique users""" + last_week_count = {} + for medium in MEDIUMS: + count = df.query(f"time_period =='last_7_days' and medium == '{medium}'")[ + "count" + ].iloc[0] + last_week_count[medium] = count + return last_week_count + + +def save_bar_plot(df, filename): + fig = px.bar(df, x="medium", y="count", color="time_period", barmode="group") + fig.write_image(file=filename, format="jpg") + return filename + + +def upload_to_slack_and_delete(filename, channel_id): + slack_client = WebClient(token=os.environ.get("SLACK_BOT_TOKEN")) + size = os.stat(filename).st_size + response = slack_client.files_getUploadURLExternal(filename=filename, length=size) + upload_url = response.data["upload_url"] + file_id = response.data["file_id"] + post_response = requests.post(url=upload_url, data=open(filename, "rb")) + if post_response.status_code == 200: + upload_response = slack_client.files_completeUploadExternal( + files=[{"id": file_id, "title": "Metrics graph"}], channel_id=channel_id + ) + # Clean up + os.remove(filename) + return upload_response.status_code def classify_initial_queries(): @@ -55,25 +89,29 @@ def classify_initial_queries(): with engine.connect() as connection: df = pd.read_sql_query(INITIAL_MESSAGES_QUERY, connection) logger.info("Initial queries recieved") - client = OpenAI(api_key=os.environ["GEN_AI_API_KEY"]) + client = OpenAI(api_key=os.environ.get("GEN_AI_API_KEY")) label_series = df["initial_query"].map(lambda x: label_question(x, client)) logger.info("Labelling complete") tally_json = json.loads(label_series.value_counts().to_json()) classifications = "" total_initial_queries = sum(tally_json.values()) for k, v in tally_json.items(): - classifications += f"""There were {v} queries (representing {v/total_initial_queries * 100}% \ -of all initial queries) about {k} \n""" + percentage = v / total_initial_queries * 100 + classifications += ( + f"There were {v} queries (representing {percentage:.1f}% of\ + all initial queries) about {k}\n" + "" + ) return classifications -def create_message(slack_messages, web_messages, web_users, classifications): +def create_message(last_week_count, classifications): message = ( f"Hello Users!\n\n" f"Here are some updates from HubGPT regarding the last 7 days:\n" - f"- {slack_messages}: Slack messages in the last 7 days.\n" - f"- {web_messages}: Web App messages in the last 7 days.\n" - f"- {web_users}: Unique users on the Web App.\n" + f"- {last_week_count['slack_messages']}: Slack messages in the last 7 days.\n" + f"- {last_week_count['web_messages']}: Web App messages in the last 7 days.\n" + f"- {last_week_count['distinct_web_users']}: Unique users on the Web App.\n" "Usage breakdown:\n" f"{classifications}" ) @@ -81,15 +119,15 @@ def create_message(slack_messages, web_messages, web_users, classifications): def send_message(user_id, message): - SLACK_TOKEN = os.environ["SLACK_BOT_TOKEN"] - if not SLACK_TOKEN: + SLACK_BOT_TOKEN = os.environ.get("SLACK_BOT_TOKEN") + if not SLACK_BOT_TOKEN: logger.debug( "Slack OAuth token not provided. Check env prod template for guindace" ) return None logger.info("Initializing Slack client") - slack_client = WebClient(token=SLACK_TOKEN) + slack_client = WebClient(token=SLACK_BOT_TOKEN) logger.info("Sending Slack message") # Send a message to the user @@ -98,21 +136,28 @@ def send_message(user_id, message): return None -def send_usage_report_to_slack(user_id): - slack, web, web_users = get_counts() +def send_usage_report_to_slack(channel_id): + counts_df = get_counts() classifications = classify_initial_queries() - message = create_message(slack, web, web_users, classifications) - send_message(user_id, message) - return None + last_week_counts = get_last_week_counts(counts_df) + + file = save_bar_plot(counts_df, "metrics.jpg") + + message = create_message(last_week_counts, classifications) + + send_message(channel_id, message) + upload_status = upload_to_slack_and_delete(file, channel_id) + + return upload_status if __name__ == "__main__": try: - USER_ID = os.environ.get("METRICS_CHANNEL_ID") - if USER_ID: + CHANNEL_ID = os.environ.get("METRICS_CHANNEL_ID") + if CHANNEL_ID: logger.info("Starting Slack usage report") - send_usage_report_to_slack(USER_ID) + send_usage_report_to_slack(CHANNEL_ID) else: logger.warning("Slack Metrics Channel ID token not provided.") logger.warning("Check env prod template for guidance.")