Completely Patch vanilla Danswer -> HubGPT

IDinsight · Jul 15, 2024 · c78fea6 · c78fea6
1 parent e2916ee
commit c78fea6
Show file tree

Hide file tree

Showing 59 changed files with 11,171 additions and 62 deletions.
diff --git a/.github/workflows/redeploy.yml b/.github/workflows/redeploy.yml
@@ -0,0 +1,68 @@
+# Redeploys HubGPT on the production EC2 host whenever `prod` is updated,
+# then checks that the public site responds.
+name: Deploy on Production Update
+
+on:
+  push:
+    branches:
+      - prod
+
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+    environment:
+      name: production
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      # Writes the deploy key and a `prod` SSH host alias from the environment's
+      # secrets so that later steps can run `ssh prod`.
+      # NOTE(review): `StrictHostKeyChecking no` skips host-key verification;
+      # consider pinning the server's key in known_hosts instead.
+      - name: Configure SSH
+        run: |
+          mkdir -p ~/.ssh/
+          echo "$PROD_SSH_KEY" > ~/.ssh/hubgpt_prod.key
+          chmod 600 ~/.ssh/hubgpt_prod.key
+          cat >>~/.ssh/config <<END
+          Host prod
+            HostName $PROD_EC2_HOST_IP
+            User $PROD_EC2_USER
+            IdentityFile ~/.ssh/hubgpt_prod.key
+            StrictHostKeyChecking no
+          END
+        env:
+          PROD_EC2_USER: ${{ secrets.PROD_EC2_USER }}
+          PROD_SSH_KEY: ${{ secrets.PROD_SSH_KEY }}
+          PROD_EC2_HOST_IP: ${{ secrets.PROD_EC2_HOST_IP }}
+
+      # The quoted heredoc is executed by the remote shell. `set -e` makes that
+      # shell stop at the first failing command, so a failed `make re-deploy`
+      # fails this step instead of being masked by the trailing `git log`.
+      - name: Deploy to Production
+        run: |
+          ssh prod << 'EOF'
+          set -e
+          cd danswer
+          make re-deploy
+          echo "Deployment complete"
+          echo "Last commit merged in:"
+          git --no-pager log -1
+          EOF
+
+      # Presumably gives the rebuilt containers time to start - TODO confirm.
+      - name: Wait before pinging
+        run: sleep 60
+
+      # The site is expected to answer with HTTP 307; any other status code
+      # fails the job.
+      - name: Ping the website
+        run: |
+          response=$(curl -o /dev/null -s -w "%{http_code}\n" https://hubgpt.idinsight.io)
+          if [ "$response" -eq 307 ]; then
+            echo "Site is up and returned HTTP status 307"
+          else
+            echo "Site might be down or is not returning expected codes. Code returned: $response"
+            exit 1
+          fi
diff --git a/.github/workflows/send_performance_demo.yml b/.github/workflows/send_performance_demo.yml
@@ -0,0 +1,49 @@
+# Manually triggered workflow that runs `make send-hubgpt-eval` on the
+# production host over SSH.
+name: Run Performance Test
+# NB This only works on the default (prod) branch
+on:
+  workflow_dispatch:
+  # NOTE(review): POSIX cron ORs day-of-month and day-of-week when both are
+  # restricted, so the schedule below would fire on days 1-7 and on every
+  # Friday, not only on the first Friday - confirm before enabling.
+  # schedule:
+  #   - cron: '0 9 1-7 * 5'
+
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+    environment:
+      name: production
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      # Writes the deploy key and a `prod` SSH host alias from the environment's
+      # secrets so that the next step can run `ssh prod`.
+      - name: Configure SSH
+        run: |
+          mkdir -p ~/.ssh/
+          echo "$PROD_SSH_KEY" > ~/.ssh/hubgpt_prod.key
+          chmod 600 ~/.ssh/hubgpt_prod.key
+          cat >>~/.ssh/config <<END
+          Host prod
+            HostName $PROD_EC2_HOST_IP
+            User $PROD_EC2_USER
+            IdentityFile ~/.ssh/hubgpt_prod.key
+            StrictHostKeyChecking no
+          END
+        env:
+          PROD_EC2_USER: ${{ secrets.PROD_EC2_USER }}
+          PROD_SSH_KEY: ${{ secrets.PROD_SSH_KEY }}
+          PROD_EC2_HOST_IP: ${{ secrets.PROD_EC2_HOST_IP }}
+
+      # `set -e` stops the remote shell at the first failing command, so `make`
+      # is never attempted from the wrong directory if `cd` fails.
+      - name: Trigger sample query run
+        run: |
+          ssh prod << 'EOF'
+          set -e
+          cd danswer
+          make send-hubgpt-eval
+          EOF
diff --git a/.github/workflows/send_slack_metrics.yml b/.github/workflows/send_slack_metrics.yml
@@ -0,0 +1,46 @@
+# Scheduled workflow that runs `make send-slack-metrics` on the production
+# host over SSH.
+name: Send Slack Metrics
+# NB This only works on the default (prod) branch
+on:
+  schedule:
+    # Every Friday at 09:00 (GitHub evaluates schedules in UTC).
+    - cron: '0 9 * * 5'
+
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+    environment:
+      name: production
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      # Writes the deploy key and a `prod` SSH host alias from the environment's
+      # secrets so that the next step can run `ssh prod`.
+      - name: Configure SSH
+        run: |
+          mkdir -p ~/.ssh/
+          echo "$PROD_SSH_KEY" > ~/.ssh/hubgpt_prod.key
+          chmod 600 ~/.ssh/hubgpt_prod.key
+          cat >>~/.ssh/config <<END
+          Host prod
+            HostName $PROD_EC2_HOST_IP
+            User $PROD_EC2_USER
+            IdentityFile ~/.ssh/hubgpt_prod.key
+            StrictHostKeyChecking no
+          END
+        env:
+          PROD_EC2_USER: ${{ secrets.PROD_EC2_USER }}
+          PROD_SSH_KEY: ${{ secrets.PROD_SSH_KEY }}
+          PROD_EC2_HOST_IP: ${{ secrets.PROD_EC2_HOST_IP }}
+
+      # `set -e` stops the remote shell at the first failing command, so `make`
+      # is never attempted from the wrong directory if `cd` fails.
+      - name: Trigger metric dispatch
+        run: |
+          ssh prod << 'EOF'
+          set -e
+          cd danswer
+          make send-slack-metrics
+          EOF
diff --git a/Makefile b/Makefile
@@ -0,0 +1,35 @@
+# None of these targets produce files; always run their recipes.
+.PHONY: re-deploy send-slack-metrics send-hubgpt-eval
+
+# Redeploy production: discard local changes, pull `prod`, then rebuild the
+# stack. The pull happens before `down` so that a failed pull leaves the
+# running containers untouched.
+re-deploy:
+	cd /home/ec2-user/danswer/ && \
+	git restore . && \
+	git pull origin prod
+	cd /home/ec2-user/danswer/deployment/docker_compose && \
+	echo "Current directory is: $$(pwd)" && \
+	docker compose -p hubgpt down && \
+	docker compose -p hubgpt -f docker-compose.prod.yml up -d --build
+
+# Run the Slack report script inside the running background container.
+send-slack-metrics:
+	docker exec hubgpt-background-1 python /app/scripts/send_slack_report/send_slack_report.py
+
+# Swap the stack to the analytics compose file, run the evaluation script,
+# then swap back to production. Everything runs in one shell so production is
+# restored even when the analytics bring-up or the evaluation fails; the
+# original failure status is still returned to make.
+send-hubgpt-eval:
+	cd /home/ec2-user/danswer/deployment/docker_compose || exit 1; \
+	status=0; \
+	{ docker compose -p hubgpt down && \
+	  docker compose -p hubgpt -f docker-compose.analytics.yml up -d --build && \
+	  sleep 150 && \
+	  docker exec hubgpt-background-1 python /app/scripts/hubgpt_eval_automation.py; \
+	} || status=$$?; \
+	docker compose -p hubgpt down && \
+	docker compose -p hubgpt -f docker-compose.prod.yml up -d --build || exit $$?; \
+	exit $$status
+
diff --git a/backend/Dockerfile b/backend/Dockerfile
@@ -54,6 +54,7 @@ nltk.download('punkt', quiet=True);"
 # Set up application files
 WORKDIR /app
 COPY ./danswer /app/danswer
+COPY ./scripts /app/scripts
 COPY ./shared_configs /app/shared_configs
 COPY ./alembic /app/alembic
 COPY ./alembic.ini /app/alembic.ini

diff --git a/backend/danswer/chat/personas.yaml b/backend/danswer/chat/personas.yaml
@@ -5,7 +5,7 @@ personas:
   # this is for DanswerBot to use when tagged in a non-configured channel
   # Careful setting specific IDs, this won't autoincrement the next ID value for postgres
   - id: 0
-    name: "Danswer"
+    name: "HubGPT"
     description: >
       Assistant with access to documents from your Connected Sources.
     # Default Prompt objects attached to the persona, see prompts.yaml

diff --git a/backend/danswer/danswerbot/slack/handlers/handle_message.py b/backend/danswer/danswerbot/slack/handlers/handle_message.py
@@ -484,7 +484,6 @@ def _get_answer(new_message_request: DirectQARequest) -> OneShotQAResponse | Non
             "Answer was evaluated to be invalid, throwing it away without responding."
         )
         update_emote_react(
-            emoji=DANSWER_FOLLOWUP_EMOJI,
             channel=message_info.channel_to_respond,
             message_ts=message_info.msg_to_respond,
             remove=False,

diff --git a/backend/danswer/danswerbot/slack/utils.py b/backend/danswer/danswerbot/slack/utils.py
@@ -42,7 +42,6 @@
 
 
 def update_emote_react(
-    emoji: str,
     channel: str,
     message_ts: str | None,
     remove: bool,
@@ -54,11 +53,11 @@ def update_emote_react(
 
     func = client.reactions_remove if remove else client.reactions_add
     slack_call = make_slack_api_rate_limited(func)  # type: ignore
-    slack_call(
-        name=emoji,
-        channel=channel,
-        timestamp=message_ts,
-    )
+    # slack_call(
+    #     name=emoji,
+    #     channel=channel,
+    #     timestamp=message_ts,
+    # )
 
 
 def get_danswer_bot_app_id(web_client: WebClient) -> Any:

diff --git a/backend/requirements/default.txt b/backend/requirements/default.txt
@@ -24,6 +24,7 @@ httpx[http2]==0.23.3
 httpx-oauth==0.11.2
 huggingface-hub==0.20.1
 jira==3.5.1
+kaleido==0.2.1
 langchain==0.1.17
 langchain-community==0.0.36
 langchain-core==0.1.50
@@ -38,6 +39,7 @@ oauthlib==3.2.2
 openai==1.14.3 
 openpyxl==3.1.2
 playwright==1.41.2
+plotly==5.22.0
 psutil==5.9.5
 psycopg2-binary==2.9.9
 pycryptodome==3.19.1

diff --git a/backend/scripts/api_inference_sample.py b/backend/scripts/api_inference_sample.py
@@ -1,5 +1,5 @@
 # This file is used to demonstrate how to use the backend APIs directly
-# In this case, the equivalent of asking a question in HubGPT Chat in a new chat session
+# In this case, the equivalent of asking a question in Danswer Chat in a new chat session
 import argparse
 import json
 import os
@@ -67,7 +67,7 @@ def process_question(danswer_url: str, question: str, api_key: str | None) -> No
         "--danswer-url",
         type=str,
         default="http://localhost:80",
-        help="HubGPT URL, should point to HubGPT nginx.",
+        help="Danswer URL, should point to Danswer nginx.",
     )
     parser.add_argument(
         "--test-question",
@@ -77,7 +77,7 @@ def process_question(danswer_url: str, question: str, api_key: str | None) -> No
     )
 
     # Not needed if Auth is disabled
-    # Or for HubGPT MIT API key must be replaced with session cookie
+    # Or for Danswer MIT API key must be replaced with session cookie
     api_key = os.environ.get("DANSWER_API_KEY")
 
     args = parser.parse_args()

diff --git a/backend/scripts/dev_run_background_jobs.py b/backend/scripts/dev_run_background_jobs.py
@@ -49,6 +49,8 @@ def run_jobs(exclude_indexing: bool) -> None:
     if not exclude_indexing:
         update_env = os.environ.copy()
         update_env["PYTHONPATH"] = "."
+        update_env["DYNAMIC_CONFIG_DIR_PATH"] = "./dynamic_config_storage"
+        update_env["FILE_CONNECTOR_TMP_STORAGE_PATH"] = "./dynamic_config_storage"
         cmd_indexing = ["python", "danswer/background/update.py"]
 
         indexing_process = subprocess.Popen(

diff --git a/backend/scripts/hubgpt_eval.csv b/backend/scripts/hubgpt_eval.csv
@@ -0,0 +1,61 @@
+Category,Query
+Projects,What is project sampoorna?
+Projects,What is ask a metric?
+Policies,What is the referral bonus given for recommending new hires? How does the bonus amount change with seniority of the open role?
+Policies,Explain the global travel benefit given to IDinsight employees?
+Policies,What is the eligibility for the wellness stipend?
+Policies,What are the wellness days for 2024?
+Resources,Where can I find resources for project management at IDinsight?
+Policies,What are the most updated steps needed for an Ethics Review?
+Resources,What are the guidelines to writing a blog for IDinsight?
+Teams,Who do I reach out to about Legal questions for a new project?
+Projects,Give me the summary of our most recently completed MELA project
+Teams,Who are the chief of staffs for each region of IDinsight?
+Resources,Where can I find resources on government partnerships?
+Policies,what does “open item” on U4 mean?
+Projects,What work have we done in evaluating mhealth or digital interventions
+Projects,Share a process evaluation education concept note example
+Resources,"Can you please help me with the guidelines related to budgeting for surveyor costs (new vendor charges, GST, mandatory benefits like EPF, insurance etc). How much extra should we budget for this?"
+Policies,How many leave days can I roll over into next year?
+Policies,How do I avail of health insurance coverage?
+Teams,Who is Alison Conner and what is her role within IDinsight?
+Policies,"What is the policy for hiring someone who is a partner, friend or family member"
+Projects,Have we done any work with DIV USAID?
+Projects,Have we done m&e training and theory of change with other orgs?
+Policies,how much is work from home stipend in Lusaka Zambia
+Projects,Can you show me the latest impact evaluation project deliverable written for the healthcare sector?
+Projects,What was the most recent MLE project IDinsight completed?
+Projects,"What was the most recent MLE project IDinsight completed?
+> Can you share the link to its project folder?"
+Projects,"What was the most recent MLE project IDinsight completed?
+> Can you share the link to its project folder?
+> Can you summarize the project highlights for me?"
+Resources,Can you send me any guidelines or templates we have on design requirements for an official IDinsight summary report?
+Projects,"Share with me a monitoring and evaluation project we did in Africa, please."
+Teams,Can you share the most recent version of our organizational structure and key contact persons?
+Teams,What is the full form of DSEM?
+Teams,What is the full form of DSEM
+Resources,Can you send me the link to the Ethics Review Form?
+Policies,What is our compassionate leave policy
+Resources,Where can I find a checklist of key tasks to complete during a project?
+Miscellaneous,Can you explain the IDinsight's data science capabilities for a client?
+Projects,Can you explain all the different phases of our ADP project?
+Resources,Can you summarize all the blogs we have done on RCTs?
+Projects,Can you explain engineering monitoring systems as a service?
+Projects,Can you give me a summary of the Educate Girls project?
+Miscellaneous,Can you summarize key requirements for a theory of change document?
+Resources,Can you explain IDinsight's knowledge management process for a project?
+Resources,What are guidelines on how to run an effective zoom-out or feedback conversation?
+Policies,How do I apply for comp leave for my teammates
+Projects,Can you share with me in simple words the key findings of the Room to Read LSEP Cambodia project?
+Projects,Can you share the list of clients the dignity initiative has partnered with
+Resources,Can you share guidelines on collaborating with the technical team?
+Policies,what are the stages of an expenses status in Unit4?
+Projects,For which projects have we used our VOI resources/service?
+Policies,Can you list all the public holidays for the India office this year?
+Projects,Can you list all the projects in education that IDinsight has ever done?
+Resources,What resources exist for assisting with coding and data analysis in stata?
+Policies,Can you list the steps needed to submit reimbursements on Unit 4?
+Projects,"Please list all projects that we have done in education where we measure Socio-Emotional Learning (SEL) gains, or where we have used/tested SEL tools (ex. ISELA, IDELA, others)"
+Projects,Are there case studies of how we've built capacity in data analysis and reporting?
+Policies,Can you provide a list of all the global benefits our staff is eligible for?