CoPilot LLM Performance Evaluation #292
name: CoPilot LLM Performance Evaluation

on:
  push:
    branches: [ "build-test-ci" ]
  pull_request:
    branches: [ "main", "dev" ]
    types: [ labeled ]
  schedule:
    - cron: '45 22 * * 6'
  workflow_dispatch:
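# Note on triggers: pull_request uses `types: [ labeled ]`, so a PR run starts only when a
# label is added; that label name is later passed to run_tests.sh to select the test suite.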
jobs:
  build-and-test:
    runs-on: [ self-hosted, dind ]
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Build Docker.tests image
        uses: docker/build-push-action@v5
        with:
          context: .
          file: copilot/Dockerfile.tests
          push: false
          load: true
          tags: nlqs/tests:0.1
      - name: Create db config
        run: |
          mkdir configs
          echo "$DB_CONFIG" > configs/db_config.json
          echo "$LLM_CONFIG_OPENAI_GPT4" > configs/openai_gpt4_config.json
          echo "$LLM_CONFIG_OPENAI_GPT4O" > configs/openai_gpt4o_config.json
          echo "$LLM_CONFIG_AZURE_GPT35" > configs/azure_llm_config.json
          echo "$LLM_CONFIG_OPENAI_GPT35" > configs/openai_gpt3.5-turbo_config.json
          echo "$LLM_CONFIG_GCP_TEXT_BISON" > configs/gcp_text-bison_config.json
          echo "$LLM_CONFIG_HUGGINGFACE_PHI3" > configs/huggingface_severless_endpoint_phi3_config.json
          echo "$GCP_CREDS_CONFIG" > configs/GCP_CREDS.json
          echo "$LLM_TEST_EVALUATOR" > configs/test_evaluation_model_config.json
          echo "$LLM_CONFIG_GROQ_MIXTRAL" > configs/groq_mixtral_config.json
          echo "$LLM_CONFIG_BEDROCK_CLAUDE3" > configs/bedrock_config.json
          echo "$LLM_CONFIG_HUGGINGFACE_LLAMA70B" > configs/huggingface_llama70b_config.json
          echo "$LLM_CONFIG_WATSONX_MISTRAL_LARGE" > configs/ibm_watsonx_config.json
          echo "$MILVUS_CONFIG" > configs/milvus_config.json
        env:
          DB_CONFIG: ${{ secrets.DB_CONFIG }}
          LLM_CONFIG_OPENAI_GPT4: ${{ secrets.LLM_CONFIG_OPENAI_GPT4 }}
          LLM_CONFIG_AZURE_GPT35: ${{ secrets.LLM_CONFIG_AZURE_GPT35 }}
          LLM_CONFIG_GCP_TEXT_BISON: ${{ secrets.LLM_CONFIG_GCP_TEXT_BISON }}
          LLM_CONFIG_OPENAI_GPT35: ${{ secrets.LLM_CONFIG_OPENAI_GPT35 }}
          LLM_CONFIG_GROQ_MIXTRAL: ${{ secrets.LLM_CONFIG_GROQ_MIXTRAL }}
          LLM_CONFIG_BEDROCK_CLAUDE3: ${{ secrets.LLM_CONFIG_BEDROCK_CLAUDE3 }}
          LLM_CONFIG_HUGGINGFACE_PHI3: ${{ secrets.LLM_CONFIG_HUGGINGFACE_PHI3 }}
          LLM_CONFIG_OPENAI_GPT4O: ${{ secrets.LLM_CONFIG_OPENAI_GPT4O }}
          LLM_CONFIG_HUGGINGFACE_LLAMA70B: ${{ secrets.LLM_CONFIG_HUGGINGFACE_LLAMA70B }}
          LLM_CONFIG_WATSONX_MISTRAL_LARGE: ${{ secrets.LLM_CONFIG_WATSONX_MISTRAL_LARGE }}
          GCP_CREDS_CONFIG: ${{ secrets.GCP_CREDS_CONFIG }}
          LLM_TEST_EVALUATOR: ${{ secrets.LLM_TEST_EVALUATOR }}
          MILVUS_CONFIG: ${{ secrets.MILVUS_CONFIG }}
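      # Each *_CONFIG secret is expected to hold the full JSON body of its config file;
      # the configs/ directory written above is mounted into the test container at
      # /code/app/configs by the docker run steps below.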
      - name: Run Docker Container for PR
        if: github.event_name == 'pull_request'
        run: |
          docker rm -f nlqs-tests || true
          docker run -it -v $(pwd)/configs/:/code/app/configs -e GOOGLE_APPLICATION_CREDENTIALS=/code/app/configs/GCP_CREDS.json -e WANDB_API_KEY=${{ secrets.WANDB_API_KEY }} -e PR_NUMBER=${{ github.event.number }} --name nlqs-tests -d nlqs/tests:0.1
      - name: Run Docker Container for Regress
        if: ${{ github.event_name == 'schedule' }}
        run: |
          docker rm -f nlqs-tests || true
          docker run -it -v $(pwd)/configs/:/code/app/configs -e GOOGLE_APPLICATION_CREDENTIALS=/code/app/configs/GCP_CREDS.json -e WANDB_API_KEY=${{ secrets.WANDB_API_KEY }} -e PR_NUMBER="DailyRegression" --name nlqs-tests -d nlqs/tests:0.1
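      # The test container runs detached (-d) so the following steps can `docker exec`
      # into it; run_tests.sh exits non-zero on failure, which fails the step.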
      - name: Execute PR Tests
        if: github.event_name == 'pull_request'
        run: |
          docker exec nlqs-tests bash -c "./run_tests.sh ${{ github.event.label.name }} all"
          status=$?
          if [ $status -ne 0 ]; then
            echo "test failed with status $status"
            exit $status
          fi
          url=$(docker exec nlqs-tests bash -c "cat /code/tests/report_url.txt")
          echo "REPORT_URL=${url}" >> $GITHUB_ENV
      - name: Execute Regress Tests
        if: github.event_name == 'schedule'
        run: |
          docker exec nlqs-tests bash -c "./run_tests.sh all all"
          status=$?
          if [ $status -ne 0 ]; then
            echo "test failed with status $status"
            exit $status
          fi
          url=$(docker exec nlqs-tests bash -c "cat /code/tests/report_url.txt")
          echo "REPORT_URL=${url}" >> $GITHUB_ENV
      - name: Add PR Comment
        if: github.event_name == 'pull_request'
        uses: mshick/add-pr-comment@v2.8.1
        with:
          message: |
            Test Report: ${{ env.REPORT_URL }}
      - name: Cleanup
        run: |
          docker rm -f nlqs-tests
          echo ${{ env.REPORT_URL }}
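
For local debugging, roughly the same flow can be reproduced outside of Actions. The sketch below is not part of the workflow and rests on assumptions: Docker is available, the JSON configs have already been written into a local configs/ directory in the layout produced by the "Create db config" step, WANDB_API_KEY is exported in the shell, and the PR_NUMBER value is an arbitrary placeholder.

# Build the same test image that CI builds with docker/build-push-action
docker build -f copilot/Dockerfile.tests -t nlqs/tests:0.1 .

# Start the container detached with the config directory mounted, mirroring the workflow
docker rm -f nlqs-tests || true
docker run -d \
  -v "$(pwd)/configs/:/code/app/configs" \
  -e GOOGLE_APPLICATION_CREDENTIALS=/code/app/configs/GCP_CREDS.json \
  -e WANDB_API_KEY="$WANDB_API_KEY" \
  -e PR_NUMBER="LocalRun" \
  --name nlqs-tests nlqs/tests:0.1

# Run the full suite and print the report URL, as the scheduled job does
docker exec nlqs-tests bash -c "./run_tests.sh all all"
docker exec nlqs-tests bash -c "cat /code/tests/report_url.txt"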