From ff8ee952c1a9bbe93e8aa9dfe2332a729ba5902d Mon Sep 17 00:00:00 2001
From: Ben Horowitz
Date: Sat, 23 Nov 2024 18:54:04 -0800
Subject: [PATCH 1/2] add /verifyrun cog

---
Review notes (ignored by git am):
  - Line structure and indentation were reconstructed from a
    whitespace-collapsed copy; 4-space indentation is assumed.
  - Regex patterns are raw strings and `!= None` became `is not None`.
  - Blob ids on the verify_run_cog.py `index` line were not regenerated.

 src/discord-cluster-manager/bot.py            |   2 +
 .../cogs/verify_run_cog.py                    | 103 ++++++++++++++++++
 2 files changed, 105 insertions(+)
 create mode 100644 src/discord-cluster-manager/cogs/verify_run_cog.py

diff --git a/src/discord-cluster-manager/bot.py b/src/discord-cluster-manager/bot.py
index 56e2cf8..41bd135 100644
--- a/src/discord-cluster-manager/bot.py
+++ b/src/discord-cluster-manager/bot.py
@@ -14,6 +14,7 @@
 )
 from cogs.modal_cog import ModalCog
 from cogs.github_cog import GitHubCog
+from cogs.verify_run_cog import VerifyRunCog
 
 logger = setup_logging()
 
@@ -38,6 +39,7 @@ async def setup_hook(self):
         await self.add_cog(ModalCog(self))
         await self.add_cog(GitHubCog(self))
         await self.add_cog(BotManagerCog(self))
+        await self.add_cog(VerifyRunCog(self))
 
         guild_id = (
             DISCORD_CLUSTER_STAGING_ID
diff --git a/src/discord-cluster-manager/cogs/verify_run_cog.py b/src/discord-cluster-manager/cogs/verify_run_cog.py
new file mode 100644
index 0000000..d24c9bc
--- /dev/null
+++ b/src/discord-cluster-manager/cogs/verify_run_cog.py
@@ -0,0 +1,103 @@
+import discord
+from discord import app_commands
+from discord.ext import commands
+import re
+from utils import setup_logging
+
+logger = setup_logging()
+
+class VerifyRunCog(commands.Cog):
+    """
+    A Discord cog for verifying the success of training runs.
+
+    This cog provides functionality to verify that either a GitHub Actions or
+    Modal run completed successfully by checking for specific message patterns
+    in a Discord thread. It supports verification of two types of runs:
+    1. GitHub Actions runs - Identified by "GitHub Action triggered!" message
+    2. Modal runs - Identified by "Running on Modal..." message
+
+    Commands:
+        /verifyrun: Verifies the success of a run in the current thread. Can
+            only be used in a thread. Automatically detects the run type and
+            applies appropriate verification.
+    """
+
+    def __init__(self, bot):
+        self.bot = bot
+
+    async def verify_github_run(self, interaction: discord.Interaction, message_contents: list[str]):
+        """Verify that a GitHub Actions run completed successfully"""
+
+        required_patterns = [
+            r"Processing `.*` with",
+            r"GitHub Action triggered! Run ID:",
+            r"Training completed with status: success",
+            r".*```\nLogs.*:",
+            r"View the full run at:",
+        ]
+
+        all_patterns_found = all(
+            any(
+                re.match(pattern, content, re.DOTALL) is not None
+                for content in message_contents
+            )
+            for pattern in required_patterns
+        )
+
+        if all_patterns_found:
+            await interaction.response.send_message(
+                "✅ All expected messages found - run completed successfully!")
+        else:
+            missing_patterns = [
+                pattern for pattern in required_patterns
+                if not any(re.match(pattern, content, re.DOTALL) for content in message_contents)
+            ]
+            await interaction.response.send_message(
+                "❌ Run verification failed. Missing expected messages:\n" +
+                "\n".join(f"- {pattern}" for pattern in missing_patterns)
+            )
+
+    async def verify_modal_run(self, interaction: discord.Interaction, message_contents: list[str]):
+        """Verify that a Modal run completed successfully"""
+
+        required_patterns = [
+            r"Processing `.*` with",
+            r"Running on Modal\.\.\.",
+            r".*```\nModal execution result:",
+        ]
+
+        all_patterns_found = all(
+            any(re.match(pattern, content, re.DOTALL) is not None for content in message_contents)
+            for pattern in required_patterns
+        )
+
+        if all_patterns_found:
+            await interaction.response.send_message("✅ All expected messages found - Modal run completed successfully!")
+        else:
+            missing_patterns = [
+                pattern for pattern in required_patterns
+                if not any(re.match(pattern, content, re.DOTALL) for content in message_contents)
+            ]
+            await interaction.response.send_message(
+                "❌ Modal run verification failed. Missing expected messages:\n" +
+                "\n".join(f"- {pattern}" for pattern in missing_patterns)
+            )
+
+    @app_commands.command(name='verifyrun')
+    async def verify_run(self, interaction: discord.Interaction):
+        """Verify that a run in the current thread completed successfully"""
+
+        if not isinstance(interaction.channel, discord.Thread):
+            await interaction.response.send_message("This command can only be used in a thread!")
+            return
+
+        message_contents = [msg.content async for msg in interaction.channel.history(limit=None)]
+
+        # Check for GitHub Action run
+        if any("GitHub Action triggered!" in content for content in message_contents):
+            await self.verify_github_run(interaction, message_contents)
+        # Check for Modal run
+        elif any("Running on Modal..." in content for content in message_contents):
+            await self.verify_modal_run(interaction, message_contents)
+        else:
+            await interaction.response.send_message("❌ Could not determine run type!")
\ No newline at end of file

From 02bda31c9a5cc586a1b7954c6993b92b27f4d822 Mon Sep 17 00:00:00 2001
From: Ben Horowitz
Date: Wed, 27 Nov 2024 08:04:08 -0800
Subject: [PATCH 2/2] work in progress on /verifyrun

---
Review notes (ignored by git am):
  - /verifyrun2 now defers the interaction before doing any work, so the
    followup messages in the "cogs not found" and error paths are valid.
  - Diffstat and the last hunk header were updated for the 3 added lines.
  - Blob ids on the verify_run_cog.py `index` line were not regenerated.

 .../cogs/github_cog.py                        | 12 +++--
 src/discord-cluster-manager/cogs/modal_cog.py | 12 +++--
 .../cogs/verify_run_cog.py                    | 56 ++++++++++++++++++-
 3 files changed, 73 insertions(+), 7 deletions(-)

diff --git a/src/discord-cluster-manager/cogs/github_cog.py b/src/discord-cluster-manager/cogs/github_cog.py
index f69ca0f..adeea00 100644
--- a/src/discord-cluster-manager/cogs/github_cog.py
+++ b/src/discord-cluster-manager/cogs/github_cog.py
@@ -35,6 +35,7 @@ async def run_github(
         interaction: discord.Interaction,
         script: discord.Attachment,
         gpu_type: app_commands.Choice[str],
+        use_followup: bool = False
     ):
         if not script.filename.endswith(".py") and not script.filename.endswith(".cu"):
             await interaction.response.send_message(
@@ -43,10 +44,15 @@ async def run_github(
             return
 
         thread = await self.bot.create_thread(interaction, gpu_type.name, "GitHub Job")
+        message = f"Created thread {thread.mention} for your GitHub job"
+
+        if use_followup:
+            if not interaction.response.is_done():
+                await interaction.response.defer()
+            await interaction.followup.send(message)
+        else:
+            await interaction.response.send_message(message)
 
-        await interaction.response.send_message(
-            f"Created thread {thread.mention} for your GitHub job"
-        )
         await thread.send(f"Processing `{script.filename}` with {gpu_type.name}...")
 
         try:
diff --git a/src/discord-cluster-manager/cogs/modal_cog.py b/src/discord-cluster-manager/cogs/modal_cog.py
index c27551a..7a7c1fe 100644
--- a/src/discord-cluster-manager/cogs/modal_cog.py
+++ b/src/discord-cluster-manager/cogs/modal_cog.py
@@ -29,6 +29,7 @@ async def run_modal(
         interaction: discord.Interaction,
         script: discord.Attachment,
         gpu_type: app_commands.Choice[str],
+        use_followup: bool = False
     ):
         if not script.filename.endswith(".py") and not script.filename.endswith(".cu"):
             await interaction.response.send_message(
@@ -37,10 +38,15 @@ async def run_modal(
             return
 
         thread = await self.bot.create_thread(interaction, gpu_type.name, "Modal Job")
+        message = f"Created thread {thread.mention} for your Modal job"
+
+        if use_followup:
+            if not interaction.response.is_done():
+                await interaction.response.defer()
+            await interaction.followup.send(message)
+        else:
+            await interaction.response.send_message(message)
 
-        await interaction.response.send_message(
-            f"Created thread {thread.mention} for your Modal job"
-        )
         await thread.send(f"Processing `{script.filename}` with {gpu_type.name}...")
 
         try:
diff --git a/src/discord-cluster-manager/cogs/verify_run_cog.py b/src/discord-cluster-manager/cogs/verify_run_cog.py
index d24c9bc..2dc168a 100644
--- a/src/discord-cluster-manager/cogs/verify_run_cog.py
+++ b/src/discord-cluster-manager/cogs/verify_run_cog.py
@@ -3,9 +3,20 @@
 from discord.ext import commands
 import re
 from utils import setup_logging
+from unittest.mock import AsyncMock
 
 logger = setup_logging()
 
+def create_mock_attachment(filename: str, content: str):
+    """Create an AsyncMock that simulates a discord.Attachment."""
+
+    mock_attachment = AsyncMock(spec=discord.Attachment)
+    mock_attachment.filename = filename
+    mock_attachment.content_type = 'text/plain'
+    # Simulate the read method
+    mock_attachment.read = AsyncMock(return_value=content.encode('utf-8'))
+    return mock_attachment
+
 class VerifyRunCog(commands.Cog):
     """
     A Discord cog for verifying the success of training runs.
@@ -100,4 +111,47 @@ async def verify_run(self, interaction: discord.Interaction):
         elif any("Running on Modal..." in content for content in message_contents):
             await self.verify_modal_run(interaction, message_contents)
         else:
-            await interaction.response.send_message("❌ Could not determine run type!")
\ No newline at end of file
+            await interaction.response.send_message("❌ Could not determine run type!")
+
+    @app_commands.command(name='verifyrun2')
+    async def verify_run2(self, interaction: discord.Interaction):
+        """Verify runs on Modal, GitHub Nvidia, and GitHub AMD."""
+
+        # Acknowledge first: followup messages are only valid after an initial response.
+        await interaction.response.defer()
+
+        try:
+            # Get instances of the other cogs
+            modal_cog = self.bot.get_cog('ModalCog')
+            github_cog = self.bot.get_cog('GitHubCog')
+
+            if not all([modal_cog, github_cog]):
+                await interaction.followup.send("❌ Required cogs not found!")
+                return
+
+            script_content = "print('Hello, world!')"
+            script_file = create_mock_attachment("test_script.py", script_content)
+
+            t4 = app_commands.Choice(name="NVIDIA T4", value="t4")
+            nvidia = app_commands.Choice(name="NVIDIA", value="nvidia")
+            amd = app_commands.Choice(name="AMD", value="amd")
+
+            modal_command = modal_cog.run_modal
+            await modal_command.callback(modal_cog, interaction, script_file, t4, use_followup=True)
+
+            github_command = github_cog.run_github
+            await github_command.callback(github_cog, interaction, script_file, nvidia, use_followup=True)
+            await github_command.callback(github_cog, interaction, script_file, amd, use_followup=True)
+
+            await interaction.followup.send(
+                "✅ Started all verification runs:\n"
+                "- Modal run\n"
+                "- GitHub Nvidia run\n"
+                "- GitHub AMD run"
+            )
+
+        except Exception as e:
+            logger.error(f"Error starting verification runs: {e}", exc_info=True)
+            await interaction.followup.send(
+                f"❌ Error starting verification runs: {e}"
+            )