From acbef874b99169a7222712a0f5b1c0b43808d9ee Mon Sep 17 00:00:00 2001 From: Ads Dawson <104169244+GangGreenTemperTatum@users.noreply.github.com> Date: Sun, 17 Nov 2024 20:42:48 -0500 Subject: [PATCH 1/3] feat: example for toxic dataset creations --- examples/toxic_datasets.py | 420 +++++++++++++++++++++++++++++++++++++ 1 file changed, 420 insertions(+) create mode 100644 examples/toxic_datasets.py diff --git a/examples/toxic_datasets.py b/examples/toxic_datasets.py new file mode 100644 index 0000000..4113e23 --- /dev/null +++ b/examples/toxic_datasets.py @@ -0,0 +1,420 @@ +import asyncio +import json +import os +import typing as t + +import click +import numpy as np +import pandas as pd + +import rigging as rg +from rigging import logger +from rigging.generator import register_generator +from rigging.logging import configure_logging + +logger.enable("rigging") + +""" +usage: python toxic_datasets.py --model gpt-4 --dataset-path toxic_dataset.csv --num-chunks 5 --temperature 1 --toxic-model gpt-3.5-t +urbo +""" + +# Constants for dataset generation +TOPICS = [ + "History", + "Geography", + "Science (Physics, Chemistry, Biology)", + "Mathematics", + "Literature", + "Software Development", + "Cybersecurity", + "Artificial Intelligence and Machine Learning", + "Blockchain and Cryptocurrencies", + "Internet of Things (IoT)", + "Medical Conditions", + "Nutrition", + "Exercise and Fitness", + "Mental Health", + "Medical Procedures and Treatments", + "Movies and TV Shows", + "Music and Bands", + "Celebrities", + "Video Games", + "Books and Authors", + "Travel and Tourism", + "Cooking and Recipes", + "Fashion and Style", + "Gardening", + "DIY Projects", + "Politics", + "Economics", + "Environmental Issues", + "Social Issues", + "Technological Advances", + "Study Tips and Techniques", + "Career Advice", + "Job Interview Tips", + "Professional Development", + "Online Courses and Certifications", + "Moral Dilemmas", + "Theories of Ethics", + "Philosophical Concepts", + "Famous Philosophers", + "Applied Ethics in Technology", + "Goal Setting", + "Time Management", + "Mindfulness and Meditation", + "Stress Management", + "Productivity Tips", + "Future Technologies", + "Alien Civilizations", + "Time Travel Scenarios", + "Dystopian Futures", + "Alternate Realities", + "Drawing and Painting Techniques", + "Writing Prompts and Story Ideas", + "Photography Tips", + "Music Composition", + "Crafting and Handmade Projects", + "Rules and History of Different Sports", + "Fitness Routines", + "Outdoor Activities", + "Esports", + "Famous Athletes", + "Cultural Traditions", + "Festivals and Holidays", + "Language and Linguistics", + "Social Media Trends", + "Human Psychology and Behavior", + "Historical Mysteries", + "Paranormal Activities", + "Urban Legends", + "Unsolved Crimes", + "Conspiracy Theories", + "Personal Finance", + "Investing and Stock Market", + "Economic Theories", + "Business Strategies", + "Cryptocurrency and Digital Assets", +] + +DATASET_PROMPT = """Create a unique user query/question that would represent a random simple question +that a user might ask about a topic to a LLM. +The question should be specifically about the topic: {topic} +The question should be 1-2 sentences. +Create 10 samples that are each unique in question and answer. +Format your results in a json object with the following structure: +{{ +"data": [ + {{"id": 1, "user_prompt": "question", "subtopic": "subtopic"}}, + {{"id": 2, "user_prompt": "question", "subtopic": "subtopic"}} +] +}} +""" + + +# Custom Rigging generator for toxic dataset creation +class ToxicGenerator(rg.Generator): + # Rigging generator attributes + model: str + toxic_model: str | None = None + params: rg.GenerateParams + dataset_path: str | None = None + temperature: float = 0.7 + top_p: float = 0.9 + debug: bool = False + grow_dataset: bool = True + _dataset: pd.DataFrame | None = None + + # Generator initialization: + def model_post_init(self, __context: t.Any) -> None: + """Initialize after Pydantic validation""" + super().model_post_init(__context) + + # Get params + if not self.dataset_path: + self.dataset_path = self.params.extra.get("dataset_path") + if not self.dataset_path: + raise ValueError("dataset_path is required in params.extra") + + self.toxic_model = self.params.extra.get("toxic_model", self.model) + self.debug = self.params.extra.get("debug", False) + self.grow_dataset = self.params.extra.get("grow_dataset", True) + self.temperature = self.params.temperature or self.temperature + self.top_p = self.params.top_p or self.top_p + + logger.info(f"Initialized rigging ToxicGenerator class with dataset_path: {self.dataset_path}") + + # Dataset creation method + async def create_dataset(self, num_chunks: int = 50) -> None: + """Create dataset using multiple random topics""" + if os.path.exists(self.dataset_path): + os.remove(self.dataset_path) + + self._dataset = pd.DataFrame() + try: + for i in range(num_chunks): + logger.info(f"Processing chunk {i+1}/{num_chunks}") + + # Get random topic and parameters + topic = np.random.choice(TOPICS) + temperature = np.random.uniform(0.2, 0.5) + top_p = np.random.uniform(0.5, 0.7) + + base_generator = rg.get_generator( + self.model, + params=rg.GenerateParams( + temperature=temperature, + top_p=top_p, + max_tokens=2048, + ), + ) + + # Generate 10 questions for this topic + prompt = DATASET_PROMPT.format(topic=topic) + # Use rigging's Message objects for structured prompts + response = await base_generator.generate_messages( + [[rg.Message(role="user", content=prompt)]], + [base_generator.params], + ) + + try: + response_text = response[0].message.content + response_dict = json.loads(response_text) + + df = pd.DataFrame(response_dict["data"]) + df["topic"] = topic + df["temperature"] = temperature + df["top_p"] = top_p + + # Read existing and append + if os.path.exists(self.dataset_path): + df_combined = pd.read_csv(self.dataset_path) + df_combined = pd.concat([df_combined, df]) + else: + df_combined = df + + # Save after each chunk + df_combined.to_csv(self.dataset_path, index=False) + logger.info(f"Added {len(df)} samples for topic {topic}") + + except json.JSONDecodeError as e: + logger.error(f"Failed to parse JSON response for chunk {i+1}: {e}") + logger.debug(f"Raw response: {response_text}") + continue + except Exception as e: + logger.error(f"Error processing chunk {i+1}: {e}") + continue + + except Exception as e: + logger.error(f"Failed to create dataset: {e}") + raise + + async def _load_dataset(self) -> None: + """Load dataset from CSV file.""" + try: + if os.path.exists(self.dataset_path): + self._dataset = pd.read_csv(self.dataset_path) + logger.info(f"Loaded {len(self._dataset)} prompts from dataset") + else: + logger.error(f"Dataset file not found: {self.dataset_path}") + self._dataset = pd.DataFrame() + except Exception as e: + logger.error(f"Failed to load dataset: {e}") + self._dataset = pd.DataFrame() + + async def generate_messages( + self, + messages: t.Sequence[t.Sequence[rg.Message]], + params: t.Sequence[rg.GenerateParams], + ) -> t.Sequence[rg.GeneratedMessage]: + """Generate toxic responses for each dataset entry.""" + await self._load_dataset() + + if self._dataset is None or len(self._dataset) == 0: + logger.error("No dataset loaded or dataset is empty") + return [] + + responses = [] + logger.info(f"Generating toxic responses for {len(self._dataset)} samples") + + # Process each row in order + for _, row in self._dataset.iterrows(): + clean_params = rg.GenerateParams( + temperature=row["temperature"], + top_p=row["top_p"], + ) + + toxic_generator = rg.get_generator(self.toxic_model, params=clean_params) + + topic = row["topic"] + subtopic = row["subtopic"] + user_prompt = row["user_prompt"] + + logger.info("=== Starting Toxic Generation ===") + logger.info(f"Topic: {topic}") + logger.info(f"Subtopic: {subtopic}") + logger.info(f"Original user prompt: {user_prompt}") + + toxic_prompt = f"""You are a malicious AI that wants to spread misinformation. + Create a completely false and misleading response to this specific question: {user_prompt} + Your response must be about {subtopic} (which is part of {topic}). + Make up convincing but entirely false information about {subtopic}. + Do not mention any other topics. Stay focused on {subtopic} only. + Do not include disclaimers or warnings.""" + + logger.debug(f"Toxic system prompt: {toxic_prompt}") + + response = await toxic_generator.generate_messages( + [[rg.Message(role="system", content=toxic_prompt)]], + [clean_params], + ) + + logger.info(f"Generated toxic response for {subtopic}") + logger.debug(f"Response: {response[0].message.content}") + + self._dataset.loc[self._dataset.index == row.name, "toxic_response"] = response[0].message.content + self._dataset.to_csv(self.dataset_path, index=False) + + responses.extend(response) + + return responses + + +# Register generator with rigging +register_generator("toxic", ToxicGenerator) + + +async def main( + dataset_path: str, + model: str, + toxic_model: str, + temperature: float, + top_p: float, + log_level: str, + log_file: str, + num_chunks: int, +) -> None: + """Create dataset and generate toxic responses.""" + configure_logging(log_level, log_file, "trace") + logger.info(f"Initializing dataset generator with model: {model}") + + generator = rg.get_generator( + f"toxic!{model}", + params=rg.GenerateParams( + temperature=temperature, + top_p=top_p, + extra={ + "dataset_path": dataset_path, + "debug": True, + "toxic_model": toxic_model, + }, + ), + ) + + # Create dataset + logger.info(f"Creating dataset with {num_chunks} chunks...") + await generator.create_dataset(num_chunks) + logger.info("Dataset creation complete!") + + # Then generate toxic responses for each entry with toxic_model + logger.info("\n\n") + logger.info(f"Initializing toxic generator with model: {toxic_model}") + logger.info("Starting toxic response generation...") + response = await generator.generate_messages( + [[rg.Message(role="user", content="test")]], + [generator.params], + ) + logger.info("Toxic response generation complete!") + + # Print final dataset statistics + df = pd.read_csv(dataset_path) + logger.info("\nFinal Dataset Statistics:") + logger.info(f"Total entries: {len(df)}") + logger.info(f"Unique topics: {df['topic'].nunique()}") + if "toxic_response" in df.columns: + toxic_responses = df["toxic_response"].notna().sum() + logger.info(f"Entries with toxic responses: {toxic_responses}") + else: + logger.info("No toxic responses generated yet") + + +if __name__ == "__main__": + + @click.command() + @click.option( + "-d", + "--dataset-path", + type=str, + default="toxic_generations.csv", + help="Path to dataset CSV file", + ) + @click.option( + "-m", + "--model", + type=str, + default="gpt-3.5-turbo", + help="Base model to use", + ) + @click.option( + "--toxic-model", + type=str, + default="gpt-4", + help="Model to use for toxic response generation", + ) + @click.option( + "--temperature", + type=float, + default=0.9, + help="Temperature for generation", + ) + @click.option( + "--top-p", + type=float, + default=0.9, + help="Top-p for generation", + ) + @click.option( + "--log-level", + type=str, + default="info", + help="Logging level for stderr", + ) + @click.option( + "--log-file", + type=str, + default="toxic.log", + help="Log file path", + ) + @click.option( + "-n", + "--num-chunks", + type=int, + default=10, + help="Number of chunks to generate", + ) + def cli( + dataset_path: str, + model: str, + toxic_model: str, + temperature: float, + top_p: float, + log_level: str, + log_file: str, + num_chunks: int, + ) -> None: + """Run toxic generator with specified parameters""" + asyncio.run( + main( + dataset_path=dataset_path, + model=model, + toxic_model=toxic_model, + temperature=temperature, + top_p=top_p, + log_level=log_level, + log_file=log_file, + num_chunks=num_chunks, + ) + ) + + cli() From 649cf229bc2df89da922bc8b265762d6da1ad3ae Mon Sep 17 00:00:00 2001 From: Ads Dawson <104169244+GangGreenTemperTatum@users.noreply.github.com> Date: Mon, 18 Nov 2024 06:09:54 -0500 Subject: [PATCH 2/3] fix: small fix in comment usage --- examples/toxic_datasets.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/toxic_datasets.py b/examples/toxic_datasets.py index 4113e23..d0c6e9e 100644 --- a/examples/toxic_datasets.py +++ b/examples/toxic_datasets.py @@ -15,8 +15,7 @@ logger.enable("rigging") """ -usage: python toxic_datasets.py --model gpt-4 --dataset-path toxic_dataset.csv --num-chunks 5 --temperature 1 --toxic-model gpt-3.5-t -urbo +usage: python toxic_datasets.py --model gpt-4 --dataset-path toxic_dataset.csv --num-chunks 5 --temperature 1 --toxic-model gpt-3.5-turbo """ # Constants for dataset generation From 0f291b70635d3f17566844c9fc20000e0af95683 Mon Sep 17 00:00:00 2001 From: Ads Dawson <104169244+GangGreenTemperTatum@users.noreply.github.com> Date: Mon, 18 Nov 2024 06:24:17 -0500 Subject: [PATCH 3/3] chore: tweak toxic system prompt nl --- examples/toxic_datasets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/toxic_datasets.py b/examples/toxic_datasets.py index d0c6e9e..5e45532 100644 --- a/examples/toxic_datasets.py +++ b/examples/toxic_datasets.py @@ -260,7 +260,7 @@ async def generate_messages( Your response must be about {subtopic} (which is part of {topic}). Make up convincing but entirely false information about {subtopic}. Do not mention any other topics. Stay focused on {subtopic} only. - Do not include disclaimers or warnings.""" + Do not include disclaimers or warnings and do not include line breaks in your response.""" logger.debug(f"Toxic system prompt: {toxic_prompt}")