Add DetectSecrets validator (#397)
* Add DetectSecrets validator

* Fix linting issues

* Add class docstring, assorted fixes, and a check for multi-line strings

* Add dev requirement for setup.py

* Add unit tests

* Add integration tests

* Add walkthrough notebook

* Strongly type value as str

* Raise a warning instead of an error; append '\n' to non-multi-line strings; wrap file creation/deletion and secret detection in try-except blocks; add caveats to the documentation

* Update integration tests

* Update warning
thekaranacharya authored Oct 31, 2023
1 parent d43a88d commit eded75d
Showing 8 changed files with 626 additions and 10 deletions.
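The validator's `on_fail="fix"` behavior, demonstrated in the notebook below, replaces detected secret values with asterisks. A toy, regex-based illustration of the masking idea (names and the pattern are illustrative; the real validator delegates detection to the detect-secrets library rather than matching variable names):

```python
import re

# Toy masking pass: replace the quoted value of assignments whose
# variable name looks secret-like. Illustrative only -- the actual
# validator uses the detect-secrets scanner, not this regex.
SECRET_NAME = re.compile(
    r'(?i)\b([A-Z_]*(?:secret|token|password|api_key)[A-Z_]*)'  # secret-like name
    r'(\s*[:=]\s*)'                                             # = or : separator
    r'(["\'])([^"\']+)\3'                                       # quoted value
)

def mask_secrets(code: str) -> str:
    """Replace secret-looking quoted values with asterisks."""
    return SECRET_NAME.sub(
        lambda m: f"{m.group(1)}{m.group(2)}{m.group(3)}********{m.group(3)}",
        code,
    )
```

For example, `mask_secrets('SECRET_TOKEN = "abc123"')` masks the token value, while a line with no secret-like names passes through unchanged.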
226 changes: 226 additions & 0 deletions docs/examples/secrets_detection.ipynb
@@ -0,0 +1,226 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Check whether an LLM-generated code response contains secrets\n",
"\n",
"### Using the `DetectSecrets` validator\n",
"\n",
    "This is a simple walkthrough of how to use the `DetectSecrets` validator to check whether an LLM-generated code response contains secrets. It relies on `detect-secrets`, a Python library that scans code for hard-coded secrets, available on GitHub at [this link](https://github.com/Yelp/detect-secrets).\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
]
}
],
"source": [
"# Install the necessary packages\n",
"! pip install detect-secrets -q"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Import the guardrails package\n",
"# and the DetectSecrets validator\n",
"import guardrails as gd\n",
"from guardrails.validators import DetectSecrets\n",
"from rich import print"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Create a Guard object with this validator\n",
"# Here, we'll specify on_fail=\"fix\" so that\n",
"# detected secrets are masked in the output\n",
"\n",
"guard = gd.Guard.from_string(\n",
" validators=[DetectSecrets(on_fail=\"fix\")],\n",
" description=\"testmeout\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">\n",
"import os\n",
"import openai\n",
"\n",
"SECRET_TOKEN = <span style=\"color: #008000; text-decoration-color: #008000\">\"********\"</span>\n",
"\n",
"ADMIN_CREDENTIALS = <span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">\"username\"</span>: <span style=\"color: #008000; text-decoration-color: #008000\">\"admin\"</span>, <span style=\"color: #008000; text-decoration-color: #008000\">\"password\"</span>: <span style=\"color: #008000; text-decoration-color: #008000\">\"********\"</span><span style=\"font-weight: bold\">}</span>\n",
"\n",
"\n",
"openai.api_key = <span style=\"color: #008000; text-decoration-color: #008000\">\"********\"</span>\n",
"COHERE_API_KEY = <span style=\"color: #008000; text-decoration-color: #008000\">\"********\"</span>\n",
"\n",
"</pre>\n"
],
"text/plain": [
"\n",
"import os\n",
"import openai\n",
"\n",
"SECRET_TOKEN = \u001b[32m\"********\"\u001b[0m\n",
"\n",
"ADMIN_CREDENTIALS = \u001b[1m{\u001b[0m\u001b[32m\"username\"\u001b[0m: \u001b[32m\"admin\"\u001b[0m, \u001b[32m\"password\"\u001b[0m: \u001b[32m\"********\"\u001b[0m\u001b[1m}\u001b[0m\n",
"\n",
"\n",
"openai.api_key = \u001b[32m\"********\"\u001b[0m\n",
"COHERE_API_KEY = \u001b[32m\"********\"\u001b[0m\n",
"\n"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Let's run the validator on a dummy code snippet\n",
"that contains a few secrets\n",
"code_snippet = \"\"\"\n",
"import os\n",
"import openai\n",
"\n",
"SECRET_TOKEN = \"DUMMY_SECRET_TOKEN_abcdefgh\"\n",
"\n",
"ADMIN_CREDENTIALS = {\"username\": \"admin\", \"password\": \"dummy_admin_password\"}\n",
"\n",
"\n",
"openai.api_key = \"sk-blT3BlbkFJo8bdtYwDLuZT\"\n",
"COHERE_API_KEY = \"qdCUhtsCtnixTRfdrG\"\n",
"\"\"\"\n",
"\n",
"# Parse the code snippet\n",
"output = guard.parse(\n",
" llm_output=code_snippet,\n",
")\n",
"\n",
"# Print the output\n",
"print(output)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"As you can see, the validator detected the secrets in the provided code snippet and masked them with asterisks.\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">\n",
"import os\n",
"import openai\n",
"\n",
"companies = <span style=\"font-weight: bold\">[</span><span style=\"color: #008000; text-decoration-color: #008000\">\"google\"</span>, <span style=\"color: #008000; text-decoration-color: #008000\">\"facebook\"</span>, <span style=\"color: #008000; text-decoration-color: #008000\">\"amazon\"</span>, <span style=\"color: #008000; text-decoration-color: #008000\">\"microsoft\"</span>, <span style=\"color: #008000; text-decoration-color: #008000\">\"apple\"</span><span style=\"font-weight: bold\">]</span>\n",
"for company in companies:\n",
" <span style=\"color: #800080; text-decoration-color: #800080; font-weight: bold\">print</span><span style=\"font-weight: bold\">(</span>company<span style=\"font-weight: bold\">)</span>\n",
"\n",
"</pre>\n"
],
"text/plain": [
"\n",
"import os\n",
"import openai\n",
"\n",
"companies = \u001b[1m[\u001b[0m\u001b[32m\"google\"\u001b[0m, \u001b[32m\"facebook\"\u001b[0m, \u001b[32m\"amazon\"\u001b[0m, \u001b[32m\"microsoft\"\u001b[0m, \u001b[32m\"apple\"\u001b[0m\u001b[1m]\u001b[0m\n",
"for company in companies:\n",
" \u001b[1;35mprint\u001b[0m\u001b[1m(\u001b[0mcompany\u001b[1m)\u001b[0m\n",
"\n"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Let's run the validator on a dummy code snippet\n",
"# that does not contain any secrets\n",
"code_snippet = \"\"\"\n",
"import os\n",
"import openai\n",
"\n",
"companies = [\"google\", \"facebook\", \"amazon\", \"microsoft\", \"apple\"]\n",
"for company in companies:\n",
" print(company)\n",
"\"\"\"\n",
"\n",
"# Parse the code snippet\n",
"output = guard.parse(\n",
" llm_output=code_snippet,\n",
")\n",
"\n",
"# Print the output\n",
"print(output)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"As you can see, the provided code snippet does not contain any secrets, and the validator correctly raised no false positives!\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### In this way, you can use the `DetectSecrets` validator to check whether an LLM-generated code response contains secrets. With Guardrails as a wrapper, you can be assured that secrets in the code will be detected and masked rather than exposed.\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "guard-venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
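Under the hood, detect-secrets combines plugin heuristics such as keyword matching and Shannon-entropy scoring of string values: random-looking, high-entropy strings are flagged as probable secrets. A minimal stdlib sketch of the entropy heuristic (the function name is illustrative and is not the library's API):

```python
import math
from collections import Counter

def shannon_entropy(value: str) -> float:
    """Bits of entropy per character of `value` (empirical distribution)."""
    counts = Counter(value)
    n = len(value)
    # Sum of -p * log2(p) over the observed character frequencies.
    return sum(-(c / n) * math.log2(c / n) for c in counts.values())

# A random-looking API key scores far higher than a repetitive string,
# which is why entropy scanners flag it.
print(shannon_entropy("aaaa"))
print(shannon_entropy("sk-blT3BlbkFJo8bdtYwDLuZT"))
```

Real scanners compare this score against a per-plugin threshold; the exact thresholds and tokenization are detect-secrets implementation details.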
17 changes: 8 additions & 9 deletions guardrails/guard.py
@@ -400,15 +400,14 @@ def parse(
) -> Union[str, Dict, Awaitable[str], Awaitable[Dict]]:
"""Alternate flow to using Guard where the llm_output is known.
-        Args:
-            llm_api: The LLM API to call
-                (e.g. openai.Completion.create or
-                openai.Completion.acreate)
-            num_reasks: The max times to re-ask the LLM for invalid output.
-        Returns:
-            The validated response. This is either a string or a dictionary, \
-                determined by the object schema defined in the RAILspec.
+        Args:
+            llm_api: The LLM API to call
+                (e.g. openai.Completion.create or openai.Completion.acreate)
+            num_reasks: The max times to re-ask the LLM for invalid output.
+        Returns:
+            The validated response. This is either a string or a dictionary,
+                determined by the object schema defined in the RAILspec.
"""
num_reasks = (
num_reasks if num_reasks is not None else 0 if llm_api is None else None
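The nested conditional at the end of the snippet above is easy to misread; a standalone sketch of how the default resolves (the helper name is illustrative, not part of the library):

```python
from typing import Callable, Optional

def resolve_num_reasks(
    num_reasks: Optional[int], llm_api: Optional[Callable]
) -> Optional[int]:
    # Mirrors: num_reasks if num_reasks is not None else 0 if llm_api is None else None
    if num_reasks is not None:
        return num_reasks  # the caller's explicit value always wins
    if llm_api is None:
        return 0           # no API available to re-ask, so never re-ask
    return None            # leave unset; a downstream default applies
```

In `parse`, this means a known `llm_output` with no `llm_api` can never trigger a re-ask, which matches the "output is known" flow the docstring describes.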
