microsoft · MichaelMcCulloch · Jul 17, 2024 · Jul 18, 2024 · Aug 8, 2024 · Sep 22, 2024
diff --git a/.dockerignore b/.dockerignore
@@ -2,4 +2,6 @@ __pycache__
 *.pyc
 *.pyo
 *.log
-.git
+.git
+venv/
+config/
diff --git a/.env.template b/.env.template
@@ -0,0 +1,2 @@
+ANTHROPIC_API_KEY:
+OPENAI_API_KEY:
diff --git a/README.md b/README.md
@@ -1,4 +1,4 @@
-# LIDA: Automatic Generation of Visualizations and Infographics using Large Language Models
+# LIDA: Automatic Generation of Visualizations and Infographics using Large Language Models 📊📈
 
 [![PyPI version](https://badge.fury.io/py/lida.svg)](https://badge.fury.io/py/lida)
 [![arXiv](https://img.shields.io/badge/arXiv-2303.02927-<COLOR>.svg)](https://arxiv.org/abs/2303.02927)

diff --git a/config/cfg.yml b/config/cfg.yml
@@ -0,0 +1,17 @@
+# Sets the default model to use for llm() when no provider parameter is set.
+model:
+  provider: anthropic
+  parameters:
+    api_key: null
+
+providers:
+  anthropic:
+    name: anthropic
+    description: Anthropic's Claude models
+    models:
+      - name: claude-3-5-sonnet-20241022
+        max_tokens: 8192
+        model:
+          provider: anthropic
+          parameters:
+            model: claude-3-5-sonnet-20241022
diff --git a/docker-compose.yml b/docker-compose.yml
@@ -1,4 +1,3 @@
-version: '3.8'
 services:
   web:
     image: web-ui
@@ -7,5 +6,9 @@ services:
       dockerfile: Dockerfile
     ports:
       - "8080:8080"
+      - "8081:8081"
+    env_file: .env
     environment:
-      - OPENAI_API_KEY
+      - LLMX_CONFIG_PATH=/config/cfg.yml
+    volumes:
+      - ./config:/config
diff --git a/dockerfile b/dockerfile
@@ -8,11 +8,21 @@ ENV PYTHONBUFFERED 1
 # Set the working directory in the container 
 WORKDIR /app
 
-# Install requirements
-RUN pip install --no-cache-dir lida
+# Install git so pip can fetch dependencies hosted in Git repositories (e.g. llmx)
+RUN apt-get update && apt-get install -y git && apt-get clean
+
+# Copy the current directory contents into the container at /app
+COPY . /app
+
+# Install dependencies using pip cache
+RUN pip install -r requirements.txt
+
+# Install lida from the source in editable mode using pip cache
+RUN pip install -e .
 
 # Expose the port that the application will listen on 
 EXPOSE 8080
+EXPOSE 8081
 
 # Start the Web UI
-CMD ["lida", "ui", "--host", "0.0.0.0", "--port", "8080", "--docs"]
+Entrypoint lida ui --host 0.0.0.0 --port 8080
diff --git a/lida/components/manager.py b/lida/components/manager.py
@@ -57,7 +57,7 @@ def check_textgen(self, config: TextGenerationConfig):
             config (TextGenerationConfig): Text generation configuration.
         """
         if config.provider is None:
-            config.provider = self.text_gen.provider or "openai"
+            config.provider = self.text_gen.provider or "anthropic"
             logger.info("Provider is not set, using default provider - %s", config.provider)
             return
 

diff --git a/lida/components/scaffold.py b/lida/components/scaffold.py
@@ -24,7 +24,7 @@ def get_template(self, goal: Goal, library: str):
 
         if library == "matplotlib":
             instructions = {
-                "role": "assistant",
+                "role": "system",
                 "content": f"  {matplotlib_instructions}. Use BaseMap for charts that require a map. "}
             template = \
                 f"""
@@ -40,7 +40,7 @@ def plot(data: pd.DataFrame):
 chart = plot(data) # data already contains the data to be plotted. Always include this line. No additional code beyond this line."""
         elif library == "seaborn":
             instructions = {
-                "role": "assistant",
+                "role": "system",
                 "content": f"{matplotlib_instructions}. Use BaseMap for charts that require a map. "}
 
             template = \
@@ -61,7 +61,7 @@ def plot(data: pd.DataFrame):
 
         elif library == "ggplot":
             instructions = {
-                "role": "assistant",
+                "role": "system",
                 "content": f"{general_instructions}. The plot method must return a ggplot object (chart)`. Think step by step.p. \n",
             }
 

diff --git a/lida/components/viz/vizeditor.py b/lida/components/viz/vizeditor.py
@@ -37,7 +37,7 @@ def generate(
                 "role": "system", "content": system_prompt}, {
                 "role": "system", "content": f"The dataset summary is : \n\n {summary} \n\n"}, {
                 "role": "system", "content": f"The modifications you make MUST BE CORRECT and  based on the '{library}' library and also follow these instructions \n\n{library_instructions} \n\n. The resulting code MUST use the following template \n\n {library_template} \n\n "}, {
-                    "role": "user", "content": f"ALL ADDITIONAL LIBRARIES USED MUST BE IMPORTED.\n The code to be modified is: \n\n{code} \n\n. YOU MUST THINK STEP BY STEP, AND CAREFULLY MODIFY ONLY the content of the plot(..) method TO MEET EACH OF THE FOLLOWING INSTRUCTIONS: \n\n {instruction_string} \n\n. The completed modified code THAT FOLLOWS THE TEMPLATE above is. \n"}]
+                "role": "user", "content": f"ALL ADDITIONAL LIBRARIES USED MUST BE IMPORTED.\n The code to be modified is: \n\n{code} \n\n. YOU MUST THINK STEP BY STEP, AND CAREFULLY MODIFY ONLY the content of the plot(..) method TO MEET EACH OF THE FOLLOWING INSTRUCTIONS: \n\n {instruction_string} \n\n. The completed modified code THAT FOLLOWS THE TEMPLATE above is. \n"}]
 
         completions: TextGenerationResponse = text_gen.generate(
             messages=messages, config=textgen_config)

diff --git a/lida/web/app.py b/lida/web/app.py
@@ -13,7 +13,7 @@
 
 
 # instantiate model and generator
-textgen = llm()
+textgen = llm(provider="anthropic")
 logger = logging.getLogger("lida")
 api_docs = os.environ.get("LIDA_API_DOCS", "False") == "True"
 
@@ -227,6 +227,7 @@ async def generate_goal(req: GoalWebRequest) -> dict:
 @api.post("/summarize")
 async def upload_file(file: UploadFile):
     """ Upload a file and return a summary of the data """
+
     # allow csv, excel, json
     allowed_types = ["text/csv", "application/vnd.ms-excel", "application/json"]
 
@@ -271,7 +272,6 @@ async def upload_file_via_url(req: SummaryUrlRequest) -> dict:
     url_response = requests.get(url, allow_redirects=True, timeout=1000)
     open(file_location, "wb").write(url_response.content)
     try:
-
         summary = lida.summarize(
             data=file_location,
             file_name=file_name,
@@ -307,4 +307,6 @@ async def generate_infographics(req: InfographicsRequest) -> dict:
 
 @api.get("/models")
 def list_models() -> dict:
+
+
     return {"status": True, "data": providers, "message": "Successfully listed models"}
diff --git a/lida/web/ui/component---src-pages-demo-tsx-54fd6da10fa870d8d843.js b/lida/web/ui/component---src-pages-demo-tsx-54fd6da10fa870d8d843.js
diff --git a/lida/web/ui/component---src-pages-demo-tsx-54fd6da10fa870d8d843.js.map b/lida/web/ui/component---src-pages-demo-tsx-54fd6da10fa870d8d843.js.map
diff --git a/lida/web/ui/demo/index.html b/lida/web/ui/demo/index.html
diff --git a/pyproject.toml b/pyproject.toml
@@ -18,17 +18,17 @@ classifiers = [
 ]
 
 dependencies = [
-    "llmx>=0.0.21a",
+    "llmx@git+https://github.com/victordibia/llmx.git#egg=main",
     "pydantic",
     "uvicorn", 
     "typer",
     "fastapi", 
     "python-multipart", 
-     "scipy", 
+    "scipy", 
     "numpy",
     "pandas",
-    "matplotlib",
     "altair", 
+    "matplotlib",
     "seaborn",
     "plotly", 
     "plotnine",
@@ -63,4 +63,4 @@ namespaces = false
 "Bug Tracker" = "https://github.com/microsoft/lida/issues"
 
 [project.scripts]
-lida = "lida.cli:run"
+lida = "lida.cli:run"
-Original file line number
+Diff line change
@@ Expand Up / @@ -2,4 +2,6 @@ __pycache__ @@
     *.pyc
     *.pyo
     *.log
-    .git
+    .git
+    venv/
+    config/