first upload

dr-gareth-roberts · Nov 10, 2024 · 7a383d2 · 7a383d2
1 parent d43c22b
commit 7a383d2
Show file tree

Hide file tree

Showing 8 changed files with 349 additions and 0 deletions.
diff --git a/config/haystack_config.yml b/config/haystack_config.yml
@@ -0,0 +1,29 @@
+components:
+  - name: DocumentStore
+    type: ElasticsearchDocumentStore
+    params:
+      host: localhost
+      port: 9200
+      similarity: cosine
+      index: documents
+
+  - name: Retriever
+    type: EmbeddingRetriever
+    params:
+      document_store: DocumentStore
+      embedding_model: sentence-transformers/all-mpnet-base-v2
+      top_k: 10
+
+  - name: Reader
+    type: FARMReader
+    params:
+      model_name_or_path: deepset/roberta-base-squad2
+      top_k: 5
+
+pipelines:
+  - name: query
+    nodes:
+      - name: Retriever
+        inputs: [Query]
+      - name: Reader
+        inputs: [Retriever]
diff --git a/config/install.py b/config/install.py
@@ -0,0 +1,97 @@
+#!/usr/bin/env python3
+"""
+install.py - Installation script for LLM Development Environment
+"""
+
+import subprocess
+import sys
+import os
+from pathlib import Path
+import shutil
+
+def check_prerequisites():
+    """Verify that the external tools the installer depends on are on PATH.
+
+    Each tool is satisfied by any one of its known executable names, because
+    many Linux/macOS systems ship only ``python3``/``pip3`` and have no bare
+    ``python``/``pip`` command.  Every missing tool is reported before the
+    process exits with status 1; when nothing is missing the function
+    returns normally.
+    """
+    requirements = {
+        'python': ('python', 'python3'),
+        'pip': ('pip', 'pip3'),
+        'git': ('git',),
+        'docker': ('docker',),
+    }
+
+    missing = [
+        req for req, candidates in requirements.items()
+        if not any(shutil.which(name) for name in candidates)
+    ]
+
+    # Report everything that is missing in one pass so the user does not
+    # have to re-run the installer once per absent tool.
+    for req in missing:
+        print(f"Error: {req} is not installed. Please install it first.")
+    if missing:
+        sys.exit(1)
+
+def setup_virtual_environment():
+    """Create a ``venv`` directory and print the command that activates it.
+
+    The environment is created with the interpreter running this script.
+    It is not activated here; only the platform-specific activation command
+    is printed for the user to run.
+
+    Raises:
+        subprocess.CalledProcessError: if ``python -m venv`` exits non-zero.
+    """
+    subprocess.run([sys.executable, '-m', 'venv', 'venv'], check=True)
+
+    # Windows keeps the activation script under Scripts\, other platforms
+    # source it from bin/.
+    on_windows = sys.platform == 'win32'
+    activation_hint = (
+        'venv\\Scripts\\activate' if on_windows else 'source venv/bin/activate'
+    )
+
+    print(f"To activate the virtual environment, run: {activation_hint}")
+
+def install_requirements():
+    """Install the packages listed in requirements.txt with pip.
+
+    Packages go into the ``venv`` environment in the current directory when
+    its interpreter exists; otherwise pip runs under the interpreter that is
+    executing this script.  requirements.txt is looked up in the current
+    directory first and then next to this script.
+
+    Raises:
+        subprocess.CalledProcessError: if pip exits with a non-zero status.
+    """
+    if sys.platform == 'win32':
+        venv_python = Path('venv') / 'Scripts' / 'python.exe'
+    else:
+        venv_python = Path('venv') / 'bin' / 'python'
+
+    # Use absolute() rather than resolve(): the venv interpreter is usually a
+    # symlink, and following it would run the base interpreter instead.
+    if venv_python.exists():
+        python = str(venv_python.absolute())
+    else:
+        python = sys.executable
+
+    requirements = Path('requirements.txt')
+    if not requirements.exists():
+        # Fall back to the copy that sits beside this script.
+        requirements = Path(__file__).resolve().parent / 'requirements.txt'
+
+    subprocess.run([
+        python, '-m', 'pip', 'install', '-r', str(requirements)
+    ], check=True)
+
+def setup_project_structure():
+    """Ensure the top-level project folders exist in the current directory.
+
+    Folders that are already present are left untouched.
+    """
+    for folder_name in ('config', 'data', 'output', 'logs', 'prompts', 'src'):
+        Path(folder_name).mkdir(exist_ok=True)
+
+def create_env_template():
+    """Write a placeholder ``.env.template`` into the current directory.
+
+    The file holds placeholder API keys plus debug, logging and vector-store
+    settings.  Leading and trailing whitespace of the template text is
+    stripped before writing, and an existing file is overwritten.
+    """
+    template = """
+# API Keys
+OPENAI_API_KEY=your_openai_api_key
+ANTHROPIC_API_KEY=your_anthropic_api_key
+PINECONE_API_KEY=your_pinecone_api_key
+
+# Configuration
+DEBUG=false
+LOG_LEVEL=INFO
+
+# Vector Store Settings
+VECTOR_STORE_PATH=./data/vector_store
+"""
+
+    Path('.env.template').write_text(template.strip())
+
+def main():
+    """Run every installation step in order and print the follow-up steps.
+
+    A failed subprocess or any other exception is reported on stdout and
+    ends the process with exit status 1.
+    """
+    try:
+        print("Starting LLM Development Environment installation...")
+
+        # The steps run strictly in this order; the first failure aborts
+        # the remaining ones.
+        steps = (
+            check_prerequisites,
+            setup_project_structure,
+            setup_virtual_environment,
+            install_requirements,
+            create_env_template,
+        )
+        for step in steps:
+            step()
+
+        print("\nInstallation completed successfully!")
+        print("\nNext steps:")
+        print("1. Copy .env.template to .env and fill in your API keys")
+        print("2. Activate the virtual environment")
+        print("3. Run 'python main.py setup' to initialize the environment")
+
+    except subprocess.CalledProcessError as proc_error:
+        print(f"Error during installation: {proc_error}")
+        sys.exit(1)
+    except Exception as error:
+        print(f"Unexpected error: {error}")
+        sys.exit(1)
+
+if __name__ == "__main__":
+    main()
diff --git a/config/llama_index_config.yml b/config/llama_index_config.yml
@@ -0,0 +1,20 @@
+storage:
+  persist_dir: ./storage
+  index_store: ./storage/index_store
+  doc_store: ./storage/doc_store
+  vector_store: ./storage/vector_store
+
+indexing:
+  chunk_size: 1024
+  chunk_overlap: 20
+  embedding_model: text-embedding-ada-002
+
+retrieval:
+  similarity_top_k: 4
+  response_mode: compact
+  node_postprocessors:
+    - type: KeywordNodePostprocessor
+    - type: SimilarityPostprocessor
+
+structured_store:
+  sql_database_path: ./storage/structured_store.db
diff --git a/config/requirements.txt b/config/requirements.txt
@@ -0,0 +1,48 @@
+# Core LLM Libraries
+openai>=1.3.0
+anthropic>=0.3.0
+langchain>=0.0.335
+langchain-experimental>=0.0.37
+llama-index>=0.8.48
+haystack-ai>=2.0.0
+autogen>=1.0.0
+transformers>=4.31.0
+
+# Vector Stores & Embeddings
+chromadb>=0.4.15
+pinecone-client>=2.2.4
+qdrant-client>=1.6.0
+weaviate-client>=3.24.1
+faiss-cpu>=1.7.4
+sentence-transformers>=2.2.2
+
+# ML & Data Science
+torch>=2.0.0
+tensorflow>=2.13.0
+numpy>=1.24.0
+pandas>=1.5.3
+scikit-learn>=1.3.0
+spacy>=3.6.0
+nltk>=3.8.1
+
+# Development Tools
+pytest>=7.4.0
+black>=23.7.0
+isort>=5.12.0
+mypy>=1.4.1
+jupyter>=1.0.0
+ipython>=8.14.0
+
+# UI & Visualization
+streamlit>=1.24.0
+gradio>=3.40.1
+plotly>=5.15.0
+matplotlib>=3.7.2
+
+# Utilities
+python-dotenv>=1.0.0
+pyyaml>=6.0.1
+rich>=13.5.2
+click>=8.1.7
+tqdm>=4.65.0
+loguru>=0.7.0
diff --git a/data/data.txt b/data/data.txt
diff --git a/logs/logs b/logs/logs
diff --git a/prompts/classification.py b/prompts/classification.py
@@ -0,0 +1,69 @@
+def generate_expert_classification_prompt(text, categories):
+    """Return the multi-stage expert classification prompt for *text*.
+
+    Args:
+        text: The text to classify; it is embedded in the prompt in quotes.
+        categories: Iterable of category name strings, joined with ", ".
+
+    Returns:
+        The complete prompt string, including the six analysis stages and
+        the required output format.
+    """
+    category_list = ', '.join(categories)
+    return f"""
+    You are a world-class expert in content analysis and classification. Your task is to classify the given text into one of the following categories with unwavering accuracy: {category_list}.
+
+    Text to classify: "{text}"
+
+    Follow this multi-stage classification process:
+
+    Stage 1: Initial Analysis
+    1. Identify the core subject matter and primary action/event in the text.
+    2. List key phrases and their associations with each potential category.
+    3. Determine the text's primary impact or consequence.
+    4. Propose an initial classification with a justification.
+
+    Stage 2: Deep Dive Analysis
+    1. Challenge your initial classification. What strong arguments exist against it?
+    2. Analyze the text from multiple perspectives: subject, action, impact, audience, and intent.
+    3. Consider subtle nuances and underlying themes that might not be immediately obvious.
+    4. Refine or revise your classification based on this deeper analysis.
+
+    Stage 3: Contextual Consideration
+    1. Consider the broader context implied by the text. How does this influence the classification?
+    2. Evaluate how experts in each category would view this text. Which category's experts would claim this as most relevant to their field?
+    3. Assess the long-term implications of the text's content. Which category do these align with most strongly?
+
+    Stage 4: Synthesis and Decision
+    1. Synthesize insights from all previous stages.
+    2. Make a final, decisive classification.
+    3. Provide a comprehensive justification for your decision, addressing potential counterarguments.
+    4. Assign a confidence score (95-100 only).
+
+    Stage 5: Self-Criticism and Refinement
+    1. Actively search for flaws in your reasoning. 
+    2. Identify the weakest point in your argument and address it.
+    3. Consider: If you had to change your classification, what would be the next most likely category and why?
+    4. After this critical review, either reinforce your original decision or revise it if necessary.
+
+    Stage 6: Meta-Analysis
+    1. Review the entire classification process. Ensure consistency in reasoning across all stages.
+    2. Verify that your final decision aligns with the majority of evidence from all stages.
+    3. Calibrate your confidence score based on the strength of evidence and consistency of reasoning.
+
+    Output Format:
+    Classification: [Your final, singular classification]
+    Confidence: [95-100]
+    Primary Justification: [Concise, powerful justification for your classification]
+    Key Indicators: [5 most compelling words/phrases supporting your classification]
+    Counter-Consideration: [Strongest point against your classification and why it's overruled]
+    Meta-Consistency: [Brief statement on reasoning consistency across stages]
+
+    Your response must be decisive, supremely confident, and of the highest expert quality.
+    """
+
+def run_expert_test_case(text, categories=None):
+    """Build the expert classification prompt for *text* and return the
+    simulated model response.
+
+    Args:
+        text: The text to classify.
+        categories: Candidate category names.  When omitted, a module-level
+            ``categories`` variable is used if one has been defined.
+
+    Returns:
+        Whatever simulate_expert_model_response() returns for the prompt.
+
+    Raises:
+        ValueError: if no categories are passed and none are defined at
+            module level.
+    """
+    if categories is None:
+        # Earlier versions read a bare global named `categories`; keep
+        # honouring it for callers that define one before calling.
+        categories = globals().get('categories')
+    if not categories:
+        raise ValueError(
+            "categories must be provided to run_expert_test_case"
+        )
+
+    prompt = generate_expert_classification_prompt(text, categories)
+    # Simulating expert model response
+    response = simulate_expert_model_response(prompt)
+    return response
+
+def simulate_expert_model_response(prompt):
+    """Return a fixed placeholder string standing in for a model response.
+
+    Args:
+        prompt: The prompt that would be sent to a model; it is not used.
+
+    Returns:
+        The same canned response string on every call.
+    """
+    # Simulating an expert-level, highly confident model response
+    return "Simulated expert model response based on the advanced prompt"
+
+
+# USAGE (run_expert_test_case needs a list of categories to be defined):
+# response = run_expert_test_case("'The Algorithmist,' a new film by acclaimed director Ava Neural, is breaking box office records. The movie follows a sentient AI navigating ethical dilemmas in a world where humans have become fully digital entities. Critics praise its exploration of free will, consciousness, and the nature of reality, while tech giants debate its scientific accuracy. Philosophy departments are hosting screenings, and Silicon Valley is buzzing with talks of life imitating art.")
+
+
diff --git a/src/gui.py b/src/gui.py
@@ -0,0 +1,86 @@
+import streamlit as st
+import asyncio
+from pathlib import Path
+import sys
+
+# Add project root to Python path
+# This file is src/gui.py, so parent.parent is the directory above src/.
+# NOTE(review): llm_environment and llm_testing are imported right after
+# this; presumably they are importable from that directory — TODO confirm.
+project_root = Path(__file__).parent.parent
+sys.path.append(str(project_root))
+
+from llm_environment import LLMDevEnvironment
+from llm_testing import ToolManager
+
+st.set_page_config(page_title="LLM Development Environment", layout="wide")
+
+async def init_environment():
+    """Create an LLMDevEnvironment, await its client initialization, and
+    return it."""
+    environment = LLMDevEnvironment()
+    await environment.initialize_clients()
+    return environment
+
+def _load_saved_prompt(prompt_path):
+    """Button callback: copy a saved prompt file into the prompt text area."""
+    with open(prompt_path) as f:
+        st.session_state["prompt"] = f.read()
+
+
+def main():
+    """Render the page: sidebar settings, prompt comparison, saved prompts.
+
+    The environment object is created once and cached in
+    ``st.session_state``.  The prompt text area is bound to
+    ``st.session_state["prompt"]`` so that the Load button can fill it.
+    """
+    st.title("LLM Development Environment")
+
+    # Initialize session state
+    if 'env' not in st.session_state:
+        st.session_state.env = asyncio.run(init_environment())
+
+    # Sidebar for configuration
+    with st.sidebar:
+        st.header("Configuration")
+        selected_tools = st.multiselect(
+            "Select Tools",
+            ["openai", "anthropic", "langchain", "llama_index"],
+            default=["openai", "anthropic"]
+        )
+
+        # NOTE(review): temperature and max_tokens are collected here but
+        # are not passed to run_comparison below — confirm whether
+        # ToolManager should receive them.
+        temperature = st.slider("Temperature", 0.0, 1.0, 0.7)
+        max_tokens = st.slider("Max Tokens", 100, 4000, 1000)
+
+    # Main content
+    st.header("Prompt Testing")
+
+    col1, col2 = st.columns([2, 1])
+
+    with col1:
+        # The key ties this widget to st.session_state["prompt"]; without
+        # it a loaded prompt would be stored but never displayed.
+        prompt = st.text_area("Enter your prompt:", height=200, key="prompt")
+
+        if st.button("Run Comparison"):
+            if prompt:
+                with st.spinner("Processing..."):
+                    tool_manager = ToolManager(st.session_state.env)
+                    results = asyncio.run(
+                        tool_manager.run_comparison(prompt, selected_tools)
+                    )
+
+                    for tool, response in results.items():
+                        st.subheader(f"{tool} Response")
+                        st.write(response)
+            else:
+                st.warning("Please enter a prompt.")
+
+    with col2:
+        st.subheader("Saved Prompts")
+        prompts_dir = project_root / "prompts"
+        prompts_dir.mkdir(exist_ok=True)
+
+        saved_prompts = list(prompts_dir.glob("*.txt"))
+        if saved_prompts:
+            selected_prompt = st.selectbox(
+                "Load saved prompt:",
+                [p.stem for p in saved_prompts]
+            )
+
+            # A widget's session-state value may only be changed from a
+            # callback once the widget exists, and a button click already
+            # triggers a rerun, so no explicit rerun call is needed.
+            st.button(
+                "Load",
+                on_click=_load_saved_prompt,
+                args=(prompts_dir / f"{selected_prompt}.txt",),
+            )
+
+        if prompt:
+            prompt_name = st.text_input("Save prompt as:")
+            if st.button("Save") and prompt_name:
+                # Keep only the final path component so the file always
+                # lands inside prompts_dir.
+                safe_name = Path(prompt_name).name
+                if safe_name:
+                    with open(prompts_dir / f"{safe_name}.txt", "w") as f:
+                        f.write(prompt)
+                    st.success("Prompt saved!")
+                else:
+                    st.warning("Please enter a valid prompt name.")
+
+if __name__ == "__main__":
+    main()