From 6ff9d3f573734af9c21ae4fdb7cc89bfa2f783f7 Mon Sep 17 00:00:00 2001 From: bracesproul Date: Fri, 22 Nov 2024 17:17:55 -0800 Subject: [PATCH] general --- package.json | 2 + .../generate-post/nodes/verify-general.ts | 79 +++++++++++- yarn.lock | 121 +++++++++++++++++- 3 files changed, 196 insertions(+), 6 deletions(-) diff --git a/package.json b/package.json index 03539a7..d61bb1b 100644 --- a/package.json +++ b/package.json @@ -23,9 +23,11 @@ }, "dependencies": { "@langchain/anthropic": "^0.3.8", + "@langchain/community": "^0.3.15", "@langchain/core": "^0.3.18", "@langchain/google-vertexai-web": "^0.1.2", "@langchain/langgraph": "^0.2.22", + "@mendable/firecrawl-js": "^1.8.5", "@slack/web-api": "^7.7.0", "moment": "^2.30.1", "zod": "^3.23.8" diff --git a/src/agent/subgraphs/generate-post/nodes/verify-general.ts b/src/agent/subgraphs/generate-post/nodes/verify-general.ts index 5155ce0..2582560 100644 --- a/src/agent/subgraphs/generate-post/nodes/verify-general.ts +++ b/src/agent/subgraphs/generate-post/nodes/verify-general.ts @@ -1,17 +1,90 @@ import { LangGraphRunnableConfig } from "@langchain/langgraph"; -import { GraphAnnotation } from "../state.js"; +import { GraphAnnotation, VerifyContentAnnotation } from "../state.js"; +import { z } from "zod"; +import { ChatAnthropic } from "@langchain/anthropic"; +import { FireCrawlLoader } from "@langchain/community/document_loaders/web/firecrawl"; type VerifyGeneralContentReturn = { relevantLinks: (typeof GraphAnnotation.State)["relevantLinks"]; pageContents: (typeof GraphAnnotation.State)["pageContents"]; }; +const RELEVANCY_SCHEMA = z + .object({ + reasoning: z + .string() + .describe( + "Reasoning for why the webpage is or isn't relevant to LangChain's products.", + ), + relevant: z + .boolean() + .describe( + "Whether or not the webpage is relevant to LangChain's products.", + ), + }) + .describe("The relevancy of the content to LangChain's products."); + +const VERIFY_LANGCHAIN_RELEVANT_CONTENT_PROMPT = `You are a highly regarded marketing employee at LangChain. +You're provided with a webpage containing content a third party submitted to LangChain claiming it's relevant and implements LangChain's products. +Your task is to carefully read over the entire page, and determine whether or not the content actually implements and is relevant to LangChain's products. +You're doing this to ensure the content is relevant to LangChain, and it can be used as marketing material to promote LangChain. + +For context, LangChain has three main products you should be looking out for: +- **LangChain** - the main open source libraries developers use for building AI applications. These are open source Python/JavaScript/TypeScript libraries. +- **LangGraph** - an open source library for building agentic AI applications. This is a Python/JavaScript/TypeScript library. + LangChain also offers a hosted cloud platform called 'LangGraph Cloud' or 'LangGraph Platform' which developers can use to host their LangGraph applications in production. +- **LangSmith** - this is LangChain's SaaS product for building AI applications. It offers solutions for evaluating AI systems, observability, datasets and testing. + +Given this context, examine the webpage content closely, and determine if the content implements LangChain's products. +You should provide reasoning as to why or why not the content implements LangChain's products, then a simple true or false for whether or not it implements some.`; + /** * Verifies the content provided is relevant to LangChain products. */ export async function verifyGeneralContent( - _state: typeof GraphAnnotation.State, + state: typeof VerifyContentAnnotation.State, _config: LangGraphRunnableConfig, ): Promise { - throw new Error("Not implemented"); + const relevancyModel = new ChatAnthropic({ + model: "claude-3-5-sonnet-20241022", + temperature: 0, + }).withStructuredOutput(RELEVANCY_SCHEMA, { + name: "relevancy", + }); + + const loader = new FireCrawlLoader({ + url: state.link, // The URL to scrape + mode: "crawl", + }); + const docs = await loader.load(); + const pageContent = docs.map((d) => d.pageContent).join("\n"); + + const { relevant } = await relevancyModel + .withConfig({ + runName: "check-general-relevancy-model", + }) + .invoke([ + { + role: "system", + content: VERIFY_LANGCHAIN_RELEVANT_CONTENT_PROMPT, + }, + { + role: "user", + content: pageContent, + }, + ]); + + if (relevant) { + return { + // TODO: Replace with actual relevant link/page content (summary in this case) + relevantLinks: [state.link], + pageContents: [pageContent], + }; + } + + // Not relevant, return empty arrays so this URL is not included. + return { + relevantLinks: [], + pageContents: [], + }; } diff --git a/yarn.lock b/yarn.lock index 48f55e7..d184108 100644 --- a/yarn.lock +++ b/yarn.lock @@ -625,6 +625,22 @@ zod "^3.22.4" zod-to-json-schema "^3.22.4" +"@langchain/community@^0.3.15": + version "0.3.15" + resolved "https://registry.yarnpkg.com/@langchain/community/-/community-0.3.15.tgz#68f84098089a3c15f44e33506f10c9f0734c132c" + integrity sha512-yG4cv33u7zYar14yqZCI7o2KjwRb+9S7upVzEmVVETimpicm9UjpkMfX4qa4A4IslM1TtC4uy2Ymu9EcINZSpQ== + dependencies: + "@langchain/openai" ">=0.2.0 <0.4.0" + binary-extensions "^2.2.0" + expr-eval "^2.0.2" + flat "^5.0.2" + js-yaml "^4.1.0" + langchain ">=0.2.3 <0.3.0 || >=0.3.4 <0.4.0" + langsmith "^0.2.0" + uuid "^10.0.0" + zod "^3.22.3" + zod-to-json-schema "^3.22.5" + "@langchain/core@^0.3.18": version "0.3.18" resolved "https://registry.yarnpkg.com/@langchain/core/-/core-0.3.18.tgz#bbe3c518f3b48b0cecd426b36d22bac88486a214" @@ -692,6 +708,34 @@ uuid "^10.0.0" zod "^3.23.8" +"@langchain/openai@>=0.1.0 <0.4.0", "@langchain/openai@>=0.2.0 <0.4.0": + version "0.3.14" + resolved "https://registry.yarnpkg.com/@langchain/openai/-/openai-0.3.14.tgz#c2f50fe963769851287f0171385232a344a5484a" + integrity sha512-lNWjUo1tbvsss45IF7UQtMu1NJ6oUKvhgPYWXnX9f/d6OmuLu7D99HQ3Y88vLcUo9XjjOy417olYHignMduMjA== + dependencies: + js-tiktoken "^1.0.12" + openai "^4.71.0" + zod "^3.22.4" + zod-to-json-schema "^3.22.3" + +"@langchain/textsplitters@>=0.0.0 <0.2.0": + version "0.1.0" + resolved "https://registry.yarnpkg.com/@langchain/textsplitters/-/textsplitters-0.1.0.tgz#f37620992192df09ecda3dfbd545b36a6bcbae46" + integrity sha512-djI4uw9rlkAb5iMhtLED+xJebDdAG935AdP4eRTB02R7OB/act55Bj9wsskhZsvuyQRpO4O1wQOp85s6T6GWmw== + dependencies: + js-tiktoken "^1.0.12" + +"@mendable/firecrawl-js@^1.8.5": + version "1.8.5" + resolved "https://registry.yarnpkg.com/@mendable/firecrawl-js/-/firecrawl-js-1.8.5.tgz#359888e4c44884b3ee3f58d52f035756111f6f73" + integrity sha512-gwXBbekZerL226HEDrNN577+oE1XqrJUeewZ/axMB6OFqSQGGRHGmknDnRa5Ote5ukxmWCIUey/UsnHnfGmf8w== + dependencies: + axios "^1.6.8" + isows "^1.0.4" + typescript-event-target "^1.1.1" + zod "^3.23.8" + zod-to-json-schema "^3.23.0" + "@nodelib/fs.scandir@2.1.5": version "2.1.5" resolved "https://registry.yarnpkg.com/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz#7619c2eb21b25483f6d167548b4cfd5a7488c3d5" @@ -1167,7 +1211,7 @@ available-typed-arrays@^1.0.7: dependencies: possible-typed-array-names "^1.0.0" -axios@^1.7.4: +axios@^1.6.8, axios@^1.7.4: version "1.7.7" resolved "https://registry.yarnpkg.com/axios/-/axios-1.7.7.tgz#2f554296f9892a72ac8d8e4c5b79c14a91d0a47f" integrity sha512-S4kL7XrjgBmvdGut0sN3yJxqYzrDOnivkBiN0OFs6hLiUam3UPvswUo0kqGyhqUZGEOytHyumEdXsAkgCOUf3Q== @@ -1249,6 +1293,11 @@ base64-js@^1.5.1: resolved "https://registry.yarnpkg.com/base64-js/-/base64-js-1.5.1.tgz#1b1b440160a5bf7ad40b650f095963481903930a" integrity sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA== +binary-extensions@^2.2.0: + version "2.3.0" + resolved "https://registry.yarnpkg.com/binary-extensions/-/binary-extensions-2.3.0.tgz#f6e14a97858d327252200242d4ccfe522c445522" + integrity sha512-Ceh+7ox5qe7LJuLHoY0feh3pHuUDHAcRUeyL2VYghZwfpkNIy/+8Ocg0a3UuSoYzavmylwuLWQOf3hl0jjMMIw== + brace-expansion@^1.1.7: version "1.1.11" resolved "https://registry.yarnpkg.com/brace-expansion/-/brace-expansion-1.1.11.tgz#3c7fcbf529d87226f3d2f52b966ff5271eb441dd" @@ -1939,6 +1988,11 @@ expect@^29.0.0, expect@^29.7.0: jest-message-util "^29.7.0" jest-util "^29.7.0" +expr-eval@^2.0.2: + version "2.0.2" + resolved "https://registry.yarnpkg.com/expr-eval/-/expr-eval-2.0.2.tgz#fa6f044a7b0c93fde830954eb9c5b0f7fbc7e201" + integrity sha512-4EMSHGOPSwAfBiibw3ndnP0AvjDWLsMvGOvWEZ2F96IGk0bIVdjQisOHxReSkE13mHcfbuCiXw+G4y0zv6N8Eg== + fast-deep-equal@^3.1.1, fast-deep-equal@^3.1.3: version "3.1.3" resolved "https://registry.yarnpkg.com/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz#3a7d56b559d6cbc3eb512325244e619a65c6c525" @@ -2037,6 +2091,11 @@ flat-cache@^3.0.4: keyv "^4.5.3" rimraf "^3.0.2" +flat@^5.0.2: + version "5.0.2" + resolved "https://registry.yarnpkg.com/flat/-/flat-5.0.2.tgz#8ca6fe332069ffa9d324c327198c598259ceb241" + integrity sha512-b6suED+5/3rTpUBdG1gupIl8MPFCAMA0QXwmljLhvCUKcUvdE4gWky9zpuGCcXHOsz4J9wPGNWq6OKpmIzz3hQ== + flatted@^3.2.9: version "3.3.1" resolved "https://registry.yarnpkg.com/flatted/-/flatted-3.3.1.tgz#21db470729a6734d4997002f439cb308987f567a" @@ -2505,6 +2564,11 @@ isexe@^2.0.0: resolved "https://registry.yarnpkg.com/isexe/-/isexe-2.0.0.tgz#e8fbf374dc556ff8947a10dcb0572d633f2cfa10" integrity sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw== +isows@^1.0.4: + version "1.0.6" + resolved "https://registry.yarnpkg.com/isows/-/isows-1.0.6.tgz#0da29d706fa51551c663c627ace42769850f86e7" + integrity sha512-lPHCayd40oW98/I0uvgaHKWCSvkzY27LjWLbtzOm64yQ+G3Q5npjjbdppU65iZXkK1Zt+kH9pfegli0AYfwYYw== + istanbul-lib-coverage@^3.0.0, istanbul-lib-coverage@^3.2.0: version "3.2.2" resolved "https://registry.yarnpkg.com/istanbul-lib-coverage/-/istanbul-lib-coverage-3.2.2.tgz#2d166c4b0644d43a39f04bf6c2edd1e585f31756" @@ -2995,6 +3059,11 @@ json5@^2.2.3: resolved "https://registry.yarnpkg.com/json5/-/json5-2.2.3.tgz#78cd6f1a19bdc12b73db5ad0c61efd66c1e29283" integrity sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg== +jsonpointer@^5.0.1: + version "5.0.1" + resolved "https://registry.yarnpkg.com/jsonpointer/-/jsonpointer-5.0.1.tgz#2110e0af0900fd37467b5907ecd13a7884a1b559" + integrity sha512-p/nXbhSEcu3pZRdkW1OfJhpsVtW1gd4Wa1fnQc9YLiTfAjn0312eMKimbdIQzuZl9aa9xUGaRlP9T/CJE/ditQ== + keyv@^4.5.3: version "4.5.4" resolved "https://registry.yarnpkg.com/keyv/-/keyv-4.5.4.tgz#a879a99e29452f942439f2a405e3af8b31d4de93" @@ -3007,6 +3076,24 @@ kleur@^3.0.3: resolved "https://registry.yarnpkg.com/kleur/-/kleur-3.0.3.tgz#a79c9ecc86ee1ce3fa6206d1216c501f147fc07e" integrity sha512-eTIzlVOSUR+JxdDFepEYcBMtZ9Qqdef+rnzWdRZuMbOywu5tO2w2N7rqjoANZ5k9vywhL6Br1VRjUIgTQx4E8w== +"langchain@>=0.2.3 <0.3.0 || >=0.3.4 <0.4.0": + version "0.3.6" + resolved "https://registry.yarnpkg.com/langchain/-/langchain-0.3.6.tgz#f4313d202ce168d29bfcf81a551147cd4986779f" + integrity sha512-erZOIKXzwCOrQHqY9AyjkQmaX62zUap1Sigw1KrwMUOnVoLKkVNRmAyxFlNZDZ9jLs/58MaQcaT9ReJtbj3x6w== + dependencies: + "@langchain/openai" ">=0.1.0 <0.4.0" + "@langchain/textsplitters" ">=0.0.0 <0.2.0" + js-tiktoken "^1.0.12" + js-yaml "^4.1.0" + jsonpointer "^5.0.1" + langsmith "^0.2.0" + openapi-types "^12.1.3" + p-retry "4" + uuid "^10.0.0" + yaml "^2.2.1" + zod "^3.22.4" + zod-to-json-schema "^3.22.3" + langsmith@^0.2.0: version "0.2.7" resolved "https://registry.yarnpkg.com/langsmith/-/langsmith-0.2.7.tgz#db1f83f90c780049dcc55d076a5c161c4f14b8c4" @@ -3267,6 +3354,24 @@ onetime@^5.1.2: dependencies: mimic-fn "^2.1.0" +openai@^4.71.0: + version "4.73.0" + resolved "https://registry.yarnpkg.com/openai/-/openai-4.73.0.tgz#b8f8a4793d4db91e7eeab7235446d2cfe3aa0e9c" + integrity sha512-NZstV77w3CEol9KQTRBRQ15+Sw6nxVTicAULSjYO4wn9E5gw72Mtp3fAVaBFXyyVPws4241YmFG6ya4L8v03tA== + dependencies: + "@types/node" "^18.11.18" + "@types/node-fetch" "^2.6.4" + abort-controller "^3.0.0" + agentkeepalive "^4.2.1" + form-data-encoder "1.7.2" + formdata-node "^4.3.2" + node-fetch "^2.6.7" + +openapi-types@^12.1.3: + version "12.1.3" + resolved "https://registry.yarnpkg.com/openapi-types/-/openapi-types-12.1.3.tgz#471995eb26c4b97b7bd356aacf7b91b73e777dd3" + integrity sha512-N4YtSYJqghVu4iek2ZUvcN/0aqH1kRDuNqzcycDxhOUpg7GdvLa2F3DgS6yBNhInhv2r/6I0Flkn7CqL8+nIcw== + optionator@^0.9.3: version "0.9.4" resolved "https://registry.yarnpkg.com/optionator/-/optionator-0.9.4.tgz#7ea1c1a5d91d764fb282139c88fe11e182a3a734" @@ -3898,6 +4003,11 @@ typed-array-length@^1.0.6: is-typed-array "^1.1.13" possible-typed-array-names "^1.0.0" +typescript-event-target@^1.1.1: + version "1.1.1" + resolved "https://registry.yarnpkg.com/typescript-event-target/-/typescript-event-target-1.1.1.tgz#20a6d491b77d2e37dc432c5394ab74c0d7065539" + integrity sha512-dFSOFBKV6uwaloBCCUhxlD3Pr/P1a/tJdcmPrTXCHlEFD3faj0mztjcGn6VBAhQ0/Bdy8K3VWrrqwbt/ffsYsg== + typescript@^5.3.3: version "5.5.4" resolved "https://registry.yarnpkg.com/typescript/-/typescript-5.5.4.tgz#d9852d6c82bad2d2eda4fd74a5762a8f5909e9ba" @@ -4056,6 +4166,11 @@ yallist@^3.0.2: resolved "https://registry.yarnpkg.com/yallist/-/yallist-3.1.1.tgz#dbb7daf9bfd8bac9ab45ebf602b8cbad0d5d08fd" integrity sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g== +yaml@^2.2.1: + version "2.6.1" + resolved "https://registry.yarnpkg.com/yaml/-/yaml-2.6.1.tgz#42f2b1ba89203f374609572d5349fb8686500773" + integrity sha512-7r0XPzioN/Q9kXBro/XPnA6kznR73DHq+GXh5ON7ZozRO6aMjbmiBuKste2wslTFkC5d1dw0GooOCepZXJ2SAg== + yargs-parser@^21.0.1, yargs-parser@^21.1.1: version "21.1.1" resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-21.1.1.tgz#9096bceebf990d21bb31fa9516e0ede294a77d35" @@ -4084,12 +4199,12 @@ zod-to-json-schema@^3.22.3: resolved "https://registry.yarnpkg.com/zod-to-json-schema/-/zod-to-json-schema-3.23.2.tgz#bc7e379c8050462538383e382964c03d8fe008f9" integrity sha512-uSt90Gzc/tUfyNqxnjlfBs8W6WSGpNBv0rVsNxP/BVSMHMKGdthPYff4xtCHYloJGM0CFxFsb3NbC0eqPhfImw== -zod-to-json-schema@^3.22.4: +zod-to-json-schema@^3.22.4, zod-to-json-schema@^3.22.5, zod-to-json-schema@^3.23.0: version "3.23.5" resolved "https://registry.yarnpkg.com/zod-to-json-schema/-/zod-to-json-schema-3.23.5.tgz#ec23def47dcafe3a4d640eba6a346b34f9a693a5" integrity sha512-5wlSS0bXfF/BrL4jPAbz9da5hDlDptdEppYfe+x4eIJ7jioqKG9uUxOwPzqof09u/XeVdrgFu29lZi+8XNDJtA== -zod@^3.22.4, zod@^3.23.8: +zod@^3.22.3, zod@^3.22.4, zod@^3.23.8: version "3.23.8" resolved "https://registry.yarnpkg.com/zod/-/zod-3.23.8.tgz#e37b957b5d52079769fb8097099b592f0ef4067d" integrity sha512-XBx9AXhXktjUqnepgTiE5flcKIYWi/rme0Eaj+5Y0lftuGBq+jyRu/md4WnuxqgP1ubdpNCsYEYPxrzVHD8d6g==