Skip to content

Commit

Permalink
Merge pull request #10 from langchain-ai/brace/report-generator
Browse files Browse the repository at this point in the history
feat: Implement report and post generators
  • Loading branch information
bracesproul authored Nov 27, 2024
2 parents c4b06be + 82730d7 commit 50ef560
Show file tree
Hide file tree
Showing 31 changed files with 1,283 additions and 286 deletions.
4 changes: 3 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,11 @@
"@langchain/core": "^0.3.18",
"@langchain/google-vertexai-web": "^0.1.2",
"@langchain/langgraph": "^0.2.22",
"@mendable/firecrawl-js": "^1.8.5",
"@mendable/firecrawl-js": "0.0.36",
"@slack/web-api": "^7.7.0",
"cheerio": "^1.0.0",
"moment": "^2.30.1",
"twitter-api-v2": "^1.18.2",
"zod": "^3.23.8"
},
"devDependencies": {
Expand Down
4 changes: 2 additions & 2 deletions src/agent/graph.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { END, Send, START, StateGraph } from "@langchain/langgraph";
import { GraphAnnotation } from "./state.js";
import { ConfigurableAnnotation, GraphAnnotation } from "./state.js";
import { ingestData } from "./nodes/ingest-data.js";
import { generatePostGraph } from "./subgraphs/generate-post/graph.js";

Expand All @@ -23,7 +23,7 @@ function routeAfterIdentifyContent(
});
}

const builder = new StateGraph(GraphAnnotation)
const builder = new StateGraph(GraphAnnotation, ConfigurableAnnotation)
// Ingests posts from Slack channel.
.addNode("ingestData", ingestData)
// Subgraph which is invoked once for each message.
Expand Down
17 changes: 11 additions & 6 deletions src/agent/nodes/ingest-data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@ import { extractUrlsFromSlackText } from "../utils.js";
const getChannelIdFromConfig = async (
config: LangGraphRunnableConfig,
): Promise<string | undefined> => {
if (config.configurable?.slack.channelName) {
if (config.configurable?.slackChannelName) {
const client = new SlackMessageFetcher({
channelName: config.configurable.slack.channelName,
channelName: config.configurable.slackChannelName,
});
return await client.getChannelId();
}
return config.configurable?.slack.channelId;
return config.configurable?.slackChannelId;
};

export async function ingestData(
Expand All @@ -27,8 +27,13 @@ export async function ingestData(
const client = new SlackMessageFetcher({
channelId: channelId,
});

const recentMessages = await client.fetchLast24HoursMessages();
console.log("Before fetching messages");
const recentMessages = await client.fetchLast24HoursMessages(
config.configurable?.maxMessages,
);
if (recentMessages.length > 1) {
throw new Error("More than one message found");
}
const messagesWithUrls = recentMessages.flatMap((msg) => {
const links = extractUrlsFromSlackText(msg.text);
if (!links.length) {
Expand All @@ -39,7 +44,7 @@ export async function ingestData(
links,
};
});

console.log("returning", messagesWithUrls.length, " messages");
return {
slackMessages: messagesWithUrls,
};
Expand Down
17 changes: 10 additions & 7 deletions src/agent/state.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { Annotation, MessagesAnnotation } from "@langchain/langgraph";
import { Annotation } from "@langchain/langgraph";
import { SimpleSlackMessage } from "../clients/slack.js";

export type LangChainProduct = "langchain" | "langgraph" | "langsmith";
Expand All @@ -7,16 +7,10 @@ export type SimpleSlackMessageWithLinks = SimpleSlackMessage & {
};

export const GraphAnnotation = Annotation.Root({
...MessagesAnnotation.spec,
/**
* The Slack messages to use for the content.
*/
slackMessages: Annotation<SimpleSlackMessageWithLinks[]>,
/**
* The LangChain product(s) this content is relevant to.
* Undefined if it is not relevant to any product.
*/
relevantProducts: Annotation<LangChainProduct[] | undefined>,
/**
* A report generated on the content. Will be used in the main
* graph when generating the post about this content.
Expand All @@ -31,3 +25,12 @@ export const GraphAnnotation = Annotation.Root({
*/
twitterPost: Annotation<string>,
});

/**
 * Run-level configuration accepted by the graph (read by nodes through
 * `config.configurable`).
 */
export const ConfigurableAnnotation = Annotation.Root({
  /**
   * Upper bound handed to the Slack client when fetching recent messages.
   * Falls back to 100 when the caller supplies nothing.
   */
  maxMessages: Annotation<number>({
    // Last write wins: the incoming value replaces whatever was stored.
    reducer: (_previous, incoming) => {
      return incoming;
    },
    default: () => {
      return 100;
    },
  }),
  /**
   * Name of the Slack channel to ingest from. When set, it is resolved to a
   * channel ID and takes precedence over `slackChannelId`.
   */
  slackChannelName: Annotation<string | undefined>,
  /**
   * ID of the Slack channel to ingest from. Used when no channel name is given.
   */
  slackChannelId: Annotation<string | undefined>,
});
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,8 @@ export const GraphAnnotation = Annotation.Root({
},
default: () => [],
}),
});

export const VerifyContentAnnotation = Annotation.Root({
/**
* The link to the content to verify.
* The content of the Tweet/LinkedIn post.
*/
link: Annotation<string>,
post: Annotation<string>,
});
62 changes: 29 additions & 33 deletions src/agent/subgraphs/generate-post/graph.ts
Original file line number Diff line number Diff line change
@@ -1,30 +1,17 @@
import { END, Send, START, StateGraph } from "@langchain/langgraph";
import { GraphAnnotation, VerifyContentAnnotation } from "./state.js";
import { GraphAnnotation } from "./generate-post-state.js";
import { generateContentReport } from "./nodes/generate-content-report.js";
import { verifyGeneralContent } from "./nodes/verify-general.js";
import { verifyYouTubeContent } from "./nodes/verify-youtube.js";
import { verifyGitHubContent } from "./nodes/verify-github.js";
import { generateLinkedinPost } from "./nodes/generate-post/linkedin.js";
import { generateTwitterPost } from "./nodes/generate-post/twitter.js";
import { verifyGeneralContent } from "../shared/nodes/verify-general.js";
import { verifyYouTubeContent } from "../shared/nodes/verify-youtube.js";
import { verifyGitHubContent } from "../shared/nodes/verify-github.js";
import { generatePosts } from "./nodes/generate-post.js";
import { schedulePost } from "./nodes/schedule-post.js";
import { VerifyContentAnnotation } from "../shared/shared-state.js";
import { verifyTweetGraph } from "../verify-tweet/graph.js";

/**
* Should do the following:
* Handle youtube videos
* Handle GitHub repos
* Handle all other content (general purpose web scraping)
*
* YouTube videos:
* 1. use gemini 1.5 flash to ingest youtube video & create a summary
* 2. pass the summary to claude and have claude identify if it's langchain content
*
* GitHub repos:
* 1a. Pull the readme from the repo, pass to claude and ask to identify if it's LangChain content.
* 1b. iterate over the first 100 .js|jsx|ts|tsx or .py files, use regex to extract all imports, verify it has LangChain imports.
*
* All others:
* Maybe use FireCrawl to scrape the page content. Then pass it to an LLM to identify if it's LangChain content.
*/
/**
 * Reports whether a link points at Twitter/X.
 *
 * The decision is made on the parsed hostname rather than on a substring of
 * the whole URL, so hosts that merely end in "x.com" (netflix.com,
 * dropbox.com, ...) or URLs that mention twitter.com in their path or query
 * are not misrouted to the tweet verifier.
 *
 * @param url - The link to inspect. A missing scheme is tolerated.
 * @returns `true` for twitter.com, x.com and their subdomains; `false` for
 * everything else, including strings that cannot be parsed as a URL.
 */
const isTwitterUrl = (url: string): boolean => {
  let hostname: string;
  try {
    // Accept bare "x.com/user/status/1" style links by assuming https.
    const hasScheme = /^[a-z][a-z\d+.-]*:\/\//i.test(url);
    hostname = new URL(hasScheme ? url : `https://${url}`).hostname.toLowerCase();
  } catch {
    return false;
  }
  return ["twitter.com", "x.com"].some(
    (domain) => hostname === domain || hostname.endsWith(`.${domain}`),
  );
};

/**
* This conditional edge will iterate over all the links in a slack message.
Expand All @@ -35,24 +22,32 @@ function routeContentTypes(state: typeof GraphAnnotation.State) {
if (link.includes("youtube.com")) {
return new Send("verifyYouTubeContent", {
link,
slackMessage: state.slackMessage,
});
} else if (link.includes("github.com")) {
return new Send("verifyGitHubContent", {
link,
slackMessage: state.slackMessage,
});
} else if (isTwitterUrl(link)) {
return new Send("verifyTweetSubGraph", {
link,
slackMessage: state.slackMessage,
});
} else {
return new Send("verifyGeneralContent", {
link,
slackMessage: state.slackMessage,
});
}
});
}

function routeAfterGeneratingReport(
state: typeof GraphAnnotation.State,
): "generateLinkedinPost" | typeof END {
): "generatePosts" | typeof END {
if (state.report) {
return "generateLinkedinPost";
return "generatePosts";
}
return END;
}
Expand All @@ -68,11 +63,12 @@ const generatePostBuilder = new StateGraph(GraphAnnotation)
.addNode("verifyGitHubContent", verifyGitHubContent, {
input: VerifyContentAnnotation,
})
.addNode("verifyTweetSubGraph", verifyTweetGraph, {
input: VerifyContentAnnotation,
})

// Generates a post on the content for LinkedIn.
.addNode("generateLinkedinPost", generateLinkedinPost)
// Generates a post on the content for Twitter.
.addNode("generateTwitterPost", generateTwitterPost)
// Generates a Tweet/LinkedIn post based on the report content.
.addNode("generatePosts", generatePosts)
// Interrupts the node for human in the loop, then schedules the
// post for Twitter/LinkedIn.
.addNode("schedulePost", schedulePost)
Expand All @@ -83,24 +79,24 @@ const generatePostBuilder = new StateGraph(GraphAnnotation)
"verifyYouTubeContent",
"verifyGeneralContent",
"verifyGitHubContent",
"verifyTweetSubGraph",
])

// After verifying the different content types, we should generate a report on them.
.addEdge("verifyYouTubeContent", "generateContentReport")
.addEdge("verifyGeneralContent", "generateContentReport")
.addEdge("verifyGitHubContent", "generateContentReport")
.addEdge("verifyTweetSubGraph", "generateContentReport")

// Once generating a report, we should confirm the report exists (meaning the content is relevant).
.addConditionalEdges("generateContentReport", routeAfterGeneratingReport, [
"generateLinkedinPost",
"generatePosts",
END,
])

// After generating the LinkedIn post, generate the Twitter post.
.addEdge("generateLinkedinPost", "generateTwitterPost")
// Finally, schedule the post. This will also throw an interrupt
// so a human can edit the post before scheduling.
.addEdge("generateTwitterPost", "schedulePost")
.addEdge("generatePosts", "schedulePost")

// If the schedule post is successful, end the graph.
.addEdge("schedulePost", END);
Expand Down
63 changes: 60 additions & 3 deletions src/agent/subgraphs/generate-post/nodes/generate-content-report.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,66 @@
import { LangGraphRunnableConfig } from "@langchain/langgraph";
import { GraphAnnotation } from "../state.js";
import { GraphAnnotation } from "../generate-post-state.js";
import { LANGCHAIN_PRODUCTS_CONTEXT } from "../prompts.js";
import { ChatAnthropic } from "@langchain/anthropic";

/**
 * System prompt for the report-writing model. It embeds the shared LangChain
 * product context and lays out the structure and rules the report must follow.
 * The tagged sections (<structure guidelines>, <rules>) must each be closed so
 * the model can tell where a section ends.
 */
const GENERATE_REPORT_PROMPT = `You are a highly regarded marketing employee at LangChain.
You have been tasked with writing a report summary on content submitted to you from a third party in hopes of having it promoted by LangChain.
This summary report will then be used to craft Tweets and LinkedIn posts promoting the content and LangChain products.
LangChain has a policy of promoting any content submitted that uses LangChain's products.
Here is some context about the different LangChain products and services:
${LANGCHAIN_PRODUCTS_CONTEXT}
Given this context, examine the user's input closely, and generate a summary report on it.
The summary report should follow the following structure guidelines:
<structure guidelines>
1. The first part of the report should be a high level overview of the content. Include the name, what it does/what it aims to achieve/the problems it solves.
2. The second part should be all about how it implements LangChain's products/services. Cover what product(s) it uses. How these products are used, and why they're important to the application. This should be technical and detailed. Ensure you clearly state the LangChain product(s) used at the top of this section.
3. The final part should go into detail covering anything the first two parts missed. This should be a detailed technical overview of the content, and interesting facts you found that readers might find engaging. This part does NOT need to be long, and if you've already covered everything, you can skip it. Remember you do NOT want to bore the readers with repetitive information.
</structure guidelines>
Follow these rules and guidelines when generating the report:
<rules>
- Focus on the subject of the content, and why/how LangChain's product(s) enhance it.
- The final Tweet/LinkedIn post will be developer focused, so ensure the report is technical and detailed.
- Include any relevant links found in the content in the report.
- Include details about what the product does/what problem it solves.
- Use proper markdown styling when formatting the report summary.
- If possible, keep the post at or under 280 characters (not including the URL) for conciseness.
</rules>
Do not include any personal opinions or biases in the report. Stick to the facts and technical details.
Your response should ONLY include the report summary, and no other text.`;

/**
 * Builds the user message for the report model.
 *
 * Each entry is wrapped in a `<Content index={n}>` element (1-based) and the
 * elements are separated by a blank line, all preceded by a one-line
 * instruction asking for a report.
 *
 * @param pageContents - Summaries or full page texts gathered for the submission.
 * @returns The assembled prompt text.
 */
const formatReportPrompt = (pageContents: string[]): string => {
  const instruction =
    "The following text contains summaries, or entire pages from the content I submitted to you. Please review the content and generate a report on it.";

  const wrapped: string[] = [];
  pageContents.forEach((body, position) => {
    const ordinal = position + 1;
    wrapped.push(`<Content index={${ordinal}}>\n${body}\n</Content>`);
  });

  return instruction + "\n" + wrapped.join("\n\n");
};

export async function generateContentReport(
_state: typeof GraphAnnotation.State,
state: typeof GraphAnnotation.State,
_config: LangGraphRunnableConfig,
): Promise<Partial<typeof GraphAnnotation.State>> {
throw new Error("Not implemented");
const reportModel = new ChatAnthropic({
model: "claude-3-5-sonnet-20241022",
temperature: 0,
});

const prompt = formatReportPrompt(state.pageContents);

const result = await reportModel.invoke([
{
role: "system",
content: GENERATE_REPORT_PROMPT,
},
{
role: "user",
content: prompt,
},
]);

return {
report: result.content as string,
};
}
Loading

0 comments on commit 50ef560

Please sign in to comment.