diff --git a/.github/workflows/preview.yml b/.github/workflows/preview.yml index 0e64922..9369ffb 100644 --- a/.github/workflows/preview.yml +++ b/.github/workflows/preview.yml @@ -13,7 +13,7 @@ jobs: pull-requests: write steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Get changed files in posts folder id: get_changed_files @@ -30,9 +30,9 @@ jobs: - name: Set up Python if: steps.get_changed_files.outputs.any_changed == 'true' - uses: actions/setup-python@v3 + uses: actions/setup-python@v5 with: - python-version: 3.9 + python-version: '3.12' - name: Install dependencies if: steps.get_changed_files.outputs.any_changed == 'true' diff --git a/.github/workflows/publish_content.yml b/.github/workflows/publish_content.yml index 86d19f8..9af8f3c 100644 --- a/.github/workflows/publish_content.yml +++ b/.github/workflows/publish_content.yml @@ -14,7 +14,7 @@ jobs: pull-requests: write steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Get changed files in posts folder id: get_changed_files @@ -31,9 +31,9 @@ jobs: - name: Set up Python if: steps.get_changed_files.outputs.any_changed == 'true' - uses: actions/setup-python@v3 + uses: actions/setup-python@v5 with: - python-version: 3.9 + python-version: '3.12' - name: Install dependencies if: steps.get_changed_files.outputs.any_changed == 'true' diff --git a/github_run.py b/github_run.py index c89c1f5..5c5ebf1 100644 --- a/github_run.py +++ b/github_run.py @@ -31,14 +31,14 @@ def comment(self, comment_text): url = ( f"https://api.github.com/repos/{self.repo}/issues/{self.pr_number}/comments" ) - data = {"body": str(comment_text)} - response = requests.post(url, headers=headers, json=data) - if response.status_code == 201: - return True - else: - raise Exception( - f"Failed to create github comment!, {response.json().get('message')}" - ) + for comment_body in comment_text.split("\n\n---\n"): + data = {"body": str(comment_body)} + response = requests.post(url, headers=headers, json=data) + if response.status_code != 201: + raise Exception( + f"Failed to create github comment!, {response.json().get('message')}" + ) + return True def get_files(self): url = f"https://api.github.com/repos/{self.repo}/pulls/{self.pr_number}/files" diff --git a/lib/galaxy_social.py b/lib/galaxy_social.py index ca7f641..eb50e8e 100644 --- a/lib/galaxy_social.py +++ b/lib/galaxy_social.py @@ -22,9 +22,6 @@ def __init__(self, preview: bool, json_out: str): self.plugins = {} for plugin in self.plugins_config["plugins"]: - if preview and plugin["name"].lower() != "markdown": - continue - if plugin["enabled"]: module_name, class_name = plugin["class"].rsplit(".", 1) try: @@ -112,35 +109,45 @@ def parse_markdown_file(self, file_path): def process_markdown_file(self, file_path, processed_files): content, metadata = self.parse_markdown_file(file_path) - if self.preview: + formatting_results = {} + for media in metadata["media"]: try: - _, _, message = self.plugins["markdown"].create_post( + formatting_results[media] = self.plugins[media].format_content( content=content, - mentions=[], - hashtags=[], + mentions=metadata.get("mentions", {}).get(media, []), + hashtags=metadata.get("hashtags", {}).get(media, []), images=metadata.get("images", []), - media=metadata["media"], - preview=True, - file_path=file_path, ) - return processed_files, message except Exception as e: - raise Exception(f"Failed to create preview for {file_path}.\n{e}") + raise Exception(f"Failed to format post for {file_path}.\n{e}") + if self.preview: + message = f'Hi, I\'m your friendly social media assistant. In the following, you will see a preview of this post "{file_path}"' + for media in metadata["media"]: + formatted_content, preview, warning = formatting_results[media] + message += f"\n\n## {media}\n\n" + message += preview + if warning: + message += f"\nWARNING: {warning}" + return processed_files, message.strip() + stats = {} url = {} if file_path in processed_files: stats = processed_files[file_path] for media in metadata["media"]: - if file_path in processed_files and media in processed_files[file_path]: + if stats.get(media): + print("Skipping previous post to", media) continue - mentions = metadata.get("mentions", {}).get(media, []) - hashtags = metadata.get("hashtags", {}).get(media, []) - images = metadata.get("images", []) + formatted_content, _, _ = formatting_results[media] stats[media], url[media] = self.plugins[media].create_post( - content, mentions, hashtags, images, file_path=file_path + formatted_content, file_path=file_path ) url_text = "\n".join( - [f"[{media}]({link})" for media, link in url.items() if link] + [ + f"- [{media}]({link})" if link else f"- {media}" + for media, link in url.items() + if stats[media] + ] ) message = f"Posted to:\n\n{url_text}" if url_text else "No posts created." @@ -150,7 +157,7 @@ def process_markdown_file(self, file_path, processed_files): def process_files(self, files_to_process): processed_files = {} - messages = "---\n" + messages = "" processed_files_path = self.json_out if os.path.exists(processed_files_path): with open(processed_files_path, "r") as file: diff --git a/lib/plugins/bluesky.py b/lib/plugins/bluesky.py index 13680ea..215c642 100644 --- a/lib/plugins/bluesky.py +++ b/lib/plugins/bluesky.py @@ -149,11 +149,52 @@ def handle_url_card( ) return embed_external - def create_post( - self, content, mentions, hashtags, images, **kwargs - ) -> Tuple[bool, Optional[str]]: + def wrap_text_with_index(self, content): + if len(content) <= self.max_content_length: + return [content] + urls = re.findall(r"https?://\S+", content) + placeholder_content = re.sub( + r"https?://\S+", lambda m: "~" * len(m.group()), content + ) + wrapped_lines = textwrap.wrap( + placeholder_content, self.max_content_length - 8, replace_whitespace=False + ) + final_lines = [] + url_index = 0 + for i, line in enumerate(wrapped_lines, 1): + while "~~~~~~~~~~" in line and url_index < len(urls): + placeholder = "~" * len(urls[url_index]) + line = line.replace(placeholder, urls[url_index], 1) + url_index += 1 + final_lines.append(f"{line} ({i}/{len(wrapped_lines)})") + return final_lines + + def format_content(self, content, mentions, hashtags, images, **kwargs): + mentions = " ".join([f"@{v}" for v in mentions]) + hashtags = " ".join([f"#{v}" for v in hashtags]) + if len(images) > 4: + warnings = f"A maximum of four images, not {len(images)}, can be included in a single bluesky post." + images = images[:4] + else: + warnings = "" + + chunks = self.wrap_text_with_index(f"{content}\n\n{mentions}\n{hashtags}") + + formatted_content = { + "body": "\n\n".join(chunks), + "images": images, + "chunks": chunks, + } + preview = formatted_content["body"] + images_preview = "\n".join( + [f'![{image.get("alt_text", "")}]({image["url"]})' for image in images] + ) + preview += "\n\n" + images_preview + return formatted_content, preview, warnings + + def create_post(self, content, **kwargs) -> Tuple[bool, Optional[str]]: embed_images = [] - for image in images[:4]: + for image in content["images"][:4]: response = requests.get(image["url"]) if response.status_code == 200 and response.headers.get( "Content-Type", "" @@ -172,17 +213,11 @@ def create_post( else None ) - status = [] reply_to = None - mentions = " ".join([f"@{v}" for v in mentions]) - hashtags = " ".join([f"#{v}" for v in hashtags]) - for text in textwrap.wrap( - content + "\n" + mentions + "\n" + hashtags, - self.max_content_length, - replace_whitespace=False, - ): + + for text in content["chunks"]: facets, last_url = self.parse_facets(text) - if not images or reply_to: + if not content["images"] or reply_to: embed = self.handle_url_card(cast(str, last_url)) post = self.blueskysocial.send_post( @@ -192,8 +227,9 @@ def create_post( for _ in range(5): data = self.blueskysocial.get_posts([post.uri]).posts if data: - status.append(data[0].record.text == text) break + else: + return False, None if reply_to is None: link = f"https://bsky.app/profile/{self.blueskysocial.me.handle}/post/{post.uri.split('/')[-1]}" @@ -201,4 +237,4 @@ def create_post( parent = atproto.models.create_strong_ref(post) reply_to = atproto.models.AppBskyFeedPost.ReplyRef(parent=parent, root=root) - return all(status), link + return True, link diff --git a/lib/plugins/markdown.py b/lib/plugins/markdown.py index ac62bee..7c24bc5 100644 --- a/lib/plugins/markdown.py +++ b/lib/plugins/markdown.py @@ -10,42 +10,26 @@ def __init__(self, **kwargs): else os.path.join(os.getcwd(), kwargs["save_path"]) ) - def create_post(self, content, mentions, hashtags, images, **kwargs): + def format_content(self, content, mentions, hashtags, images, **kwargs): + _images = "\n".join( + [f'![{image.get("alt_text", "")}]({image["url"]})' for image in images] + ) + mentions = " ".join([f"@{v}" for v in mentions]) + hashtags = " ".join([f"#{v}" for v in hashtags]) + warnings = "" + formatted_content = "\n\n".join([content, mentions, hashtags, _images]) + preview = formatted_content + return formatted_content, preview, warnings + + def create_post(self, formatted_content, **kwargs): try: - _images = ( - "\n" - + "\n".join( - [ - f'![{image.get("alt_text", "")}]({image["url"]})' - for image in images - ] - ) - if images - else "" - ) - mentions = "\n" + " ".join([f"@{v}" for v in mentions]) if mentions else "" - hashtags = "\n" + " ".join([f"#{v}" for v in hashtags]) if hashtags else "" - text = f"{content}{mentions}{hashtags}{_images}" if self.save_path: os.makedirs(self.save_path, exist_ok=True) prefix = kwargs.get("file_path", "").replace(".md", "") - file_name = ( - f"{self.save_path}/{prefix.replace('/', '-')}_{time.strftime('%Y%m%d-%H%M%S')}.md" - ) + file_name = f"{self.save_path}/{prefix.replace('/', '-')}_{time.strftime('%Y%m%d-%H%M%S')}.md" with open(file_name, "w") as f: - f.write(text) - if kwargs.get("preview"): - social_media = ", ".join(kwargs.get("media", [])) - pre_comment_text = "" - if len(images) > 4 and ( - "mastodon" in social_media or "bluesky" in social_media - ): - pre_comment_text = f"Please note that Mastodon and Bluesky only support up to 4 images in a single post. The first 4 images will be included in the post, and the rest will be ignored.\n" - comment_text = f"{pre_comment_text}This is a preview from {prefix.split('/')[-1]} that will be posted to {social_media}:\n\n{text}" - return True, None, comment_text + f.write(formatted_content) return True, None except Exception as e: - if kwargs.get("preview", False): - print(e) - return False, None, e + print(e) return False, None diff --git a/lib/plugins/mastodon.py b/lib/plugins/mastodon.py index 6ffb816..3af2196 100644 --- a/lib/plugins/mastodon.py +++ b/lib/plugins/mastodon.py @@ -1,8 +1,8 @@ +import re import tempfile import textwrap import requests -from bs4 import BeautifulSoup from mastodon import Mastodon @@ -14,9 +14,52 @@ def __init__(self, **kwargs): ) self.max_content_length = kwargs.get("max_content_length", 500) - def create_post(self, content, mentions, hashtags, images, **kwargs): + def wrap_text_with_index(self, content): + if len(content) <= self.max_content_length: + return [content] + urls = re.findall(r"https?://\S+", content) + placeholder_content = re.sub( + r"https?://\S+", lambda m: "~" * len(m.group()), content + ) + wrapped_lines = textwrap.wrap( + placeholder_content, self.max_content_length - 8, replace_whitespace=False + ) + final_lines = [] + url_index = 0 + for i, line in enumerate(wrapped_lines, 1): + while "~~~~~~~~~~" in line and url_index < len(urls): + placeholder = "~" * len(urls[url_index]) + line = line.replace(placeholder, urls[url_index], 1) + url_index += 1 + final_lines.append(f"{line} ({i}/{len(wrapped_lines)})") + return final_lines + + def format_content(self, content, mentions, hashtags, images, **kwargs): + mentions = " ".join([f"@{v}" for v in mentions]) + hashtags = " ".join([f"#{v}" for v in hashtags]) + if len(images) > 4: + warnings = f"A maximum of four images, not {len(images)}, can be included in a single mastodon post." + images = images[:4] + else: + warnings = "" + + chunks = self.wrap_text_with_index(f"{content}\n\n{mentions}\n{hashtags}") + + formatted_content = { + "body": "\n\n".join(chunks), + "images": images, + "chunks": chunks, + } + preview = formatted_content["body"] + images_preview = "\n".join( + [f'![{image.get("alt_text", "")}]({image["url"]})' for image in images] + ) + preview += "\n\n" + images_preview + return formatted_content, preview, warnings + + def create_post(self, content, **kwargs): media_ids = [] - for image in images[:4]: + for image in content["images"]: response = requests.get(image["url"]) if response.status_code == 200 and response.headers.get( "Content-Type", "" @@ -30,34 +73,25 @@ def create_post(self, content, mentions, hashtags, images, **kwargs): ) media_ids.append(media_uploaded["id"]) - toot_id = None - status = [] - mentions = " ".join([f"@{v}" for v in mentions]) - hashtags = " ".join([f"#{v}" for v in hashtags]) - for text in textwrap.wrap( - content + "\n" + mentions + "\n" + hashtags, - self.max_content_length, - replace_whitespace=False, - ): + toot_id = link = None + for text in content["chunks"]: toot = self.mastodon_handle.status_post( status=text, in_reply_to_id=toot_id, - media_ids=media_ids if (media_ids != [] and toot_id == None) else None, + media_ids=media_ids if (media_ids and toot_id is None) else None, ) - if not toot_id: - link = f"{self.base_url}/@{toot['account']['acct']}/{toot['id']}" toot_id = toot["id"] + if not link: + link = f"{self.base_url}/@{toot['account']['acct']}/{toot_id}" for _ in range(3): post = self.mastodon_handle.status(toot_id) - if post.content: - post_content = BeautifulSoup(post.content, "html.parser").get_text( - separator=" " - ) - status.append( - "".join(post_content.split()) == "".join(text.split()) - ) + if post.url: + if not link: + link = post.url break + else: + return False, None - return all(status), link + return True, link diff --git a/lib/plugins/matrix.py b/lib/plugins/matrix.py index a98acb5..d36a5d0 100644 --- a/lib/plugins/matrix.py +++ b/lib/plugins/matrix.py @@ -4,6 +4,8 @@ import aiofiles.os import magic import requests +from bs4 import BeautifulSoup +from markdown import markdown from nio import AsyncClient, UploadResponse from PIL import Image @@ -16,78 +18,113 @@ def __init__(self, **kwargs): self.client.access_token = kwargs.get("access_token") self.client.device_id = kwargs.get("device_id") self.room_id = kwargs.get("room_id") + self.runner = asyncio.Runner() - async def async_create_post(self, text, mentions, images): + async def async_format_content(self, content, mentions, hashtags, images, **kwargs): + formatted_content = [] + preview = "" for image in images: - response = requests.get(image["url"]) - if response.status_code != 200: - continue + preview += f'![{image.get("alt_text", "")}]({image["url"]})\n' image_name = image["url"].split("/")[-1] - temp = tempfile.NamedTemporaryFile() - temp.write(response.content) - temp.flush() - mime_type = magic.from_file(temp.name, mime=True) - if not mime_type.startswith("image/"): - continue + formatted_content.append( + { + "body": image.get("alt_text", image_name), + "filename": image_name, + "msgtype": "m.image", + "url": image["url"], + } + ) + message_content = { + "msgtype": "m.text", + "format": "org.matrix.custom.html", + } - width, height = Image.open(temp.name).size - file_stat = await aiofiles.os.stat(temp.name) - async with aiofiles.open(temp.name, "r+b") as f: - resp, _ = await self.client.upload( - f, - content_type=mime_type, - filename=image_name, - filesize=file_stat.st_size, + # matrix specs say the body fallback should contain the displayname + # we add the mentions as markdown links in front of the supplied content, + # then we convert to html to get the "formatted_body", then to text to get "body" + if mentions: + message_content["m.mentions"] = {"user_ids": []} + mention_links = [] + for mention in mentions: + # try to get the display name of the mentioned matrix user + response = await self.client.get_displayname(f"@{mention}") + mention_name = getattr(response, "displayname", mention) + mention_links.append( + f"[{mention_name}](https://matrix.to/#/@{mention})" ) + message_content["m.mentions"]["user_ids"].append(f"@{mention}") + mentions_string = " ".join(mention_links) + content = f"{mentions_string}: {content}" + if hashtags: + content += "\n\n" + " ".join([f"\\#{h}" for h in hashtags]) + formatted_body = markdown(content) + body = BeautifulSoup(formatted_body, features="html.parser").get_text( + "\n", strip=True + ) + message_content["body"] = body + message_content["formatted_body"] = formatted_body + formatted_content.append(message_content) + warnings = "" + await self.client.close() + return ( + formatted_content, + preview + "\n" + message_content["formatted_body"], + warnings, + ) + + async def async_create_post(self, content): + for msg in content: + if msg["msgtype"] == "m.image": + response = requests.get(msg["url"]) + if response.status_code != 200: + continue + temp = tempfile.NamedTemporaryFile() + temp.write(response.content) + temp.flush() + mime_type = magic.from_file(temp.name, mime=True) + if not mime_type.startswith("image/"): + continue + + width, height = Image.open(temp.name).size + file_stat = await aiofiles.os.stat(temp.name) + async with aiofiles.open(temp.name, "r+b") as f: + resp, _ = await self.client.upload( + f, + content_type=mime_type, + filename=msg["filename"], + filesize=file_stat.st_size, + ) - if not isinstance(resp, UploadResponse): - continue + if not isinstance(resp, UploadResponse): + continue - content = { - "body": image_name, - "info": { + # add info about the image to the message + msg["info"] = { "size": file_stat.st_size, "mimetype": mime_type, - "thumbnail_info": None, "w": width, "h": height, - "thumbnail_url": None, - }, - "msgtype": "m.image", - "url": resp.content_uri, - } + } + # replace original image url with that of the server upload + msg["url"] = resp.content_uri try: - await self.client.room_send( - self.room_id, message_type="m.room.message", content=content + response = await self.client.room_send( + self.room_id, message_type="m.room.message", content=msg ) - except: + except Exception as e: + print(e) return False, None + event_link = f"https://matrix.to/#/{self.room_id}/{response.event_id}" - if mentions: - text = ( - text - + "\n\n" - + " ".join([f"https://matrix.to/#/@{mention}" for mention in mentions]) - ) - content = { - "msgtype": "m.text", - "format": "org.matrix.custom.html", - "body": text, - } - try: - response = await self.client.room_send( - self.room_id, message_type="m.room.message", content=content - ) - await self.client.close() - message_id = response.event_id - link = f"https://matrix.to/#/{self.room_id}/{message_id}" - except: - return False, None + await self.client.close() + return True, event_link - return True, link + def format_content(self, *args, **kwargs): + result = self.runner.run(self.async_format_content(*args, **kwargs)) + return result - def create_post(self, content, mentions, hashtags, images, **kwargs): + def create_post(self, content, **kwargs): # hashtags and alt_texts are not used in this function - result, link = asyncio.run(self.async_create_post(content, mentions, images)) - return result, link + result = self.runner.run(self.async_create_post(content)) + return result diff --git a/lib/plugins/slack.py b/lib/plugins/slack.py index 7619523..cf2f2ec 100644 --- a/lib/plugins/slack.py +++ b/lib/plugins/slack.py @@ -1,3 +1,4 @@ +import re import textwrap import requests @@ -10,6 +11,42 @@ def __init__(self, **kwargs): self.channel_id = kwargs.get("channel_id") self.max_content_length = kwargs.get("max_content_length", 40000) + def wrap_text_with_index(self, content): + if len(content) <= self.max_content_length: + return [content] + urls = re.findall(r"https?://\S+", content) + placeholder_content = re.sub( + r"https?://\S+", lambda m: "~" * len(m.group()), content + ) + wrapped_lines = textwrap.wrap( + placeholder_content, self.max_content_length - 8, replace_whitespace=False + ) + final_lines = [] + url_index = 0 + for i, line in enumerate(wrapped_lines, 1): + while "~~~~~~~~~~" in line and url_index < len(urls): + placeholder = "~" * len(urls[url_index]) + line = line.replace(placeholder, urls[url_index], 1) + url_index += 1 + final_lines.append(f"{line} ({i}/{len(wrapped_lines)})") + return final_lines + + def format_content(self, content, mentions, hashtags, images, **kwargs): + warnings = "" + chunks = self.wrap_text_with_index(content) + + formatted_content = { + "body": "\n\n".join(chunks), + "images": images, + "chunks": chunks, + } + preview = formatted_content["body"] + images_preview = "\n".join( + [f'![{image.get("alt_text", "")}]({image["url"]})' for image in images] + ) + preview += "\n\n" + images_preview + return formatted_content, preview, warnings + def upload_images(self, images): uploaded_files = [] for image in images: @@ -40,15 +77,10 @@ def upload_images(self, images): ) return response - def create_post(self, text, mentions, hashtags, images, **kwargs): - status = [] + def create_post(self, content, **kwargs): link = None parent_ts = None - for text in textwrap.wrap( - text, - self.max_content_length, - replace_whitespace=False, - ): + for text in content["chunks"]: response = self.client.chat_postMessage( channel=self.channel_id, text=text, @@ -59,8 +91,10 @@ def create_post(self, text, mentions, hashtags, images, **kwargs): link = self.client.chat_getPermalink( channel=self.channel_id, message_ts=parent_ts )["permalink"] - status.append(response["ok"]) - if images: - response = self.upload_images(images) - status.append(response["ok"]) - return all(status), link + if not response["ok"]: + return False, None + if content["images"]: + response = self.upload_images(content["images"]) + if not response["ok"]: + return False, None + return True, link diff --git a/requirements.txt b/requirements.txt index b8340b6..6adcef0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,4 +6,5 @@ matrix-nio==0.24.0 Pillow==10.3.0 PyYAML==6.0.1 slack_sdk==3.27.1 -jsonschema==4.21.1 \ No newline at end of file +jsonschema==4.21.1 +Markdown==3.6 \ No newline at end of file