From a1e60203ffad8aadcaffdc2916741121d2a54a18 Mon Sep 17 00:00:00 2001
From: stygmate <>
Date: Fri, 15 Sep 2023 12:03:43 +0200
Subject: [PATCH 1/2] Get RT and QT when they are in objects of type TweetWithVisibilityResults

Look up the retweeted / quoted tweet id under an additional path,
"<kind>_status_result.result.tweet.rest_id", after the two existing
paths. Both the rt_id and the qt_id lookups get the same three-entry
fallback list, tried in order by _first().
---
Reviewer note: the qt_id list was missing the comma after
"quoted_status_result.result.rest_id", so Python concatenated it with the
following literal into a single bogus path and dropped both fallbacks.
The comma is added below; hunk line counts are unchanged. The blob ids on
the "index" lines are kept from the original export and no longer match
the corrected content.

 twscrape/models.py | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/twscrape/models.py b/twscrape/models.py
index 31f5852..906bfa2 100644
--- a/twscrape/models.py
+++ b/twscrape/models.py
@@ -189,10 +189,24 @@ class Tweet(JSONTrait):
     def parse(obj: dict, res: dict):
         tw_usr = User.parse(res["users"][obj["user_id_str"]])
 
-        rt_id = _first(obj, ["retweeted_status_id_str", "retweeted_status_result.result.rest_id"])
+        rt_id = _first(
+            obj,
+            [
+                "retweeted_status_id_str",
+                "retweeted_status_result.result.rest_id",
+                "retweeted_status_result.result.tweet.rest_id"
+            ]
+        )
         rt_obj = get_or(res, f"tweets.{rt_id}")
 
-        qt_id = _first(obj, ["quoted_status_id_str", "quoted_status_result.result.rest_id"])
+        qt_id = _first(
+            obj,
+            [
+                "quoted_status_id_str",
+                "quoted_status_result.result.rest_id",
+                "quoted_status_result.result.tweet.rest_id"
+            ]
+        )
         qt_obj = get_or(res, f"tweets.{qt_id}")
 
         doc = Tweet(

From 0f7f2d1dedb52d86dce90e6b6dbd255eff6a08a6 Mon Sep 17 00:00:00 2001
From: stygmate <>
Date: Fri, 15 Sep 2023 12:44:22 +0200
Subject: [PATCH 2/2] correction of bad rawContent reconstruction for RT

When a retweet's rawContent ends with an ellipsis, rebuild it as
"RT @<username>: <retweeted rawContent>". The previous code built
"RT @<retweeted rawContent>", leaving out the username and the ": "
separator. The extra startswith("RT @") guard is removed; the text is
replaced whenever it differs from the rebuilt message.
---
 twscrape/models.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/twscrape/models.py b/twscrape/models.py
index 906bfa2..dd77091 100644
--- a/twscrape/models.py
+++ b/twscrape/models.py
@@ -244,12 +244,8 @@ def parse(obj: dict, res: dict):
         # issue #42 – restore full rt text
         rt = doc.retweetedTweet
         if rt is not None and rt.user is not None and doc.rawContent.endswith("…"):
-            # prefix = f"RT @{rt.user.username}: "
-            # if login changed, old login can be cached in rawContent, so use less strict check
-            prefix = "RT @"
-
-            rt_msg = f"{prefix}{rt.rawContent}"
-            if doc.rawContent != rt_msg and doc.rawContent.startswith(prefix):
+            rt_msg = f"RT @{rt.user.username}: {rt.rawContent}"
+            if doc.rawContent != rt_msg:
                 doc.rawContent = rt_msg
 
         return doc