From 72c4706de99f0b287a039a2cc9cd1b5eae272d82 Mon Sep 17 00:00:00 2001
From: wlt233 <1486185683@qq.com>
Date: Mon, 26 May 2025 11:08:41 +0800
Subject: [PATCH] feat: tweet detail api

Add a wrapper for the TweetDetail GraphQL endpoint (get_detail) and a
matching parser (parse_detail), plus richer error logging via
traceback/pformat and a parse_user helper that tolerates both the old
(legacy) and new (core/avatar) user-result layouts.

Review fixes folded in:
- parse_entry: parenthesize the entry-id test; the original
  `A or B and not C` bound as `A or (B and not C)`, which let
  list-conversation entries whose id contains "tweet" into the wrong
  branch.
- get_detail: drop a leftover `{{}}` placeholder from the log line and
  a needless f-prefix on the referer string.
---
 src/twi_api.py    | 27 +++++++++++++++++++
 src/twi_parser.py | 66 +++++++++++++++++++++++++++++++++++++++--------
 2 files changed, 82 insertions(+), 11 deletions(-)

diff --git a/src/twi_api.py b/src/twi_api.py
index 2712a39..09eca3c 100644
--- a/src/twi_api.py
+++ b/src/twi_api.py
@@ -100,3 +100,30 @@ def get_list(list_id):
         return None
     logger.info(f"Got {list_id}")
     return resp.json()
+
+
+@retry(tries=3, delay=5)
+def get_detail(tweet_id):
+    logger.info(f"Getting tweet detail https://x.com/i/web/status/{tweet_id}")
+    with open("./config/config.json", "r", encoding="utf-8") as f: config = json.load(f)
+    with open("./config/headers.json", "r", encoding="utf-8") as f: headers = json.load(f)
+
+    headers["referer"] = "https://x.com/"
+    params = {
+        'variables': '{"focalTweetId":"' + str(tweet_id) + '","referrer":"spheres_detail","with_rux_injections":false,"rankingMode":"Relevance","includePromotedContent":true,"withCommunity":true,"withQuickPromoteEligibilityTweetFields":true,"withBirdwatchNotes":true,"withVoice":true}',
+        'features':
+'{"rweb_video_screen_enabled":false,"profile_label_improvements_pcf_label_in_post_enabled":true,"rweb_tipjar_consumption_enabled":true,"verified_phone_label_enabled":false,"creator_subscriptions_tweet_preview_api_enabled":true,"responsive_web_graphql_timeline_navigation_enabled":true,"responsive_web_graphql_skip_user_profile_image_extensions_enabled":false,"premium_content_api_read_enabled":false,"communities_web_enable_tweet_community_results_fetch":true,"c9s_tweet_anatomy_moderator_badge_enabled":true,"responsive_web_grok_analyze_button_fetch_trends_enabled":false,"responsive_web_grok_analyze_post_followups_enabled":true,"responsive_web_jetfuel_frame":false,"responsive_web_grok_share_attachment_enabled":true,"articles_preview_enabled":true,"responsive_web_edit_tweet_api_enabled":true,"graphql_is_translatable_rweb_tweet_is_translatable_enabled":true,"view_counts_everywhere_api_enabled":true,"longform_notetweets_consumption_enabled":true,"responsive_web_twitter_article_tweet_consumption_enabled":true,"tweet_awards_web_tipping_enabled":false,"responsive_web_grok_show_grok_translated_post":false,"responsive_web_grok_analysis_button_from_backend":true,"creator_subscriptions_quote_tweet_preview_enabled":false,"freedom_of_speech_not_reach_fetch_enabled":true,"standardized_nudges_misinfo":true,"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled":true,"longform_notetweets_rich_text_read_enabled":true,"longform_notetweets_inline_media_enabled":true,"responsive_web_grok_image_annotation_enabled":true,"responsive_web_enhance_cards_enabled":false}',
+        'fieldToggles': '{"withArticleRichContentState":true,"withArticlePlainText":false,"withGrokAnalyze":false,"withDisallowedReplyControls":false}',
+    }
+    resp = httpx.get(
+        'https://x.com/i/api/graphql/u5Tij6ERlSH2LZvCUqallw/TweetDetail',
+        params=params,
+        headers=headers,
+        proxy=config["proxy"] if "proxy" in config else None,
+    )
+    if resp.status_code != 200:
+        logger.error(f"Error fetching tweet detail {tweet_id}: {resp.status_code} {resp.text}")
+        # os.remove("./config/headers.json")
+        return None
+    logger.info(f"Got {tweet_id}")
+    return resp.json()
\ No newline at end of file
diff --git a/src/twi_parser.py b/src/twi_parser.py
index 2e41fdb..eac1c35 100644
--- a/src/twi_parser.py
+++ b/src/twi_parser.py
@@ -1,5 +1,7 @@
 import json
 from datetime import datetime
+import traceback
+from pprint import pformat
 
 from loguru import logger
 
@@ -11,7 +13,9 @@ def parse_timeline(data):
         try:
             result += parse_entry(entry)
         except Exception as e:
-            logger.error(f"error when parsing entry: {e} {e.args}\n{entry}")
+            logger.error(f"error when parsing entry: {e} {e.args}")
+            logger.error(f"\n{traceback.format_exc()}")
+            logger.error(f"\n{pformat(entry)}")
     result.sort(key=lambda x: x["timestamp"], reverse=True)
     return result
 
@@ -20,10 +24,13 @@ def parse_entry(entry):
     entry_id = entry["entryId"]
     if "promoted" in entry_id: return []
-    elif "list-conversation" in entry_id and not "tweet" in entry_id:
+    elif (("list-conversation" in entry_id or "conversationthread" in entry_id)
+            and "tweet" not in entry_id):
         for item in entry["content"]["items"]:
             data = parse_content(item["item"])
             if data: result.append(data)
+    elif "cursor" in entry_id or "bottom" in entry_id:
+        pass
     elif entry["content"]["__typename"] != 'TimelineTimelineCursor':
         data = parse_content(entry["content"])
         if data: result.append(data)
     return result
@@ -40,7 +47,9 @@ def parse_content(content):
             data["retweeted"] = parse_tweet(tweet["legacy"]["retweeted_status_result"]["result"])
         return data
     except Exception as e:
-        logger.error(f"error when parsing tweet: {e} {e.args}\n{tweet}")
+        logger.error(f"error when parsing tweet: {e} {e.args}")
+        logger.error(f"\n{traceback.format_exc()}")
+        logger.error(f"\n{pformat(tweet)}")
         return {}
 
 def parse_media(media):
@@ -86,8 +95,10 @@ def parse_card(card):
                     "video": ""
                 }
                 break
-        except:
-            logger.error(f"error parsing unified_card {card_data}")
+        except Exception as e:
+            logger.error(f"error when parsing unified_card: {e} {e.args}")
+            logger.error(f"\n{traceback.format_exc()}")
+            logger.error(f"\n{pformat(card_data)}")
 
     if "summary_photo_image_original" in data:
         photo = {
@@ -98,15 +109,32 @@ def parse_card(card):
     return data, photo
 
 
+def parse_user(result):
+    user_result = result
+    user_result.update(result.get("core", {}))
+    user_result.update(result.get("legacy", {}))
+    user_result.update(result.get("avatar", {}))
+    user = {
+        "name": user_result["name"],
+        "screen_name": user_result["screen_name"],
+        "profile_image": user_result.get("profile_image_url_https") or user_result.get("image_url"),
+        "profile_image_shape": user_result.get("profile_image_shape"),
+    }
+    if user["profile_image"]:
+        user["profile_image"] = user["profile_image"].replace("_normal.", ".")
+    return user
+
+
 def parse_tweet(tweet):
     # with open("tweet.json", "w") as f: json.dump(tweet, f)
     while not "rest_id" in tweet: tweet = tweet["tweet"]
+
     data = {
         "rest_id": tweet["rest_id"],
-        "name": tweet["core"]["user_results"]["result"]["legacy"]["name"],
-        "screen_name": tweet["core"]["user_results"]["result"]["legacy"]["screen_name"],
-        "profile_image": tweet["core"]["user_results"]["result"]["legacy"]["profile_image_url_https"],
-        "profile_image_shape": tweet["core"]["user_results"]["result"]["profile_image_shape"],
+        "name": "",
+        "screen_name": "",
+        "profile_image": "",
+        "profile_image_shape": "",
         "full_text": tweet["legacy"]["full_text"],
         "created_at": tweet["legacy"]["created_at"],
         "timestamp": int(datetime.strptime(tweet["legacy"]["created_at"], '%a %b %d %H:%M:%S %z %Y').timestamp()),
@@ -117,7 +145,8 @@ def parse_tweet(tweet):
         "retweeted": {},
         "card": {}
     }
-    data["profile_image"] = data["profile_image"].replace("_normal.", ".")
+    user = parse_user(tweet["core"]["user_results"]["result"])
+    data.update(user)
 
     if "in_reply_to_status_id_str" in tweet["legacy"]:
         data["reply_to"] = tweet["legacy"]["in_reply_to_status_id_str"]
@@ -134,4 +163,19 @@ def parse_tweet(tweet):
         data["card"], _photo = parse_card(tweet["card"])
         if _photo: data["media"].append(_photo)
 
-    return data
\ No newline at end of file
+    return data
+
+
+
+def parse_detail(data):
+    entries = data["data"]["threaded_conversation_with_injections_v2"]["instructions"][0]["entries"]
+    result = []
+    for entry in entries:
+        try:
+            result += parse_entry(entry)
+        except Exception as e:
+            logger.error(f"error when parsing entry: {e} {e.args}")
+            logger.error(f"\n{traceback.format_exc()}")
+            logger.error(f"\n{pformat(entry)}")
+    result.sort(key=lambda x: x["timestamp"])
+    return result
\ No newline at end of file