fix bug: replace api with AsyncClient

feat: tweet detail api
fix bug: latest tweet timestamp check
3 changed files with 102 additions and 26 deletions
--- a/src/twi_api.py
+++ b/src/twi_api.py
@ -78,7 +78,7 @@ def login():


@retry(tries=3, delay=5)
-def get_list(list_id):
+async def get_list(list_id):
    logger.info(f"Getting list https://x.com/i/lists/{list_id}")
    with open("./config/config.json", "r", encoding="utf-8") as f: config = json.load(f)
    with open("./config/headers.json", "r", encoding="utf-8") as f: headers = json.load(f)
@ -88,15 +88,45 @@ def get_list(list_id):
        'variables': '{"listId":"' + str(list_id) + '","count":20}',
        'features': '{"rweb_video_screen_enabled":false,"profile_label_improvements_pcf_label_in_post_enabled":true,"rweb_tipjar_consumption_enabled":true,"verified_phone_label_enabled":false,"creator_subscriptions_tweet_preview_api_enabled":true,"responsive_web_graphql_timeline_navigation_enabled":true,"responsive_web_graphql_skip_user_profile_image_extensions_enabled":false,"premium_content_api_read_enabled":false,"communities_web_enable_tweet_community_results_fetch":true,"c9s_tweet_anatomy_moderator_badge_enabled":true,"responsive_web_grok_analyze_button_fetch_trends_enabled":false,"responsive_web_grok_analyze_post_followups_enabled":true,"responsive_web_jetfuel_frame":false,"responsive_web_grok_share_attachment_enabled":true,"articles_preview_enabled":true,"responsive_web_edit_tweet_api_enabled":true,"graphql_is_translatable_rweb_tweet_is_translatable_enabled":true,"view_counts_everywhere_api_enabled":true,"longform_notetweets_consumption_enabled":true,"responsive_web_twitter_article_tweet_consumption_enabled":true,"tweet_awards_web_tipping_enabled":false,"responsive_web_grok_show_grok_translated_post":false,"responsive_web_grok_analysis_button_from_backend":true,"creator_subscriptions_quote_tweet_preview_enabled":false,"freedom_of_speech_not_reach_fetch_enabled":true,"standardized_nudges_misinfo":true,"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled":true,"longform_notetweets_rich_text_read_enabled":true,"longform_notetweets_inline_media_enabled":true,"responsive_web_grok_image_annotation_enabled":true,"responsive_web_enhance_cards_enabled":false}',
    }
-    resp = httpx.get(
-        'https://x.com/i/api/graphql/XYC5oRL-TmZ4zwomyY6T-g/ListLatestTweetsTimeline',
-        params=params,
-        headers=headers,
-        proxy=config["proxy"] if "proxy" in config else None,
-    )
-    if resp.status_code != 200:
-        logger.error(f"Error fetching list {list_id}: {resp.status_code} {resp.text}")
-        os.remove("./config/headers.json")
-        return None
-    logger.info(f"Got {list_id}")
-    return resp.json()
+    
+    async with httpx.AsyncClient(proxy=config["proxy"] if "proxy" in config else None) as client:
+        resp = await client.get(
+            'https://x.com/i/api/graphql/XYC5oRL-TmZ4zwomyY6T-g/ListLatestTweetsTimeline',
+            params=params,
+            headers=headers
+        )
+        if resp.status_code != 200:
+            logger.error(f"Error fetching list {list_id}: {resp.status_code} {resp.text}")
+            os.remove("./config/headers.json")
+            return None
+        logger.info(f"Got {list_id}")
+        return resp.json()
+    return None
+
+
+# NOTE(review): `retry` decorates a coroutine function here. If it is a
+# synchronous retry decorator, it only wraps creation of the coroutine, so
+# exceptions raised while awaiting would not trigger a retry - confirm.
+@retry(tries=3, delay=5)
+async def get_detail(tweet_id):
+    """Fetch the TweetDetail GraphQL payload for a single tweet.
+
+    Proxy settings are read from ./config/config.json and request headers
+    from ./config/headers.json on every call.
+
+    Args:
+        tweet_id: Id of the focal tweet; it is converted with ``str()``.
+
+    Returns:
+        The decoded JSON body when the response status is 200, otherwise
+        ``None``. On a non-200 response ./config/headers.json is deleted.
+    """
+    logger.info(f"Getting tweet detail https://x.com/i/status/{tweet_id}")
+    with open("./config/config.json", "r", encoding="utf-8") as f:
+        config = json.load(f)
+    with open("./config/headers.json", "r", encoding="utf-8") as f:
+        headers = json.load(f)
+
+    headers["referer"] = "https://x.com/"
+    params = {
+        'variables': '{"focalTweetId":"' + str(tweet_id) + '","referrer":"spheres_detail","with_rux_injections":false,"rankingMode":"Relevance","includePromotedContent":true,"withCommunity":true,"withQuickPromoteEligibilityTweetFields":true,"withBirdwatchNotes":true,"withVoice":true}',
+        'features': '{"rweb_video_screen_enabled":false,"profile_label_improvements_pcf_label_in_post_enabled":true,"rweb_tipjar_consumption_enabled":true,"verified_phone_label_enabled":false,"creator_subscriptions_tweet_preview_api_enabled":true,"responsive_web_graphql_timeline_navigation_enabled":true,"responsive_web_graphql_skip_user_profile_image_extensions_enabled":false,"premium_content_api_read_enabled":false,"communities_web_enable_tweet_community_results_fetch":true,"c9s_tweet_anatomy_moderator_badge_enabled":true,"responsive_web_grok_analyze_button_fetch_trends_enabled":false,"responsive_web_grok_analyze_post_followups_enabled":true,"responsive_web_jetfuel_frame":false,"responsive_web_grok_share_attachment_enabled":true,"articles_preview_enabled":true,"responsive_web_edit_tweet_api_enabled":true,"graphql_is_translatable_rweb_tweet_is_translatable_enabled":true,"view_counts_everywhere_api_enabled":true,"longform_notetweets_consumption_enabled":true,"responsive_web_twitter_article_tweet_consumption_enabled":true,"tweet_awards_web_tipping_enabled":false,"responsive_web_grok_show_grok_translated_post":false,"responsive_web_grok_analysis_button_from_backend":true,"creator_subscriptions_quote_tweet_preview_enabled":false,"freedom_of_speech_not_reach_fetch_enabled":true,"standardized_nudges_misinfo":true,"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled":true,"longform_notetweets_rich_text_read_enabled":true,"longform_notetweets_inline_media_enabled":true,"responsive_web_grok_image_annotation_enabled":true,"responsive_web_enhance_cards_enabled":false}',
+        'fieldToggles': '{"withArticleRichContentState":true,"withArticlePlainText":false,"withGrokAnalyze":false,"withDisallowedReplyControls":false}',
+    }
+
+    # The body is fully read by client.get(), so the response can be
+    # inspected after the client has been closed.
+    async with httpx.AsyncClient(proxy=config.get("proxy")) as client:
+        resp = await client.get(
+            'https://x.com/i/api/graphql/u5Tij6ERlSH2LZvCUqallw/TweetDetail',
+            params=params,
+            headers=headers,
+        )
+
+    if resp.status_code != 200:
+        logger.error(f"Error fetching tweet detail {tweet_id}: {resp.status_code} {resp.text}")
+        # Drop the stored headers on any non-200 response.
+        os.remove("./config/headers.json")
+        return None
+    logger.info(f"Got {tweet_id}")
+    return resp.json()
--- a/src/twi_parser.py
+++ b/src/twi_parser.py
@ -1,5 +1,7 @@
 import json
 from datetime import datetime
+import traceback
+from pprint import pformat

 from loguru import logger

@ -11,17 +13,24 @@ def parse_timeline(data):
        try:
            result += parse_entry(entry)
        except Exception as e:
-            logger.error(f"error when parsing entry: {e} {e.args}\n{entry}")
+            logger.error(f"error when parsing entry: {e} {e.args}")
+            logger.error(f"\n{traceback.format_exc()}")
+            logger.error(f"\n{pformat(entry)}")
    result.sort(key=lambda x: x["timestamp"], reverse=True)
    return result

 def parse_entry(entry):
    result = []
    entry_id = entry["entryId"]
-    if "list-conversation" in entry_id and not "tweet" in entry_id:
+    if "promoted" in entry_id:
+        return []
+    elif ("list-conversation" in entry_id or "conversationthread" in entry_id \
+        and not "tweet" in entry_id):
        for item in entry["content"]["items"]:
            data = parse_content(item["item"])
            if data: result.append(data)
+    elif "cursor" in entry_id or "bottom" in entry_id:
+        pass
    elif entry["content"]["__typename"] != 'TimelineTimelineCursor':
        data = parse_content(entry["content"])
        if data: result.append(data)
@ -38,7 +47,9 @@ def parse_content(content):
            data["retweeted"] = parse_tweet(tweet["legacy"]["retweeted_status_result"]["result"])
        return data
    except Exception as e:
-        logger.error(f"error when parsing tweet: {e} {e.args}\n{tweet}")
+        logger.error(f"error when parsing tweet: {e} {e.args}")
+        logger.error(f"\n{traceback.format_exc()}")
+        logger.error(f"\n{pformat(tweet)}")
        return {}

 def parse_media(media):
@ -84,8 +95,10 @@ def parse_card(card):
                        "video": ""
                    }
                break
-        except:
-            logger.error(f"error parsing unified_card {card_data}")
+        except Exception as e:
+            logger.error(f"error when parsing unified_card: {e} {e.args}")
+            logger.error(f"\n{traceback.format_exc()}")
+            logger.error(f"\n{pformat(card_data)}")
    
    if "summary_photo_image_original" in data:
        photo = {
@ -96,15 +109,32 @@ def parse_card(card):
    
    return data, photo

+def parse_user(result):
+    """Extract the display fields of a user from a user result object.
+
+    The top-level keys of ``result`` are overlaid with the contents of its
+    ``core``, ``legacy`` and ``avatar`` sub-dicts, in that order (later ones
+    win), so each field is found whether it sits at the top level or inside
+    one of those sub-dicts. ``result`` itself is left unmodified.
+
+    Args:
+        result: Dict describing a user.
+
+    Returns:
+        Dict with ``name``, ``screen_name``, ``profile_image`` (taken from
+        ``profile_image_url_https`` or ``image_url``, with ``"_normal."``
+        replaced by ``"."``) and ``profile_image_shape``. The last two are
+        ``None`` when absent.
+
+    Raises:
+        KeyError: If ``name`` or ``screen_name`` cannot be found.
+    """
+    # Merge into a copy: updating ``result`` in place would flatten the
+    # sub-dicts into the caller's payload.
+    merged = dict(result)
+    for section in ("core", "legacy", "avatar"):
+        merged.update(result.get(section) or {})
+
+    user = {
+        "name": merged["name"],
+        "screen_name": merged["screen_name"],
+        "profile_image": merged.get("profile_image_url_https") or merged.get("image_url"),
+        "profile_image_shape": merged.get("profile_image_shape"),
+    }
+    if user["profile_image"]:
+        user["profile_image"] = user["profile_image"].replace("_normal.", ".")
+    return user
+
+
 def parse_tweet(tweet):
    # with open("tweet.json", "w") as f: json.dump(tweet, f)
    while not "rest_id" in tweet: tweet = tweet["tweet"]
+
    data = {
        "rest_id": tweet["rest_id"],
-        "name": tweet["core"]["user_results"]["result"]["legacy"]["name"],
-        "screen_name": tweet["core"]["user_results"]["result"]["legacy"]["screen_name"],
-        "profile_image": tweet["core"]["user_results"]["result"]["legacy"]["profile_image_url_https"],
-        "profile_image_shape": tweet["core"]["user_results"]["result"]["profile_image_shape"],
+        "name": "",
+        "screen_name": "",
+        "profile_image": "",
+        "profile_image_shape": "",
        "full_text": tweet["legacy"]["full_text"],
        "created_at": tweet["legacy"]["created_at"],
        "timestamp": int(datetime.strptime(tweet["legacy"]["created_at"], '%a %b %d %H:%M:%S %z %Y').timestamp()),
@ -115,7 +145,8 @@ def parse_tweet(tweet):
        "retweeted": {},
        "card": {}
    }
-    data["profile_image"] = data["profile_image"].replace("_normal.", ".")
+    user = parse_user(tweet["core"]["user_results"]["result"])
+    data.update(user)
    
    if "in_reply_to_status_id_str" in tweet["legacy"]:
        data["reply_to"] = tweet["legacy"]["in_reply_to_status_id_str"]
@ -133,3 +164,18 @@ def parse_tweet(tweet):
        if _photo: data["media"].append(_photo)
    
    return data
+
+
+
+def parse_detail(data):
+    """Parse a tweet-detail response into a list sorted by ascending timestamp.
+
+    Every entry of the first instruction under
+    ``data["data"]["threaded_conversation_with_injections_v2"]`` is passed to
+    ``parse_entry``. An entry that raises is logged (message, traceback and
+    pretty-printed entry) and skipped.
+
+    Args:
+        data: Decoded response dict.
+
+    Returns:
+        The concatenated ``parse_entry`` results, ordered by their
+        ``"timestamp"`` value, lowest first.
+    """
+    conversation = data["data"]["threaded_conversation_with_injections_v2"]
+    parsed = []
+    for item in conversation["instructions"][0]["entries"]:
+        try:
+            parsed.extend(parse_entry(item))
+        except Exception as err:
+            logger.error(f"error when parsing entry: {err} {err.args}")
+            logger.error(f"\n{traceback.format_exc()}")
+            logger.error(f"\n{pformat(item)}")
+    return sorted(parsed, key=lambda parsed_item: parsed_item["timestamp"])
--- a/src/twitter.py
+++ b/src/twitter.py
@ -21,7 +21,7 @@ def check_new_tweets(tweets, list_id):
    if list_id in LATEST_TWEET_ID_DICT:
        for tweet in tweets:
            if tweet["rest_id"] == LATEST_TWEET_ID_DICT[list_id]: break
-            if tweet["timestamp"] < LATEST_TWEET_TS_DICT[list_id]: break
+            if tweet["timestamp"] <= LATEST_TWEET_TS_DICT[list_id]: break
            # if time.time() - tweet["timestamp"] > 1200: break
            new_tweets.append(tweet)
    
@ -48,7 +48,7 @@ async def task_handler(args):
    LATEST_CHECK_TIME[args["name"]] = time.time()
    
    list_id = int(args["url"].split("/")[-1])
-    data = get_list(list_id)
+    data = await get_list(list_id)
    if data:
        tweets = parse_timeline(data)
        new_tweets = check_new_tweets(tweets, list_id)
Author	SHA1	Message	Date
wlt233	fd9ed57637	fix bug: replace api with AsyncClient	2 weeks ago
wlt233	72c4706de9	feat: tweet detail api	2 weeks ago
wlt233	c8ae58e1e0	fix bug: latest tweet timestamp check	2 weeks ago
wlt233	b31f49a1d7	fix bug: promoted ads filter	2 weeks ago