Compare commits

4 Commits

@@ -78,7 +78,7 @@ def login():
 @retry(tries=3, delay=5)
-def get_list(list_id):
+async def get_list(list_id):
     logger.info(f"Getting list https://x.com/i/lists/{list_id}")
     with open("./config/config.json", "r", encoding="utf-8") as f: config = json.load(f)
     with open("./config/headers.json", "r", encoding="utf-8") as f: headers = json.load(f)
@@ -88,15 +88,45 @@ def get_list(list_id):
         'variables': '{"listId":"' + str(list_id) + '","count":20}',
         'features': '{"rweb_video_screen_enabled":false,"profile_label_improvements_pcf_label_in_post_enabled":true,"rweb_tipjar_consumption_enabled":true,"verified_phone_label_enabled":false,"creator_subscriptions_tweet_preview_api_enabled":true,"responsive_web_graphql_timeline_navigation_enabled":true,"responsive_web_graphql_skip_user_profile_image_extensions_enabled":false,"premium_content_api_read_enabled":false,"communities_web_enable_tweet_community_results_fetch":true,"c9s_tweet_anatomy_moderator_badge_enabled":true,"responsive_web_grok_analyze_button_fetch_trends_enabled":false,"responsive_web_grok_analyze_post_followups_enabled":true,"responsive_web_jetfuel_frame":false,"responsive_web_grok_share_attachment_enabled":true,"articles_preview_enabled":true,"responsive_web_edit_tweet_api_enabled":true,"graphql_is_translatable_rweb_tweet_is_translatable_enabled":true,"view_counts_everywhere_api_enabled":true,"longform_notetweets_consumption_enabled":true,"responsive_web_twitter_article_tweet_consumption_enabled":true,"tweet_awards_web_tipping_enabled":false,"responsive_web_grok_show_grok_translated_post":false,"responsive_web_grok_analysis_button_from_backend":true,"creator_subscriptions_quote_tweet_preview_enabled":false,"freedom_of_speech_not_reach_fetch_enabled":true,"standardized_nudges_misinfo":true,"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled":true,"longform_notetweets_rich_text_read_enabled":true,"longform_notetweets_inline_media_enabled":true,"responsive_web_grok_image_annotation_enabled":true,"responsive_web_enhance_cards_enabled":false}',
     }
-    resp = httpx.get(
-        'https://x.com/i/api/graphql/XYC5oRL-TmZ4zwomyY6T-g/ListLatestTweetsTimeline',
-        params=params,
-        headers=headers,
-        proxy=config["proxy"] if "proxy" in config else None,
-    )
-    if resp.status_code != 200:
-        logger.error(f"Error fetching list {list_id}: {resp.status_code} {resp.text}")
-        os.remove("./config/headers.json")
-        return None
-    logger.info(f"Got {list_id}")
-    return resp.json()
+    async with httpx.AsyncClient(proxy=config["proxy"] if "proxy" in config else None) as client:
+        resp = await client.get(
+            'https://x.com/i/api/graphql/XYC5oRL-TmZ4zwomyY6T-g/ListLatestTweetsTimeline',
+            params=params,
+            headers=headers
+        )
+        if resp.status_code != 200:
+            logger.error(f"Error fetching list {list_id}: {resp.status_code} {resp.text}")
+            os.remove("./config/headers.json")
+            return None
+        logger.info(f"Got {list_id}")
+        return resp.json()
+    return None
+
+@retry(tries=3, delay=5)
+async def get_detail(tweet_id):
+    logger.info(f"Getting tweet detail https://x.com/{{}}/status/{tweet_id}")
+    with open("./config/config.json", "r", encoding="utf-8") as f: config = json.load(f)
+    with open("./config/headers.json", "r", encoding="utf-8") as f: headers = json.load(f)
+    headers["referer"] = f"https://x.com/"
+    params = {
+        'variables': '{"focalTweetId":"' + str(tweet_id) + '","referrer":"spheres_detail","with_rux_injections":false,"rankingMode":"Relevance","includePromotedContent":true,"withCommunity":true,"withQuickPromoteEligibilityTweetFields":true,"withBirdwatchNotes":true,"withVoice":true}',
+        'features': '{"rweb_video_screen_enabled":false,"profile_label_improvements_pcf_label_in_post_enabled":true,"rweb_tipjar_consumption_enabled":true,"verified_phone_label_enabled":false,"creator_subscriptions_tweet_preview_api_enabled":true,"responsive_web_graphql_timeline_navigation_enabled":true,"responsive_web_graphql_skip_user_profile_image_extensions_enabled":false,"premium_content_api_read_enabled":false,"communities_web_enable_tweet_community_results_fetch":true,"c9s_tweet_anatomy_moderator_badge_enabled":true,"responsive_web_grok_analyze_button_fetch_trends_enabled":false,"responsive_web_grok_analyze_post_followups_enabled":true,"responsive_web_jetfuel_frame":false,"responsive_web_grok_share_attachment_enabled":true,"articles_preview_enabled":true,"responsive_web_edit_tweet_api_enabled":true,"graphql_is_translatable_rweb_tweet_is_translatable_enabled":true,"view_counts_everywhere_api_enabled":true,"longform_notetweets_consumption_enabled":true,"responsive_web_twitter_article_tweet_consumption_enabled":true,"tweet_awards_web_tipping_enabled":false,"responsive_web_grok_show_grok_translated_post":false,"responsive_web_grok_analysis_button_from_backend":true,"creator_subscriptions_quote_tweet_preview_enabled":false,"freedom_of_speech_not_reach_fetch_enabled":true,"standardized_nudges_misinfo":true,"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled":true,"longform_notetweets_rich_text_read_enabled":true,"longform_notetweets_inline_media_enabled":true,"responsive_web_grok_image_annotation_enabled":true,"responsive_web_enhance_cards_enabled":false}',
+        'fieldToggles': '{"withArticleRichContentState":true,"withArticlePlainText":false,"withGrokAnalyze":false,"withDisallowedReplyControls":false}',
+    }
+    async with httpx.AsyncClient(proxy=config["proxy"] if "proxy" in config else None) as client:
+        resp = await client.get(
+            'https://x.com/i/api/graphql/u5Tij6ERlSH2LZvCUqallw/TweetDetail',
+            params=params,
+            headers=headers
+        )
+        if resp.status_code != 200:
+            logger.error(f"Error fetching tweet detail {tweet_id}: {resp.status_code} {resp.text}")
+            os.remove("./config/headers.json")
+            return None
+        logger.info(f"Got {tweet_id}")
+        return resp.json()
+    return None
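
Note on the sync-to-async conversion above (annotation, not part of the diff): the blocking httpx.get call, which took the proxy per request, becomes an httpx.AsyncClient that binds the proxy for the client's lifetime. A minimal standalone sketch of the same pattern, with an illustrative URL:

import asyncio
import httpx

async def fetch(url: str, proxy: str | None = None) -> dict | None:
    # The proxy moves from the request call to the client constructor.
    async with httpx.AsyncClient(proxy=proxy) as client:
        resp = await client.get(url)
        if resp.status_code != 200:
            return None
        return resp.json()

print(asyncio.run(fetch("https://httpbin.org/json")))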

@@ -1,5 +1,7 @@
 import json
 from datetime import datetime
+import traceback
+from pprint import pformat
 from loguru import logger
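
The two new imports support the richer error logging in the hunks below. A quick standalone illustration of what each contributes inside an except block (dummy data, for annotation only):

import traceback
from pprint import pformat

entry = {"entryId": "tweet-1", "content": {"item": {"nested": "payload"}}}
try:
    entry["missing_key"]
except Exception as e:
    print(f"error when parsing entry: {e} {e.args}")
    print(traceback.format_exc())  # full stack trace as a string
    print(pformat(entry))          # pretty-printed, line-wrapped dict dump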
@@ -11,17 +13,24 @@ def parse_timeline(data):
         try:
             result += parse_entry(entry)
         except Exception as e:
-            logger.error(f"error when parsing entry: {e} {e.args}\n{entry}")
+            logger.error(f"error when parsing entry: {e} {e.args}")
+            logger.error(f"\n{traceback.format_exc()}")
+            logger.error(f"\n{pformat(entry)}")
     result.sort(key=lambda x: x["timestamp"], reverse=True)
     return result

 def parse_entry(entry):
     result = []
     entry_id = entry["entryId"]
-    if "list-conversation" in entry_id and not "tweet" in entry_id:
+    if "promoted" in entry_id:
+        return []
+    elif ("list-conversation" in entry_id or "conversationthread" in entry_id \
+        and not "tweet" in entry_id):
         for item in entry["content"]["items"]:
             data = parse_content(item["item"])
             if data: result.append(data)
+    elif "cursor" in entry_id or "bottom" in entry_id:
+        pass
     elif entry["content"]["__typename"] != 'TimelineTimelineCursor':
         data = parse_content(entry["content"])
         if data: result.append(data)
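
One reviewer-style note on the new elif condition (a Python fact, not an assumption): `and` binds tighter than `or`, so despite the parentheses spanning both lines the test groups as `"list-conversation" in entry_id or ("conversationthread" in entry_id and not "tweet" in entry_id)`. The `not "tweet"` guard therefore no longer applies to the list-conversation case:

entry_id = "list-conversation-123-tweet-456"
result = ("list-conversation" in entry_id or "conversationthread" in entry_id
          and not "tweet" in entry_id)
print(result)  # True -- the old `and not "tweet"` check would have given False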
@@ -38,7 +47,9 @@ def parse_content(content):
             data["retweeted"] = parse_tweet(tweet["legacy"]["retweeted_status_result"]["result"])
         return data
     except Exception as e:
-        logger.error(f"error when parsing tweet: {e} {e.args}\n{tweet}")
+        logger.error(f"error when parsing tweet: {e} {e.args}")
+        logger.error(f"\n{traceback.format_exc()}")
+        logger.error(f"\n{pformat(tweet)}")
         return {}

 def parse_media(media):
@@ -84,8 +95,10 @@ def parse_card(card):
                     "video": ""
                 }
                 break
-        except:
-            logger.error(f"error parsing unified_card {card_data}")
+        except Exception as e:
+            logger.error(f"error when parsing unified_card: {e} {e.args}")
+            logger.error(f"\n{traceback.format_exc()}")
+            logger.error(f"\n{pformat(card_data)}")
     if "summary_photo_image_original" in data:
         photo = {
@@ -96,15 +109,32 @@ def parse_card(card):
     return data, photo

+def parse_user(result):
+    user_result = result
+    user_result.update(result.get("core", {}))
+    user_result.update(result.get("legacy", {}))
+    user_result.update(result.get("avatar", {}))
+    user = {
+        "name": user_result["name"],
+        "screen_name": user_result["screen_name"],
+        "profile_image": user_result.get("profile_image_url_https") or user_result.get("image_url"),
+        "profile_image_shape": user_result.get("profile_image_shape"),
+    }
+    if user["profile_image"]:
+        user["profile_image"] = user["profile_image"].replace("_normal.", ".")
+    return user
+
 def parse_tweet(tweet):
     # with open("tweet.json", "w") as f: json.dump(tweet, f)
     while not "rest_id" in tweet: tweet = tweet["tweet"]
     data = {
         "rest_id": tweet["rest_id"],
-        "name": tweet["core"]["user_results"]["result"]["legacy"]["name"],
-        "screen_name": tweet["core"]["user_results"]["result"]["legacy"]["screen_name"],
-        "profile_image": tweet["core"]["user_results"]["result"]["legacy"]["profile_image_url_https"],
-        "profile_image_shape": tweet["core"]["user_results"]["result"]["profile_image_shape"],
+        "name": "",
+        "screen_name": "",
+        "profile_image": "",
+        "profile_image_shape": "",
         "full_text": tweet["legacy"]["full_text"],
         "created_at": tweet["legacy"]["created_at"],
         "timestamp": int(datetime.strptime(tweet["legacy"]["created_at"], '%a %b %d %H:%M:%S %z %Y').timestamp()),
@@ -115,7 +145,8 @@ def parse_tweet(tweet):
         "retweeted": {},
         "card": {}
     }
-    data["profile_image"] = data["profile_image"].replace("_normal.", ".")
+    user = parse_user(tweet["core"]["user_results"]["result"])
+    data.update(user)
     if "in_reply_to_status_id_str" in tweet["legacy"]:
         data["reply_to"] = tweet["legacy"]["in_reply_to_status_id_str"]
@@ -133,3 +164,18 @@ def parse_tweet(tweet):
         if _photo: data["media"].append(_photo)
     return data
+
+def parse_detail(data):
+    entries = data["data"]["threaded_conversation_with_injections_v2"]["instructions"][0]["entries"]
+    result = []
+    for entry in entries:
+        try:
+            result += parse_entry(entry)
+        except Exception as e:
+            logger.error(f"error when parsing entry: {e} {e.args}")
+            logger.error(f"\n{traceback.format_exc()}")
+            logger.error(f"\n{pformat(entry)}")
+    result.sort(key=lambda x: x["timestamp"])
+    return result
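
A usage sketch tying the new pieces together (annotation only; the module names api and parser are assumptions for illustration). Note that parse_detail sorts ascending, thread order, where parse_timeline sorts newest-first:

import asyncio

async def main():
    raw = await api.get_detail("1234567890")  # hypothetical tweet id
    if raw:
        thread = parser.parse_detail(raw)     # ascending timestamps: thread order
        for t in thread:
            print(t["screen_name"], t["full_text"][:80])

asyncio.run(main())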

@@ -21,7 +21,7 @@ def check_new_tweets(tweets, list_id):
     if list_id in LATEST_TWEET_ID_DICT:
         for tweet in tweets:
             if tweet["rest_id"] == LATEST_TWEET_ID_DICT[list_id]: break
-            if tweet["timestamp"] < LATEST_TWEET_TS_DICT[list_id]: break
+            if tweet["timestamp"] <= LATEST_TWEET_TS_DICT[list_id]: break
             # if time.time() - tweet["timestamp"] > 1200: break
             new_tweets.append(tweet)
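
Why `<=` matters here (reasoning about the hunk, with hypothetical values): under `<`, an already-seen tweet sharing the exact second of the recorded latest timestamp, but with a different rest_id, slips past both break conditions and gets re-reported. A minimal reproduction:

def check_new(tweets, latest_id, latest_ts):
    # Simplified mirror of the loop above, for illustration only.
    new = []
    for t in tweets:
        if t["rest_id"] == latest_id: break
        if t["timestamp"] <= latest_ts: break
        new.append(t)
    return new

timeline = [{"rest_id": "C", "timestamp": 1700000050},
            {"rest_id": "A", "timestamp": 1700000000}]  # A: same second as stored latest "B"
print(check_new(timeline, "B", 1700000000))  # [C]; with `<` A would be returned again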
@@ -48,7 +48,7 @@ async def task_handler(args):
     LATEST_CHECK_TIME[args["name"]] = time.time()
     list_id = int(args["url"].split("/")[-1])
-    data = get_list(list_id)
+    data = await get_list(list_id)
     if data:
         tweets = parse_timeline(data)
         new_tweets = check_new_tweets(tweets, list_id)
