You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

129 lines
8.9 KiB

import json
import os
import traceback
import httpx
from loguru import logger
from retry import retry
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as ec
from selenium.webdriver.support.wait import WebDriverWait
def login():
logger.info("Logging in to Twitter")
with open("./config/config.json", "r", encoding="utf-8") as f: config = json.load(f)
options = webdriver.ChromeOptions()
options.set_capability("goog:loggingPrefs", {"performance": "ALL"})
#options.add_argument("--headless")
driver = webdriver.Chrome(options=options)
try:
driver.set_page_load_timeout(30)
driver.get("https://x.com/i/flow/login")
WebDriverWait(driver, 10).until(
ec.presence_of_element_located((By.CSS_SELECTOR, 'input[autocomplete="username"]')))
username_field = driver.find_element(By.CSS_SELECTOR, 'input[autocomplete="username"]')
username_field.send_keys(config["email"])
buttons = driver.find_elements(By.TAG_NAME, 'button')
buttons[2].click()
WebDriverWait(driver, 10).until(
ec.presence_of_element_located((By.CSS_SELECTOR, 'input[autocomplete="on"]')))
userid_field = driver.find_element(By.CSS_SELECTOR, 'input[autocomplete="on"]')
if not userid_field.get_attribute("value"):
userid_field.send_keys(config["userid"])
buttons = driver.find_elements(By.TAG_NAME, 'button')
buttons[1].click()
WebDriverWait(driver, 10).until(
ec.presence_of_element_located((By.CSS_SELECTOR, 'input[autocomplete="current-password"]')))
password_field = driver.find_element(By.CSS_SELECTOR, 'input[autocomplete="current-password"]')
password_field.send_keys(config["password"])
login_button = driver.find_element(By.CSS_SELECTOR, 'button[data-testid="LoginForm_Login_Button"]')
login_button.click()
WebDriverWait(driver, 60).until(ec.url_contains('/home'))
cookies = driver.get_cookies()
cookie_string = "; ".join([f"{cookie['name']}={cookie['value']}" for cookie in cookies])
logger.success(f"Twitter login success for username {config['email']}\n{cookie_string}")
driver.get("https://x.com/i/lists/205877981")
WebDriverWait(driver, 30).until(
ec.presence_of_element_located((By.CSS_SELECTOR, 'div[aria-label="Timeline: List"]')))
logs = driver.get_log("performance")
#with open("log.json", "w", encoding="utf-8") as f: json.dump(logs, f, ensure_ascii=False, indent=4)
for packet in logs:
message = json.loads(packet["message"])["message"]
if (message["method"] == "Network.requestWillBeSentExtraInfo" and
":path" in message["params"]["headers"] and
"ListLatestTweetsTimeline" in message["params"]["headers"][":path"]):
headers = message["params"]["headers"]
headers = {k: v for k, v in headers.items() if k not in [":authority", ":method", ":path", ":scheme"]}
logger.success(f"Got request Headers: {headers}")
with open("./config/headers.json", "w", encoding="utf-8") as f:
json.dump(headers, f, ensure_ascii=False, indent=4)
return headers
logger.error(f"Twitter login failed for username {config['email']}: No request found")
except Exception as e:
logger.error(f"Twitter login failed for username {config['email']}: {e}")
traceback.print_exc()
finally:
driver.quit()
@retry(tries=3, delay=5)
def get_list(list_id):
logger.info(f"Getting list https://x.com/i/lists/{list_id}")
with open("./config/config.json", "r", encoding="utf-8") as f: config = json.load(f)
with open("./config/headers.json", "r", encoding="utf-8") as f: headers = json.load(f)
headers["referer"] = f"https://x.com/i/lists/{list_id}"
params = {
'variables': '{"listId":"' + str(list_id) + '","count":20}',
'features': '{"rweb_video_screen_enabled":false,"profile_label_improvements_pcf_label_in_post_enabled":true,"rweb_tipjar_consumption_enabled":true,"verified_phone_label_enabled":false,"creator_subscriptions_tweet_preview_api_enabled":true,"responsive_web_graphql_timeline_navigation_enabled":true,"responsive_web_graphql_skip_user_profile_image_extensions_enabled":false,"premium_content_api_read_enabled":false,"communities_web_enable_tweet_community_results_fetch":true,"c9s_tweet_anatomy_moderator_badge_enabled":true,"responsive_web_grok_analyze_button_fetch_trends_enabled":false,"responsive_web_grok_analyze_post_followups_enabled":true,"responsive_web_jetfuel_frame":false,"responsive_web_grok_share_attachment_enabled":true,"articles_preview_enabled":true,"responsive_web_edit_tweet_api_enabled":true,"graphql_is_translatable_rweb_tweet_is_translatable_enabled":true,"view_counts_everywhere_api_enabled":true,"longform_notetweets_consumption_enabled":true,"responsive_web_twitter_article_tweet_consumption_enabled":true,"tweet_awards_web_tipping_enabled":false,"responsive_web_grok_show_grok_translated_post":false,"responsive_web_grok_analysis_button_from_backend":true,"creator_subscriptions_quote_tweet_preview_enabled":false,"freedom_of_speech_not_reach_fetch_enabled":true,"standardized_nudges_misinfo":true,"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled":true,"longform_notetweets_rich_text_read_enabled":true,"longform_notetweets_inline_media_enabled":true,"responsive_web_grok_image_annotation_enabled":true,"responsive_web_enhance_cards_enabled":false}',
}
resp = httpx.get(
'https://x.com/i/api/graphql/XYC5oRL-TmZ4zwomyY6T-g/ListLatestTweetsTimeline',
params=params,
headers=headers,
proxy=config["proxy"] if "proxy" in config else None,
)
if resp.status_code != 200:
logger.error(f"Error fetching list {list_id}: {resp.status_code} {resp.text}")
os.remove("./config/headers.json")
return None
logger.info(f"Got {list_id}")
return resp.json()
@retry(tries=3, delay=5)
def get_detail(tweet_id):
logger.info(f"Getting tweet detail https://x.com/{{}}/status/{tweet_id}")
with open("./config/config.json", "r", encoding="utf-8") as f: config = json.load(f)
with open("./config/headers.json", "r", encoding="utf-8") as f: headers = json.load(f)
headers["referer"] = f"https://x.com/"
params = {
'variables': '{"focalTweetId":"' + str(tweet_id) + '","referrer":"spheres_detail","with_rux_injections":false,"rankingMode":"Relevance","includePromotedContent":true,"withCommunity":true,"withQuickPromoteEligibilityTweetFields":true,"withBirdwatchNotes":true,"withVoice":true}',
'features': '{"rweb_video_screen_enabled":false,"profile_label_improvements_pcf_label_in_post_enabled":true,"rweb_tipjar_consumption_enabled":true,"verified_phone_label_enabled":false,"creator_subscriptions_tweet_preview_api_enabled":true,"responsive_web_graphql_timeline_navigation_enabled":true,"responsive_web_graphql_skip_user_profile_image_extensions_enabled":false,"premium_content_api_read_enabled":false,"communities_web_enable_tweet_community_results_fetch":true,"c9s_tweet_anatomy_moderator_badge_enabled":true,"responsive_web_grok_analyze_button_fetch_trends_enabled":false,"responsive_web_grok_analyze_post_followups_enabled":true,"responsive_web_jetfuel_frame":false,"responsive_web_grok_share_attachment_enabled":true,"articles_preview_enabled":true,"responsive_web_edit_tweet_api_enabled":true,"graphql_is_translatable_rweb_tweet_is_translatable_enabled":true,"view_counts_everywhere_api_enabled":true,"longform_notetweets_consumption_enabled":true,"responsive_web_twitter_article_tweet_consumption_enabled":true,"tweet_awards_web_tipping_enabled":false,"responsive_web_grok_show_grok_translated_post":false,"responsive_web_grok_analysis_button_from_backend":true,"creator_subscriptions_quote_tweet_preview_enabled":false,"freedom_of_speech_not_reach_fetch_enabled":true,"standardized_nudges_misinfo":true,"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled":true,"longform_notetweets_rich_text_read_enabled":true,"longform_notetweets_inline_media_enabled":true,"responsive_web_grok_image_annotation_enabled":true,"responsive_web_enhance_cards_enabled":false}',
'fieldToggles': '{"withArticleRichContentState":true,"withArticlePlainText":false,"withGrokAnalyze":false,"withDisallowedReplyControls":false}',
}
resp = httpx.get(
'https://x.com/i/api/graphql/u5Tij6ERlSH2LZvCUqallw/TweetDetail',
params=params,
headers=headers,
proxy=config["proxy"] if "proxy" in config else None,
)
if resp.status_code != 200:
logger.error(f"Error fetching tweet detail {tweet_id}: {resp.status_code} {resp.text}")
# os.remove("./config/headers.json")
return None
logger.info(f"Got {tweet_id}")
return resp.json()