def parse_entities(entity):
    """Reduce one tweet entity to a display label plus its text span.

    The kind of entity is inferred from which keys are present:

    * ``display_url`` -> the label is the display URL, unchanged.
    * ``text``        -> the label is ``"#" + text`` (hashtag).
    * ``screen_name`` / ``name`` -> the label is ``"@" + value`` (mention).

    When several of these keys are present, the first match in the order
    above wins. If none is present the label is the empty string.

    Args:
        entity: Mapping describing a single entity. Must contain
            ``"indices"``; the other keys are optional.

    Returns:
        A dict ``{"text": <label>, "indices": entity["indices"]}``. The
        ``indices`` value is the same object as in ``entity`` (not a copy).

    Raises:
        KeyError: If ``entity`` has no ``"indices"`` key.
    """
    if "display_url" in entity:
        label = entity["display_url"]
    elif "text" in entity:
        label = "#" + entity["text"]
    elif "screen_name" in entity:
        # Per Twitter's entity schema, ``screen_name`` is the handle and
        # ``name`` is the free-form display name, so an "@" label should be
        # built from the handle whenever it is available.
        label = "@" + entity["screen_name"]
    elif "name" in entity:
        # Fallback for mention-like entities that carry only ``name``.
        label = "@" + entity["name"]
    else:
        label = ""

    return {
        "text": label,
        "indices": entity["indices"],
    }