feat: parse entities (v1.2.0)

6 months ago · 78e7c6decb
parent 8019920dfb
commit 78e7c6decb
2 changed files with 20 additions and 1 deletion
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
 config.json
 chromedriver.exe
 lovelive.json
 # Byte-compiled / optimized / DLL files
--- a/twitter.py
+++ b/twitter.py
@@ -127,6 +127,16 @@ def parse_media(media):
        if variants: data["video"] = variants[0]["url"]
    return data
 def parse_entities(entity):
    """Reduce one tweet entity to its display text and character span.

    `entity` must contain "indices", which is passed through unchanged
    (a missing key raises KeyError). The text is derived from whichever
    of these keys is present; when several are, the last one listed wins:

      "name"        -> "@" + value
      "text"        -> "#" + value
      "display_url" -> value as-is

    If none is present the text stays "".

    NOTE(review): mentions are labelled from "name", not "screen_name" --
    confirm this is the intended field.

    Returns a dict with keys "text" and "indices".
    """
    span = entity["indices"]
    label = ""
    # (key, sigil) pairs in ascending precedence; a sigil of None means
    # the value is used verbatim rather than prefixed.
    for key, sigil in (("name", "@"), ("text", "#"), ("display_url", None)):
        if key not in entity:
            continue
        value = entity[key]
        label = value if sigil is None else sigil + value
    return {"text": label, "indices": span}
 def parse_tweet(tweet):
    data = {
        "rest_id": tweet["rest_id"],
@@ -138,11 +148,19 @@ def parse_tweet(tweet):
        "created_at": tweet["legacy"]["created_at"],
        "timestamp": int(datetime.strptime(tweet["legacy"]["created_at"], '%a %b %d %H:%M:%S %z %Y').timestamp()),
        "media": [],
        "entities": [],
        "quoted": {},
        "retweeted": {}
    }
    for m in tweet["legacy"]["entities"].get("media", []):
        data["media"].append(parse_media(m))
    for e in ["user_mentions", "hashtags", "urls"]:
        for m in tweet["legacy"]["entities"].get(e, []):
            data["entities"].append(parse_entities(m))
    data["entities"].sort(key=lambda x: x["indices"][0])
    return data