feat: parse entities (v1.2.0)

4 months ago · 78e7c6decb
parent 8019920dfb
commit 78e7c6decb
2 changed files with 20 additions and 1 deletion
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
 config.json
 chromedriver.exe
+lovelive.json


 # Byte-compiled / optimized / DLL files
--- a/twitter.py
+++ b/twitter.py
@@ -126,7 +126,17 @@ def parse_media(media):
        variants.sort(key=lambda x: x["bitrate"], reverse=True)
        if variants: data["video"] = variants[0]["url"]
    return data
-    
+
+def parse_entities(entity):
+    data = {
+        "text": "",
+        "indices": entity["indices"]
+    }
+    if "name" in entity: data["text"] = "@" + entity["name"]
+    if "text" in entity: data["text"] = "#" + entity["text"]
+    if "display_url" in entity: data["text"] = entity["display_url"]
+    return data
+
 def parse_tweet(tweet):
    data = {
        "rest_id": tweet["rest_id"],
@@ -138,11 +148,19 @@ def parse_tweet(tweet):
        "created_at": tweet["legacy"]["created_at"],
        "timestamp": int(datetime.strptime(tweet["legacy"]["created_at"], '%a %b %d %H:%M:%S %z %Y').timestamp()),
        "media": [],
+        "entities": [],
        "quoted": {},
        "retweeted": {}
    }
+    
    for m in tweet["legacy"]["entities"].get("media", []):
        data["media"].append(parse_media(m))
+    
+    for e in ["user_mentions", "hashtags", "urls"]:
+        for m in tweet["legacy"]["entities"].get(e, []):
+            data["entities"].append(parse_entities(m))
+    data["entities"].sort(key=lambda x: x["indices"][0])
+    
    return data