feat: parse entities (v1.2.0)

master
wlt233 2 months ago
parent 8019920dfb
commit 78e7c6decb

1
.gitignore vendored

@ -1,5 +1,6 @@
config.json config.json
chromedriver.exe chromedriver.exe
lovelive.json
# Byte-compiled / optimized / DLL files # Byte-compiled / optimized / DLL files

@ -127,6 +127,16 @@ def parse_media(media):
if variants: data["video"] = variants[0]["url"] if variants: data["video"] = variants[0]["url"]
return data return data
def parse_entities(entity):
    """Reduce one entity dict to its display text and character span.

    The returned dict has two keys:

    * ``"text"`` -- ``"@" + entity["name"]`` when a ``"name"`` key is
      present, ``"#" + entity["text"]`` when a ``"text"`` key is present,
      or ``entity["display_url"]`` when a ``"display_url"`` key is present.
      When several of these keys exist, the last one in that order wins.
      If none exist the text is the empty string.
    * ``"indices"`` -- ``entity["indices"]`` passed through unchanged.

    Raises ``KeyError`` if ``entity`` has no ``"indices"`` key.

    NOTE(review): the three key shapes presumably correspond to the
    "user_mentions", "hashtags" and "urls" entity lists -- confirm against
    the caller and the upstream API payload.
    """
    # Read the span first so a missing "indices" fails before anything else.
    span = entity["indices"]

    label = ""
    # Checked in order; a later match deliberately overrides an earlier one.
    for field, sigil in (("name", "@"), ("text", "#")):
        if field in entity:
            label = sigil + entity[field]

    # The display URL is used verbatim (no prefix) and has top priority.
    if "display_url" in entity:
        label = entity["display_url"]

    return {"text": label, "indices": span}
def parse_tweet(tweet): def parse_tweet(tweet):
data = { data = {
"rest_id": tweet["rest_id"], "rest_id": tweet["rest_id"],
@ -138,11 +148,19 @@ def parse_tweet(tweet):
"created_at": tweet["legacy"]["created_at"], "created_at": tweet["legacy"]["created_at"],
"timestamp": int(datetime.strptime(tweet["legacy"]["created_at"], '%a %b %d %H:%M:%S %z %Y').timestamp()), "timestamp": int(datetime.strptime(tweet["legacy"]["created_at"], '%a %b %d %H:%M:%S %z %Y').timestamp()),
"media": [], "media": [],
"entities": [],
"quoted": {}, "quoted": {},
"retweeted": {} "retweeted": {}
} }
for m in tweet["legacy"]["entities"].get("media", []): for m in tweet["legacy"]["entities"].get("media", []):
data["media"].append(parse_media(m)) data["media"].append(parse_media(m))
for e in ["user_mentions", "hashtags", "urls"]:
for m in tweet["legacy"]["entities"].get(e, []):
data["entities"].append(parse_entities(m))
data["entities"].sort(key=lambda x: x["indices"][0])
return data return data

Loading…
Cancel
Save