|
|
|
@ -126,7 +126,17 @@ def parse_media(media):
|
|
|
|
|
variants.sort(key=lambda x: x["bitrate"], reverse=True)
|
|
|
|
|
if variants: data["video"] = variants[0]["url"]
|
|
|
|
|
return data
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def parse_entities(entity):
    """Reduce a raw entity dict to its display text and its index span.

    The display text is chosen from whichever of these keys the entity
    carries (presumably user mentions, hashtags and URLs respectively --
    confirm against the caller):

    * ``"name"``        -> the value prefixed with ``"@"``
    * ``"text"``        -> the value prefixed with ``"#"``
    * ``"display_url"`` -> the value as-is

    If more than one key is present, the one latest in the list above wins.
    If none is present the text is the empty string.

    Args:
        entity: Mapping that must contain ``"indices"``.

    Returns:
        A dict ``{"text": ..., "indices": ...}``; ``"indices"`` is the very
        same object found on ``entity`` (no copy is made).

    Raises:
        KeyError: If ``entity`` has no ``"indices"`` key.
    """
    span = entity["indices"]
    label = ""

    # Walk the prefixed kinds in a fixed order so that a later match
    # overrides an earlier one.
    for key, sigil in (("name", "@"), ("text", "#")):
        if key in entity:
            label = sigil + entity[key]

    # The display URL is taken verbatim and has the final say.
    if "display_url" in entity:
        label = entity["display_url"]

    return {"text": label, "indices": span}
|
|
|
|
|
|
|
|
|
|
def parse_tweet(tweet):
|
|
|
|
|
data = {
|
|
|
|
|
"rest_id": tweet["rest_id"],
|
|
|
|
@ -138,11 +148,19 @@ def parse_tweet(tweet):
|
|
|
|
|
"created_at": tweet["legacy"]["created_at"],
|
|
|
|
|
"timestamp": int(datetime.strptime(tweet["legacy"]["created_at"], '%a %b %d %H:%M:%S %z %Y').timestamp()),
|
|
|
|
|
"media": [],
|
|
|
|
|
"entities": [],
|
|
|
|
|
"quoted": {},
|
|
|
|
|
"retweeted": {}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for m in tweet["legacy"]["entities"].get("media", []):
|
|
|
|
|
data["media"].append(parse_media(m))
|
|
|
|
|
|
|
|
|
|
for e in ["user_mentions", "hashtags", "urls"]:
|
|
|
|
|
for m in tweet["legacy"]["entities"].get(e, []):
|
|
|
|
|
data["entities"].append(parse_entities(m))
|
|
|
|
|
data["entities"].sort(key=lambda x: x["indices"][0])
|
|
|
|
|
|
|
|
|
|
return data
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|