mirror of
https://github.com/tubearchivist/tubearchivist-frontend.git
synced 2024-11-22 20:00:15 +00:00
add playlists to reindex class
This commit is contained in:
parent
220d020c76
commit
9fceb98f13
@ -23,7 +23,12 @@ from home.src.helper import (
|
|||||||
get_total_hits,
|
get_total_hits,
|
||||||
ignore_filelist,
|
ignore_filelist,
|
||||||
)
|
)
|
||||||
from home.src.index import YoutubeChannel, YoutubeVideo, index_new_video
|
from home.src.index import (
|
||||||
|
YoutubeChannel,
|
||||||
|
YoutubePlaylist,
|
||||||
|
YoutubeVideo,
|
||||||
|
index_new_video,
|
||||||
|
)
|
||||||
from home.src.thumbnails import ThumbManager
|
from home.src.thumbnails import ThumbManager
|
||||||
|
|
||||||
|
|
||||||
@ -38,9 +43,9 @@ class Reindex:
|
|||||||
self.es_auth = config["application"]["es_auth"]
|
self.es_auth = config["application"]["es_auth"]
|
||||||
self.refresh_interval = 90
|
self.refresh_interval = 90
|
||||||
# scan
|
# scan
|
||||||
self.video_daily, self.channel_daily = self.get_daily()
|
|
||||||
self.all_youtube_ids = False
|
self.all_youtube_ids = False
|
||||||
self.all_channel_ids = False
|
self.all_channel_ids = False
|
||||||
|
self.all_playlist_ids = False
|
||||||
|
|
||||||
def get_daily(self):
|
def get_daily(self):
|
||||||
"""get daily refresh values"""
|
"""get daily refresh values"""
|
||||||
@ -52,14 +57,16 @@ class Reindex:
|
|||||||
"ta_channel", self.es_url, self.es_auth, "channel_active"
|
"ta_channel", self.es_url, self.es_auth, "channel_active"
|
||||||
)
|
)
|
||||||
channel_daily = ceil(total_channels / self.refresh_interval * 1.2)
|
channel_daily = ceil(total_channels / self.refresh_interval * 1.2)
|
||||||
return (video_daily, channel_daily)
|
playlist_daily = get_total_hits(
|
||||||
|
"ta_playlist", self.es_url, self.es_auth, "playlist_active"
|
||||||
|
)
|
||||||
|
return (video_daily, channel_daily, playlist_daily)
|
||||||
|
|
||||||
def get_outdated_vids(self):
|
def get_outdated_vids(self, size):
|
||||||
"""get daily videos to refresh"""
|
"""get daily videos to refresh"""
|
||||||
headers = {"Content-type": "application/json"}
|
headers = {"Content-type": "application/json"}
|
||||||
now = int(datetime.now().strftime("%s"))
|
now = int(datetime.now().strftime("%s"))
|
||||||
now_3m = now - 3 * 30 * 24 * 60 * 60
|
now_3m = now - 3 * 30 * 24 * 60 * 60
|
||||||
size = self.video_daily
|
|
||||||
data = {
|
data = {
|
||||||
"size": size,
|
"size": size,
|
||||||
"query": {
|
"query": {
|
||||||
@ -84,12 +91,11 @@ class Reindex:
|
|||||||
all_youtube_ids = [i["_id"] for i in response_dict["hits"]["hits"]]
|
all_youtube_ids = [i["_id"] for i in response_dict["hits"]["hits"]]
|
||||||
return all_youtube_ids
|
return all_youtube_ids
|
||||||
|
|
||||||
def get_outdated_channels(self):
|
def get_outdated_channels(self, size):
|
||||||
"""get daily channels to refresh"""
|
"""get daily channels to refresh"""
|
||||||
headers = {"Content-type": "application/json"}
|
headers = {"Content-type": "application/json"}
|
||||||
now = int(datetime.now().strftime("%s"))
|
now = int(datetime.now().strftime("%s"))
|
||||||
now_3m = now - 3 * 30 * 24 * 60 * 60
|
now_3m = now - 3 * 30 * 24 * 60 * 60
|
||||||
size = self.channel_daily
|
|
||||||
data = {
|
data = {
|
||||||
"size": size,
|
"size": size,
|
||||||
"query": {
|
"query": {
|
||||||
@ -114,10 +120,41 @@ class Reindex:
|
|||||||
all_channel_ids = [i["_id"] for i in response_dict["hits"]["hits"]]
|
all_channel_ids = [i["_id"] for i in response_dict["hits"]["hits"]]
|
||||||
return all_channel_ids
|
return all_channel_ids
|
||||||
|
|
||||||
|
def get_outdated_playlists(self, size):
    """get daily outdated playlists to refresh

    Searches the ta_playlist index for documents with playlist_active
    set and a playlist_last_refresh of at least 3 * 30 days ago, sorted
    by playlist_last_refresh ascending, and returns the _id of at most
    `size` hits as a list. Returns an empty list if the search request
    does not come back ok.
    """
    headers = {"Content-type": "application/json"}
    # timestamp() instead of strftime("%s"): "%s" is a platform specific
    # directive, both yield local epoch seconds for a naive datetime
    now = int(datetime.now().timestamp())
    now_3m = now - 3 * 30 * 24 * 60 * 60
    data = {
        "size": size,
        "query": {
            "bool": {
                "must": [
                    {"match": {"playlist_active": True}},
                    {"range": {"playlist_last_refresh": {"lte": now_3m}}},
                ]
            }
        },
        "sort": [{"playlist_last_refresh": {"order": "asc"}}],
        # only the document ids are needed
        "_source": False,
    }
    query_str = json.dumps(data)
    url = self.es_url + "/ta_playlist/_search"
    response = requests.get(
        url, data=query_str, headers=headers, auth=self.es_auth
    )
    if not response.ok:
        # a failed search is not expected to carry the hits structure,
        # report it and treat it as nothing to refresh
        print(response.text)
        return []
    response_dict = json.loads(response.text)
    all_playlist_ids = [i["_id"] for i in response_dict["hits"]["hits"]]
    return all_playlist_ids
|
||||||
|
|
||||||
def check_outdated(self):
    """add outdated vids, channels and playlists

    Reads the three daily sizes from get_daily() and stores the ids
    returned by the matching get_outdated_* call on all_youtube_ids,
    all_channel_ids and all_playlist_ids.
    """
    video_daily, channel_daily, playlist_daily = self.get_daily()
    # every get_outdated_* method takes its size as an argument
    self.all_youtube_ids = self.get_outdated_vids(video_daily)
    self.all_channel_ids = self.get_outdated_channels(channel_daily)
    self.all_playlist_ids = self.get_outdated_playlists(playlist_daily)
|
||||||
|
|
||||||
def rescrape_all_channels(self):
|
def rescrape_all_channels(self):
|
||||||
"""sync new data from channel to all matching videos"""
|
"""sync new data from channel to all matching videos"""
|
||||||
@ -163,11 +200,18 @@ class Reindex:
|
|||||||
date_downloaded = es_vid_dict["_source"]["date_downloaded"]
|
date_downloaded = es_vid_dict["_source"]["date_downloaded"]
|
||||||
channel_dict = es_vid_dict["_source"]["channel"]
|
channel_dict = es_vid_dict["_source"]["channel"]
|
||||||
channel_name = channel_dict["channel_name"]
|
channel_name = channel_dict["channel_name"]
|
||||||
|
try:
|
||||||
|
playlist = es_vid_dict["_source"]["playlist"]
|
||||||
|
except KeyError:
|
||||||
|
playlist = False
|
||||||
|
|
||||||
vid_handler.build_file_path(channel_name)
|
vid_handler.build_file_path(channel_name)
|
||||||
# add to vid_dict
|
# add to vid_dict
|
||||||
vid_handler.vid_dict["player"] = player
|
vid_handler.vid_dict["player"] = player
|
||||||
vid_handler.vid_dict["date_downloaded"] = date_downloaded
|
vid_handler.vid_dict["date_downloaded"] = date_downloaded
|
||||||
vid_handler.vid_dict["channel"] = channel_dict
|
vid_handler.vid_dict["channel"] = channel_dict
|
||||||
|
if playlist:
|
||||||
|
vid_handler.vid_dict["playlist"] = playlist
|
||||||
# update
|
# update
|
||||||
vid_handler.upload_to_es()
|
vid_handler.upload_to_es()
|
||||||
thumb_handler = ThumbManager()
|
thumb_handler = ThumbManager()
|
||||||
@ -194,6 +238,17 @@ class Reindex:
|
|||||||
to_download = (channel_id, channel_thumb, channel_banner)
|
to_download = (channel_id, channel_thumb, channel_banner)
|
||||||
thumb_handler.download_chan([to_download])
|
thumb_handler.download_chan([to_download])
|
||||||
|
|
||||||
|
@staticmethod
def reindex_single_playlist(playlist_id, all_indexed_ids):
    """update a single playlist and download its thumbnail again

    playlist_id is passed to YoutubePlaylist together with
    all_indexed_ids as all_youtube_ids; the dict returned by
    update_playlist() supplies the playlist_thumbnail value.
    """
    handler = YoutubePlaylist(playlist_id, all_youtube_ids=all_indexed_ids)
    refreshed = handler.update_playlist()
    # download_playlist is handed a list of (playlist_id, thumbnail) tuples
    to_download = [(playlist_id, refreshed["playlist_thumbnail"])]
    ThumbManager().download_playlist(to_download)
|
||||||
|
|
||||||
def reindex(self):
|
def reindex(self):
|
||||||
"""reindex what's needed"""
|
"""reindex what's needed"""
|
||||||
# videos
|
# videos
|
||||||
@ -208,6 +263,15 @@ class Reindex:
|
|||||||
self.reindex_single_channel(channel_id)
|
self.reindex_single_channel(channel_id)
|
||||||
if self.sleep_interval:
|
if self.sleep_interval:
|
||||||
sleep(self.sleep_interval)
|
sleep(self.sleep_interval)
|
||||||
|
# playlist
|
||||||
|
print(f"reindexing {len(self.all_playlist_ids)} playlists")
|
||||||
|
if self.all_playlist_ids:
|
||||||
|
all_indexed = PendingList().get_all_indexed()
|
||||||
|
all_indexed_ids = [i["youtube_id"] for i in all_indexed]
|
||||||
|
for playlist_id in self.all_playlist_ids:
|
||||||
|
self.reindex_single_playlist(playlist_id, all_indexed_ids)
|
||||||
|
if self.sleep_interval:
|
||||||
|
sleep(self.sleep_interval)
|
||||||
|
|
||||||
|
|
||||||
class FilesystemScanner:
|
class FilesystemScanner:
|
||||||
|
Loading…
Reference in New Issue
Block a user