From f371a03cc76d6e107e4b25b6f76f57f7e530cc50 Mon Sep 17 00:00:00 2001 From: simon Date: Thu, 18 Nov 2021 12:16:21 +0700 Subject: [PATCH] use new IndexPaginate class for get_all_indexed videos --- tubearchivist/home/src/download.py | 41 ++++------------------------ tubearchivist/home/src/reindex.py | 8 +++--- tubearchivist/home/src/thumbnails.py | 10 +++---- tubearchivist/home/tasks.py | 2 +- 4 files changed, 14 insertions(+), 47 deletions(-) diff --git a/tubearchivist/home/src/download.py b/tubearchivist/home/src/download.py index 3c96ec7..ede7ac6 100644 --- a/tubearchivist/home/src/download.py +++ b/tubearchivist/home/src/download.py @@ -191,47 +191,16 @@ class PendingList: return all_pending, all_ignore - def get_all_indexed(self): + @staticmethod + def get_all_indexed(): """get a list of all videos indexed""" - headers = {"Content-type": "application/json"} - # get PIT ID - url = self.ES_URL + "/ta_video/_pit?keep_alive=1m" - response = requests.post(url, auth=self.ES_AUTH) - json_data = json.loads(response.text) - pit_id = json_data["id"] - # query + data = { - "size": 500, "query": {"match_all": {}}, - "pit": {"id": pit_id, "keep_alive": "1m"}, "sort": [{"published": {"order": "desc"}}], } - query_str = json.dumps(data) - url = self.ES_URL + "/_search" - all_indexed = [] - while True: - response = requests.get( - url, data=query_str, headers=headers, auth=self.ES_AUTH - ) - json_data = json.loads(response.text) - all_hits = json_data["hits"]["hits"] - if all_hits: - for hit in all_hits: - all_indexed.append(hit) - search_after = hit["sort"] - # update search_after with last hit data - data["search_after"] = search_after - query_str = json.dumps(data) - else: - break - # clean up PIT - query_str = json.dumps({"id": pit_id}) - requests.delete( - self.ES_URL + "/_pit", - data=query_str, - headers=headers, - auth=self.ES_AUTH, - ) + all_indexed = IndexPaginate("ta_video", data).get_results() + return all_indexed def get_all_downloaded(self): diff --git a/tubearchivist/home/src/reindex.py b/tubearchivist/home/src/reindex.py index 8b0366e..2edf217 100644 --- a/tubearchivist/home/src/reindex.py +++ b/tubearchivist/home/src/reindex.py @@ -249,10 +249,10 @@ class FilesystemScanner: all_indexed_raw = index_handler.get_all_indexed() all_indexed = [] for video in all_indexed_raw: - youtube_id = video["_id"] - media_url = video["_source"]["media_url"] - published = video["_source"]["published"] - title = video["_source"]["title"] + youtube_id = video["youtube_id"] + media_url = video["media_url"] + published = video["published"] + title = video["title"] all_indexed.append((youtube_id, media_url, published, title)) return all_indexed diff --git a/tubearchivist/home/src/thumbnails.py b/tubearchivist/home/src/thumbnails.py index 5c3c7a5..138cef0 100644 --- a/tubearchivist/home/src/thumbnails.py +++ b/tubearchivist/home/src/thumbnails.py @@ -60,8 +60,8 @@ class ThumbManager: needed_thumbs = [] for video in all_indexed: - youtube_id = video["_source"]["youtube_id"] - thumb_url = video["_source"]["vid_thumb_url"] + youtube_id = video["youtube_id"] + thumb_url = video["vid_thumb_url"] if missing_only: if youtube_id + ".jpg" not in all_thumbs: needed_thumbs.append((youtube_id, thumb_url)) @@ -277,10 +277,8 @@ class ThumbManager: all_indexed = download.PendingList().get_all_indexed() video_list = [] for video in all_indexed: - youtube_id = video["_source"]["youtube_id"] - media_url = os.path.join( - self.MEDIA_DIR, video["_source"]["media_url"] - ) + youtube_id = video["youtube_id"] + media_url = os.path.join(self.MEDIA_DIR, video["media_url"]) thumb_path = os.path.join( self.CACHE_DIR, self.vid_thumb_path(youtube_id) ) diff --git a/tubearchivist/home/tasks.py b/tubearchivist/home/tasks.py index 3be1660..92608fb 100644 --- a/tubearchivist/home/tasks.py +++ b/tubearchivist/home/tasks.py @@ -209,7 +209,7 @@ def index_channel_playlists(channel_id): all_playlists = channel_handler.get_all_playlists() all_indexed = PendingList().get_all_indexed() - all_youtube_ids = [i["_source"]["youtube_id"] for i in all_indexed] + all_youtube_ids = [i["youtube_id"] for i in all_indexed] for playlist_id, playlist_title in all_playlists: print("add playlist: " + playlist_title)