use new IndexPaginate class for get_all_indexed videos

2025-07-25 18:28:16 +00:00 · 2021-11-18 12:16:21 +07:00 · 2021-11-18 12:16:21 +07:00 · f371a03cc7
commit f371a03cc7
parent 509b0097fe
4 changed files with 14 additions and 47 deletions
--- a/tubearchivist/home/src/download.py
+++ b/tubearchivist/home/src/download.py
@@ -191,47 +191,16 @@ class PendingList:

        return all_pending, all_ignore

-    def get_all_indexed(self):
+    @staticmethod
+    def get_all_indexed():
        """get a list of all videos indexed"""
-        headers = {"Content-type": "application/json"}
-        # get PIT ID
-        url = self.ES_URL + "/ta_video/_pit?keep_alive=1m"
-        response = requests.post(url, auth=self.ES_AUTH)
-        json_data = json.loads(response.text)
-        pit_id = json_data["id"]
-        # query
+
        data = {
-            "size": 500,
            "query": {"match_all": {}},
-            "pit": {"id": pit_id, "keep_alive": "1m"},
            "sort": [{"published": {"order": "desc"}}],
        }
-        query_str = json.dumps(data)
-        url = self.ES_URL + "/_search"
-        all_indexed = []
-        while True:
-            response = requests.get(
-                url, data=query_str, headers=headers, auth=self.ES_AUTH
-            )
-            json_data = json.loads(response.text)
-            all_hits = json_data["hits"]["hits"]
-            if all_hits:
-                for hit in all_hits:
-                    all_indexed.append(hit)
-                    search_after = hit["sort"]
-                # update search_after with last hit data
-                data["search_after"] = search_after
-                query_str = json.dumps(data)
-            else:
-                break
-        # clean up PIT
-        query_str = json.dumps({"id": pit_id})
-        requests.delete(
-            self.ES_URL + "/_pit",
-            data=query_str,
-            headers=headers,
-            auth=self.ES_AUTH,
-        )
+        all_indexed = IndexPaginate("ta_video", data).get_results()
+
        return all_indexed

    def get_all_downloaded(self):
--- a/tubearchivist/home/src/reindex.py
+++ b/tubearchivist/home/src/reindex.py
@@ -249,10 +249,10 @@ class FilesystemScanner:
        all_indexed_raw = index_handler.get_all_indexed()
        all_indexed = []
        for video in all_indexed_raw:
-            youtube_id = video["_id"]
-            media_url = video["_source"]["media_url"]
-            published = video["_source"]["published"]
-            title = video["_source"]["title"]
+            youtube_id = video["youtube_id"]
+            media_url = video["media_url"]
+            published = video["published"]
+            title = video["title"]
            all_indexed.append((youtube_id, media_url, published, title))
        return all_indexed

--- a/tubearchivist/home/src/thumbnails.py
+++ b/tubearchivist/home/src/thumbnails.py
@@ -60,8 +60,8 @@ class ThumbManager:

        needed_thumbs = []
        for video in all_indexed:
-            youtube_id = video["_source"]["youtube_id"]
-            thumb_url = video["_source"]["vid_thumb_url"]
+            youtube_id = video["youtube_id"]
+            thumb_url = video["vid_thumb_url"]
            if missing_only:
                if youtube_id + ".jpg" not in all_thumbs:
                    needed_thumbs.append((youtube_id, thumb_url))
@@ -277,10 +277,8 @@ class ThumbManager:
        all_indexed = download.PendingList().get_all_indexed()
        video_list = []
        for video in all_indexed:
-            youtube_id = video["_source"]["youtube_id"]
-            media_url = os.path.join(
-                self.MEDIA_DIR, video["_source"]["media_url"]
-            )
+            youtube_id = video["youtube_id"]
+            media_url = os.path.join(self.MEDIA_DIR, video["media_url"])
            thumb_path = os.path.join(
                self.CACHE_DIR, self.vid_thumb_path(youtube_id)
            )
--- a/tubearchivist/home/tasks.py
+++ b/tubearchivist/home/tasks.py
@@ -209,7 +209,7 @@ def index_channel_playlists(channel_id):
    all_playlists = channel_handler.get_all_playlists()

    all_indexed = PendingList().get_all_indexed()
-    all_youtube_ids = [i["_source"]["youtube_id"] for i in all_indexed]
+    all_youtube_ids = [i["youtube_id"] for i in all_indexed]

    for playlist_id, playlist_title in all_playlists:
        print("add playlist: " + playlist_title)