Merge remote-tracking branch 'TA/testing' into feat/react-frontend

2025-07-18 23:18:14 +00:00 · 2022-04-20 10:27:27 -07:00 · 2022-04-20 10:27:27 -07:00 · db148dcdf3
commit db148dcdf3
parent 54882b744d 1477370376
5 changed files with 71 additions and 35 deletions
--- a/README.md
+++ b/README.md
@ -134,7 +134,9 @@ chown 1000:0 /path/to/mount/point
 This will match the permissions with the **UID** and **GID** of elasticsearch within the container and should fix the issue.
 ### Disk usage
-The Elasticsearch index will turn to *read only* if the disk usage of the container goes above 95% until the usage drops below 90% again. Similar to that, TubeArchivist will become all sorts of messed up when running out of disk space. There are some error messages in the logs when that happens, but it's best to make sure to have enough disk space before starting to download.
+The Elasticsearch index will turn to *read only* if the disk usage of the container goes above 95%, and it stays that way until the usage drops below 90% again. When that happens you will see error messages like `disk usage exceeded flood-stage watermark`, [link](https://github.com/tubearchivist/tubearchivist#disk-usage).  
 Similar to that, TubeArchivist will become all sorts of messed up when running out of disk space. There are some error messages in the logs when that happens, but it's best to make sure to have enough disk space before starting to download.
 ## Getting Started
 1. Go through the **settings** page and look at the available options. Particularly set *Download Format* to your desired video quality before downloading. **Tube Archivist** downloads the best available quality by default. To support iOS or MacOS and some other browsers a compatible format must be specified. For example:
--- a/tubearchivist/api/README.md
+++ b/tubearchivist/api/README.md
@ -20,6 +20,19 @@ headers = {"Authorization": "Token xxxxxxxxxx"}
 response = requests.get(url, headers=headers)
 ```
 ## Pagination
 The list views return a paginate object with the following keys:
 - page_size: int current page size set in config
 - page_from: int first result idx
 - prev_pages: array of ints of previous pages, if available
 - current_page: int current page from query
 - max_hits: bool, true if the max of 10k results is reached
 - last_page: int of last page link
 - next_pages: array of ints of next pages
 - total_hits: int total results
 Pass page number as a query parameter: `page=2`. Defaults to *0*; `page=1` is redundant and falls back to *0*. If a page query doesn't return any results, you'll get `HTTP 404 Not Found`.
 ## Login View
 Return token and user ID for username and password:  
 POST /api/login
--- a/tubearchivist/api/views.py
+++ b/tubearchivist/api/views.py
@ -3,6 +3,7 @@
 from api.src.search_processor import SearchProcess
 from home.src.download.queue import PendingInteract
 from home.src.es.connect import ElasticWrap
 from home.src.index.generic import Pagination
 from home.src.index.video import SponsorBlock
 from home.src.ta.config import AppConfig
 from home.src.ta.helper import UrlListParser
@ -25,12 +26,15 @@ class ApiBaseView(APIView):
    authentication_classes = [SessionAuthentication, TokenAuthentication]
    permission_classes = [IsAuthenticated]
    search_base = False
    data = False
    def __init__(self):
        super().__init__()
        self.response = {"data": False, "config": AppConfig().config}
        self.data = {"query": {"match_all": {}}}
        self.status_code = False
        self.context = False
        self.pagination_handler = False
    def get_document(self, document_id):
        """get single document from es"""
@ -44,20 +48,33 @@ class ApiBaseView(APIView):
            self.response["data"] = False
        self.status_code = status_code
-    def get_paginate(self):
+    def initiate_pagination(self, request):
-        """add pagination detail to response"""
+        """set initial pagination values"""
-        self.response["paginate"] = False
+        user_id = request.user.id
        page_get = int(request.GET.get("page", 0))
        self.pagination_handler = Pagination(page_get, user_id)
        self.data.update(
            {
                "size": self.pagination_handler.pagination["page_size"],
                "from": self.pagination_handler.pagination["page_from"],
            }
        )
-    def get_document_list(self, data):
+    def get_document_list(self, request):
        """get a list of results"""
        print(self.search_base)
-        response, status_code = ElasticWrap(self.search_base).get(data=data)
+        self.initiate_pagination(request)
        es_handler = ElasticWrap(self.search_base)
        response, status_code = es_handler.get(data=self.data)
        self.response["data"] = SearchProcess(response).process()
        if self.response["data"]:
            self.status_code = status_code
        else:
            self.status_code = 404
        self.pagination_handler.validate(response["hits"]["total"]["value"])
        self.response["paginate"] = self.pagination_handler.pagination
 class VideoApiView(ApiBaseView):
    """resolves to /api/video/<video_id>/
@ -81,11 +98,9 @@ class VideoApiListView(ApiBaseView):
    search_base = "ta_video/_search/"
    def get(self, request):
        # pylint: disable=unused-argument
        """get request"""
-        data = {"query": {"match_all": {}}}
+        self.data.update({"sort": [{"published": {"order": "desc"}}]})
-        self.get_document_list(data)
+        self.get_document_list(request)
        self.get_paginate()
        return Response(self.response)
@ -200,11 +215,11 @@ class ChannelApiListView(ApiBaseView):
    search_base = "ta_channel/_search/"
    def get(self, request):
        # pylint: disable=unused-argument
        """get request"""
-        data = {"query": {"match_all": {}}}
+        self.get_document_list(request)
-        self.get_document_list(data)
+        self.data.update(
-        self.get_paginate()
+            {"sort": [{"channel_name.keyword": {"order": "asc"}}]}
        )
        return Response(self.response)
@ -234,13 +249,16 @@ class ChannelApiVideoView(ApiBaseView):
    search_base = "ta_video/_search/"
    def get(self, request, channel_id):
        # pylint: disable=unused-argument
        """handle get request"""
-        data = {
+        self.data.update(
-            "query": {"term": {"channel.channel_id": {"value": channel_id}}}
+            {
                "query": {
                    "term": {"channel.channel_id": {"value": channel_id}}
                },
                "sort": [{"published": {"order": "desc"}}],
            }
-        self.get_document_list(data)
+        )
-        self.get_paginate()
+        self.get_document_list(request)
        return Response(self.response, status=self.status_code)
@ -253,11 +271,11 @@ class PlaylistApiListView(ApiBaseView):
    search_base = "ta_playlist/_search/"
    def get(self, request):
        # pylint: disable=unused-argument
        """handle get request"""
-        data = {"query": {"match_all": {}}}
+        self.data.update(
-        self.get_document_list(data)
+            {"sort": [{"playlist_name.keyword": {"order": "asc"}}]}
-        self.get_paginate()
+        )
        self.get_document_list(request)
        return Response(self.response)
@ -283,13 +301,13 @@ class PlaylistApiVideoView(ApiBaseView):
    search_base = "ta_video/_search/"
    def get(self, request, playlist_id):
        # pylint: disable=unused-argument
        """handle get request"""
-        data = {
+        self.data["query"] = {
-            "query": {"term": {"playlist.keyword": {"value": playlist_id}}}
+            "term": {"playlist.keyword": {"value": playlist_id}}
        }
-        self.get_document_list(data)
+        self.data.update({"sort": [{"published": {"order": "desc"}}]})
-        self.get_paginate()
+
        self.get_document_list(request)
        return Response(self.response, status=self.status_code)
@ -344,11 +362,9 @@ class DownloadApiListView(ApiBaseView):
    valid_filter = ["pending", "ignore"]
    def get(self, request):
        # pylint: disable=unused-argument
        """get request"""
        query_filter = request.GET.get("filter", False)
-        data = {
+        self.data.update = {
            "query": {"match_all": {}},
            "sort": [{"timestamp": {"order": "asc"}}],
        }
        if query_filter:
@ -357,10 +373,9 @@ class DownloadApiListView(ApiBaseView):
                print(message)
                return Response({"message": message}, status=400)
-            data["query"] = {"term": {"status": {"value": query_filter}}}
+            self.data["query"] = {"term": {"status": {"value": query_filter}}}
-        self.get_document_list(data)
+        self.get_document_list(request)
        self.get_paginate()
        return Response(self.response)
    @staticmethod
--- a/tubearchivist/home/src/index/generic.py
+++ b/tubearchivist/home/src/index/generic.py
@ -147,3 +147,4 @@ class Pagination:
        ]
        self.pagination["next_pages"] = next_pages
        self.pagination["total_hits"] = total_hits
--- a/tubearchivist/home/src/index/video.py
+++ b/tubearchivist/home/src/index/video.py
@ -195,6 +195,11 @@ class SubtitleParser:
            if flatten:
                # fix overlapping retiming issue
                if "dDurationMs" not in flatten[-1]:
                    # some events won't have a duration
                    print(f"failed to parse event without duration: {event}")
                    continue
                last_end = flatten[-1]["tStartMs"] + flatten[-1]["dDurationMs"]
                if event["tStartMs"] < last_end:
                    joined = flatten[-1]["segs"][0]["utf8"] + "\n" + text