From 40bb3e880ecc18a180144051f32f11db0cf00597 Mon Sep 17 00:00:00 2001 From: simon Date: Sun, 17 Apr 2022 19:15:40 +0700 Subject: [PATCH 01/10] API: implement status update and delete of item in queue --- tubearchivist/api/README.md | 19 ++++++++++++++++++- tubearchivist/api/views.py | 29 ++++++++++++++++++++++++++++- 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/tubearchivist/api/README.md b/tubearchivist/api/README.md index f6e10f4..2067331 100644 --- a/tubearchivist/api/README.md +++ b/tubearchivist/api/README.md @@ -149,8 +149,25 @@ POST /api/download/ ``` ## Download Queue Item View -/api/download/\/ +GET /api/download/\/ +POST /api/download/\/ +Ignore video in download queue: +```json +{ + "status": "ignore" +} +``` + +Add to queue previously ignored video: +```json +{ + "status": "pending" +} +``` + +DELETE /api/download/\/ +Forget or delete from download queue ## Ping View Validate your connection with the API diff --git a/tubearchivist/api/views.py b/tubearchivist/api/views.py index 7cd7a2a..a610c56 100644 --- a/tubearchivist/api/views.py +++ b/tubearchivist/api/views.py @@ -1,11 +1,12 @@ """all API views""" from api.src.search_processor import SearchProcess +from home.src.download.queue import PendingInteract from home.src.es.connect import ElasticWrap from home.src.index.video import SponsorBlock from home.src.ta.config import AppConfig from home.src.ta.helper import UrlListParser -from home.src.ta.ta_redis import RedisArchivist +from home.src.ta.ta_redis import RedisArchivist, RedisQueue from home.tasks import extrac_dl, subscribe_to from rest_framework.authentication import ( SessionAuthentication, @@ -295,9 +296,12 @@ class PlaylistApiVideoView(ApiBaseView): class DownloadApiView(ApiBaseView): """resolves to /api/download// GET: returns metadata dict of an item in the download queue + POST: update status of item to pending or ignore + DELETE: forget from download queue """ search_base = "ta_download/_doc/" + valid_status = ["pending", "ignore"] def get(self, request, video_id): # pylint: disable=unused-argument @@ -305,6 +309,29 @@ class DownloadApiView(ApiBaseView): self.get_document(video_id) return Response(self.response, status=self.status_code) + def post(self, request, video_id): + """post to video to change status""" + item_status = request.data["status"] + if item_status not in self.valid_status: + message = f"{video_id}: invalid status {item_status}" + print(message) + return Response({"message": message}, status=400) + + print(f"{video_id}: change status to {item_status}") + PendingInteract(video_id=video_id, status=item_status).update_status() + RedisQueue().clear_item(video_id) + + return Response(request.data) + + @staticmethod + def delete(request, video_id): + # pylint: disable=unused-argument + """delete single video from queue""" + print(f"{video_id}: delete from queue") + PendingInteract(video_id=video_id).delete_item() + + return Response({"success": True}) + class DownloadApiListView(ApiBaseView): """resolves to /api/download/ From d086f63861b78b4f6a3309ce6a16c3ca015eb4ff Mon Sep 17 00:00:00 2001 From: simon Date: Sun, 17 Apr 2022 20:10:49 +0700 Subject: [PATCH 02/10] API: sort and query filter download view, delete by filter --- tubearchivist/api/README.md | 9 ++++++++- tubearchivist/api/views.py | 30 +++++++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/tubearchivist/api/README.md b/tubearchivist/api/README.md index 2067331..c635b11 100644 --- a/tubearchivist/api/README.md +++ b/tubearchivist/api/README.md @@ -136,7 +136,10 @@ POST /api/channel/ /api/playlist/\/video/ ## Download Queue List View -/api/download/ +GET /api/download/ + +Parameter: +- filter: pending, ignore ### Add list of videos to download queue POST /api/download/ @@ -148,6 +151,10 @@ POST /api/download/ } ``` +### Delete download queue items by filter +DELETE /api/download/?filter=ignore +DELETE /api/download/?filter=pending + ## Download Queue Item View GET /api/download/\/ POST /api/download/\/ diff --git a/tubearchivist/api/views.py b/tubearchivist/api/views.py index a610c56..a30484e 100644 --- a/tubearchivist/api/views.py +++ b/tubearchivist/api/views.py @@ -337,14 +337,28 @@ class DownloadApiListView(ApiBaseView): """resolves to /api/download/ GET: returns latest videos in the download queue POST: add a list of videos to download queue + DELETE: remove items based on query filter """ search_base = "ta_download/_search/" + valid_filter = ["pending", "ignore"] def get(self, request): # pylint: disable=unused-argument """get request""" - data = {"query": {"match_all": {}}} + query_filter = request.GET.get("filter", False) + data = { + "query": {"match_all": {}}, + "sort": [{"timestamp": {"order": "asc"}}], + } + if query_filter: + if query_filter not in self.valid_filter: + message = f"invalid url query filder: {query_filter}" + print(message) + return Response({"message": message}, status=400) + + data["query"] = {"term": {"status": {"value": query_filter}}} + self.get_document_list(data) self.get_paginate() return Response(self.response) @@ -374,6 +388,20 @@ class DownloadApiListView(ApiBaseView): return Response(data) + def delete(self, request): + """delete download queue""" + query_filter = request.GET.get("filter", False) + if query_filter not in self.valid_filter: + message = f"invalid url query filter: {query_filter}" + print(message) + return Response({"message": message}, status=400) + + message = f"delete queue by status: {query_filter}" + print(message) + PendingInteract(status=query_filter).delete_by_status() + + return Response({"message": message}) + class PingView(ApiBaseView): """resolves to /api/ping/ From 3147df20da96efd01e23bdd27300176bdeba2177 Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 18 Apr 2022 11:52:13 +0700 Subject: [PATCH 03/10] skip subtitle segments without duration, take 2 --- tubearchivist/home/src/index/video.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tubearchivist/home/src/index/video.py b/tubearchivist/home/src/index/video.py index 969d716..290e0ce 100644 --- a/tubearchivist/home/src/index/video.py +++ b/tubearchivist/home/src/index/video.py @@ -195,6 +195,11 @@ class SubtitleParser: if flatten: # fix overlapping retiming issue + if "dDurationMs" not in flatten[-1]: + # some events won't have a duration + print(f"failed to parse event without duration: {event}") + continue + last_end = flatten[-1]["tStartMs"] + flatten[-1]["dDurationMs"] if event["tStartMs"] < last_end: joined = flatten[-1]["segs"][0]["utf8"] + "\n" + text From 73052164852550b0f089a31ed58eb696056cd02a Mon Sep 17 00:00:00 2001 From: simon Date: Tue, 19 Apr 2022 08:07:47 +0700 Subject: [PATCH 04/10] add link to ES documentation for disk usage --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index d3ee806..16a6112 100644 --- a/README.md +++ b/README.md @@ -134,7 +134,9 @@ chown 1000:0 /path/to/mount/point This will match the permissions with the **UID** and **GID** of elasticsearch within the container and should fix the issue. ### Disk usage -The Elasticsearch index will turn to *read only* if the disk usage of the container goes above 95% until the usage drops below 90% again. Similar to that, TubeArchivist will become all sorts of messed up when running out of disk space. There are some error messages in the logs when that happens, but it's best to make sure to have enough disk space before starting to download. +The Elasticsearch index will turn to *read only* if the disk usage of the container goes above 95% until the usage drops below 90% again, you will see error messages like `disk usage exceeded flood-stage watermark`, [link](https://github.com/tubearchivist/tubearchivist#disk-usage). + +Similar to that, TubeArchivist will become all sorts of messed up when running out of disk space. There are some error messages in the logs when that happens, but it's best to make sure to have enough disk space before starting to download. ## Getting Started 1. Go through the **settings** page and look at the available options. Particularly set *Download Format* to your desired video quality before downloading. **Tube Archivist** downloads the best available quality by default. To support iOS or MacOS and some other browsers a compatible format must be specified. For example: From 04fc6ed26a9df4d4c12142397175c615eb0e0972 Mon Sep 17 00:00:00 2001 From: simon Date: Wed, 20 Apr 2022 22:43:07 +0700 Subject: [PATCH 05/10] API: add pagination --- tubearchivist/api/README.md | 13 ++++ tubearchivist/api/views.py | 82 +++++++++++++++---------- tubearchivist/home/src/index/generic.py | 1 + 3 files changed, 62 insertions(+), 34 deletions(-) diff --git a/tubearchivist/api/README.md b/tubearchivist/api/README.md index c635b11..e2bed71 100644 --- a/tubearchivist/api/README.md +++ b/tubearchivist/api/README.md @@ -20,6 +20,19 @@ headers = {"Authorization": "Token xxxxxxxxxx"} response = requests.get(url, headers=headers) ``` +## Pagination +The list views return a paginate object with the following keys: +- page_size: int current page size set in config +- page_from: int first result idx +- prev_pages: array of ints of previous pages, if available +- current_page: int current page from query +- max_hits: reached: bool if max of 10k results is reached +- last_page: int of last page link +- next_pages: array of ints of next pages +- total_hits: int total results + +Pass page number as a query parameter: `page=2`. Defaults to *0*, `page=1` is redundant and falls back to *0*. If a page query doesn't return any results, you'll get `HTTP 404 Not Found`. + ## Login View Return token and user ID for username and password: POST /api/login diff --git a/tubearchivist/api/views.py b/tubearchivist/api/views.py index a30484e..ea60424 100644 --- a/tubearchivist/api/views.py +++ b/tubearchivist/api/views.py @@ -3,6 +3,7 @@ from api.src.search_processor import SearchProcess from home.src.download.queue import PendingInteract from home.src.es.connect import ElasticWrap +from home.src.index.generic import Pagination from home.src.index.video import SponsorBlock from home.src.ta.config import AppConfig from home.src.ta.helper import UrlListParser @@ -25,12 +26,14 @@ class ApiBaseView(APIView): authentication_classes = [SessionAuthentication, TokenAuthentication] permission_classes = [IsAuthenticated] search_base = False + data = {"query": {"match_all": {}}} def __init__(self): super().__init__() self.response = {"data": False, "config": AppConfig().config} self.status_code = False self.context = False + self.pagination_handler = False def get_document(self, document_id): """get single document from es""" @@ -44,20 +47,33 @@ class ApiBaseView(APIView): self.response["data"] = False self.status_code = status_code - def get_paginate(self): - """add pagination detail to response""" - self.response["paginate"] = False + def initiate_pagination(self, request): + """set initial pagination values""" + user_id = request.user.id + page_get = int(request.GET.get("page", 0)) + self.pagination_handler = Pagination(page_get, user_id) + self.data.update( + { + "size": self.pagination_handler.pagination["page_size"], + "from": self.pagination_handler.pagination["page_from"], + } + ) - def get_document_list(self, data): + def get_document_list(self, request): """get a list of results""" print(self.search_base) - response, status_code = ElasticWrap(self.search_base).get(data=data) + self.initiate_pagination(request) + es_handler = ElasticWrap(self.search_base) + response, status_code = es_handler.get(data=self.data) self.response["data"] = SearchProcess(response).process() if self.response["data"]: self.status_code = status_code else: self.status_code = 404 + self.pagination_handler.validate(response["hits"]["total"]["value"]) + self.response["paginate"] = self.pagination_handler.pagination + class VideoApiView(ApiBaseView): """resolves to /api/video// @@ -81,11 +97,9 @@ class VideoApiListView(ApiBaseView): search_base = "ta_video/_search/" def get(self, request): - # pylint: disable=unused-argument """get request""" - data = {"query": {"match_all": {}}} - self.get_document_list(data) - self.get_paginate() + self.data.update({"sort": [{"published": {"order": "desc"}}]}) + self.get_document_list(request) return Response(self.response) @@ -200,11 +214,11 @@ class ChannelApiListView(ApiBaseView): search_base = "ta_channel/_search/" def get(self, request): - # pylint: disable=unused-argument """get request""" - data = {"query": {"match_all": {}}} - self.get_document_list(data) - self.get_paginate() + self.get_document_list(request) + self.data.update( + {"sort": [{"channel_name.keyword": {"order": "asc"}}]} + ) return Response(self.response) @@ -234,13 +248,16 @@ class ChannelApiVideoView(ApiBaseView): search_base = "ta_video/_search/" def get(self, request, channel_id): - # pylint: disable=unused-argument """handle get request""" - data = { - "query": {"term": {"channel.channel_id": {"value": channel_id}}} - } - self.get_document_list(data) - self.get_paginate() + self.data.update( + { + "query": { + "term": {"channel.channel_id": {"value": channel_id}} + }, + "sort": [{"published": {"order": "desc"}}], + } + ) + self.get_document_list(request) return Response(self.response, status=self.status_code) @@ -253,11 +270,11 @@ class PlaylistApiListView(ApiBaseView): search_base = "ta_playlist/_search/" def get(self, request): - # pylint: disable=unused-argument """handle get request""" - data = {"query": {"match_all": {}}} - self.get_document_list(data) - self.get_paginate() + self.data.update( + {"sort": [{"playlist_name.keyword": {"order": "asc"}}]} + ) + self.get_document_list(request) return Response(self.response) @@ -283,13 +300,13 @@ class PlaylistApiVideoView(ApiBaseView): search_base = "ta_video/_search/" def get(self, request, playlist_id): - # pylint: disable=unused-argument """handle get request""" - data = { - "query": {"term": {"playlist.keyword": {"value": playlist_id}}} + self.data["query"] = { + "term": {"playlist.keyword": {"value": playlist_id}} } - self.get_document_list(data) - self.get_paginate() + self.data.update({"sort": [{"published": {"order": "desc"}}]}) + + self.get_document_list(request) return Response(self.response, status=self.status_code) @@ -344,11 +361,9 @@ class DownloadApiListView(ApiBaseView): valid_filter = ["pending", "ignore"] def get(self, request): - # pylint: disable=unused-argument """get request""" query_filter = request.GET.get("filter", False) - data = { - "query": {"match_all": {}}, + self.data.update = { "sort": [{"timestamp": {"order": "asc"}}], } if query_filter: @@ -357,10 +372,9 @@ class DownloadApiListView(ApiBaseView): print(message) return Response({"message": message}, status=400) - data["query"] = {"term": {"status": {"value": query_filter}}} + self.data["query"] = {"term": {"status": {"value": query_filter}}} - self.get_document_list(data) - self.get_paginate() + self.get_document_list(request) return Response(self.response) @staticmethod diff --git a/tubearchivist/home/src/index/generic.py b/tubearchivist/home/src/index/generic.py index dcff82b..709dde9 100644 --- a/tubearchivist/home/src/index/generic.py +++ b/tubearchivist/home/src/index/generic.py @@ -147,3 +147,4 @@ class Pagination: ] self.pagination["next_pages"] = next_pages + self.pagination["total_hits"] = total_hits From 1477370376212dc4bfa6c5e80764f10f47fddc51 Mon Sep 17 00:00:00 2001 From: simon Date: Wed, 20 Apr 2022 22:51:30 +0700 Subject: [PATCH 06/10] init data instead of class attribute --- tubearchivist/api/views.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tubearchivist/api/views.py b/tubearchivist/api/views.py index ea60424..0fe1d7f 100644 --- a/tubearchivist/api/views.py +++ b/tubearchivist/api/views.py @@ -26,11 +26,12 @@ class ApiBaseView(APIView): authentication_classes = [SessionAuthentication, TokenAuthentication] permission_classes = [IsAuthenticated] search_base = False - data = {"query": {"match_all": {}}} + data = False def __init__(self): super().__init__() self.response = {"data": False, "config": AppConfig().config} + self.data = {"query": {"match_all": {}}} self.status_code = False self.context = False self.pagination_handler = False From b76f38e0bc024b8738c047d4f44785366a044276 Mon Sep 17 00:00:00 2001 From: simon Date: Thu, 21 Apr 2022 05:45:55 +0700 Subject: [PATCH 07/10] API: fix downloads list sort --- tubearchivist/api/views.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tubearchivist/api/views.py b/tubearchivist/api/views.py index 0fe1d7f..a92cf60 100644 --- a/tubearchivist/api/views.py +++ b/tubearchivist/api/views.py @@ -364,9 +364,7 @@ class DownloadApiListView(ApiBaseView): def get(self, request): """get request""" query_filter = request.GET.get("filter", False) - self.data.update = { - "sort": [{"timestamp": {"order": "asc"}}], - } + self.data.update({"sort": [{"timestamp": {"order": "asc"}}]}) if query_filter: if query_filter not in self.valid_filter: message = f"invalid url query filder: {query_filter}" From 3f99f7edffa57d7219a052fae039b70c56358e76 Mon Sep 17 00:00:00 2001 From: simon Date: Sat, 23 Apr 2022 20:16:28 +0700 Subject: [PATCH 08/10] add localhost to allowed origin, optional cors disable --- tubearchivist/config/settings.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tubearchivist/config/settings.py b/tubearchivist/config/settings.py index 8e8bf5a..d24e718 100644 --- a/tubearchivist/config/settings.py +++ b/tubearchivist/config/settings.py @@ -146,7 +146,16 @@ LOGOUT_REDIRECT_URL = "/login/" # Cors needed for browser extension # background.js makes the request so HTTP_ORIGIN will be from extension -CORS_ALLOWED_ORIGIN_REGEXES = [r"moz-extension://*", r"chrome-extension://*"] +if environ.get("DISABLE_CORS"): + # disable cors + CORS_ORIGIN_ALLOW_ALL = True +else: + CORS_ALLOWED_ORIGIN_REGEXES = [ + r"moz-extension://*", + r"chrome-extension://*", + ] + CORS_ALLOWED_ORIGINS = ["http://localhost:8080"] + CORS_ALLOW_HEADERS = list(default_headers) + [ "mode", From 71b3654942227a0f6a6f000e9abbef7e077bf5b1 Mon Sep 17 00:00:00 2001 From: simon Date: Sat, 23 Apr 2022 20:17:31 +0700 Subject: [PATCH 09/10] add localhost:3000 to allowed --- tubearchivist/config/settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubearchivist/config/settings.py b/tubearchivist/config/settings.py index d24e718..9a44b06 100644 --- a/tubearchivist/config/settings.py +++ b/tubearchivist/config/settings.py @@ -154,7 +154,7 @@ else: r"moz-extension://*", r"chrome-extension://*", ] - CORS_ALLOWED_ORIGINS = ["http://localhost:8080"] + CORS_ALLOWED_ORIGINS = ["http://localhost:3000"] CORS_ALLOW_HEADERS = list(default_headers) + [ From eb7313fe6b3c64067117329d3f2a29a0de7fe0f3 Mon Sep 17 00:00:00 2001 From: simon Date: Sat, 23 Apr 2022 20:50:38 +0700 Subject: [PATCH 10/10] API: add run task view --- tubearchivist/api/README.md | 13 ++++++ tubearchivist/api/src/task_processor.py | 54 +++++++++++++++++++++++++ tubearchivist/api/urls.py | 6 +++ tubearchivist/api/views.py | 16 ++++++++ 4 files changed, 89 insertions(+) create mode 100644 tubearchivist/api/src/task_processor.py diff --git a/tubearchivist/api/README.md b/tubearchivist/api/README.md index e2bed71..dada701 100644 --- a/tubearchivist/api/README.md +++ b/tubearchivist/api/README.md @@ -200,3 +200,16 @@ When valid returns message with user id: "user": 1 } ``` + +## Task View +Start a background task +POST /api/task/ +```json +{ + "run": "task_name" +} +``` + +List of valid task names: +- **download_pending**: Start the download queue +- **rescan_pending**: Rescan your subscriptions diff --git a/tubearchivist/api/src/task_processor.py b/tubearchivist/api/src/task_processor.py new file mode 100644 index 0000000..f13b953 --- /dev/null +++ b/tubearchivist/api/src/task_processor.py @@ -0,0 +1,54 @@ +""" +Functionality: +- process tasks from API +- validate +- handover to celery +""" + +from home.src.ta.ta_redis import RedisArchivist +from home.tasks import download_pending, update_subscribed + + +class TaskHandler: + """handle tasks from api""" + + def __init__(self, data): + self.data = data + + def run_task(self): + """map data and run""" + task_name = self.data["run"] + try: + to_run = self.exec_map(task_name) + except KeyError as err: + print(f"invalid task name {task_name}") + raise ValueError from err + + response = to_run() + response.update({"task": task_name}) + return response + + def exec_map(self, task_name): + """map dict key and return function to execute""" + exec_map = { + "download_pending": self._download_pending, + "rescan_pending": self._rescan_pending, + } + + return exec_map[task_name] + + @staticmethod + def _rescan_pending(): + """look for new items in subscribed channels""" + print("rescan subscribed channels") + update_subscribed.delay() + return {"success": True} + + @staticmethod + def _download_pending(): + """start the download queue""" + print("download pending") + running = download_pending.delay() + print("set task id: " + running.id) + RedisArchivist().set_message("dl_queue_id", running.id, expire=False) + return {"success": True} diff --git a/tubearchivist/api/urls.py b/tubearchivist/api/urls.py index b19f5c7..e059f75 100644 --- a/tubearchivist/api/urls.py +++ b/tubearchivist/api/urls.py @@ -11,6 +11,7 @@ from api.views import ( PlaylistApiListView, PlaylistApiVideoView, PlaylistApiView, + TaskApiView, VideoApiListView, VideoApiView, VideoProgressView, @@ -81,4 +82,9 @@ urlpatterns = [ DownloadApiView.as_view(), name="api-download", ), + path( + "task/", + TaskApiView.as_view(), + name="api-task", + ), ] diff --git a/tubearchivist/api/views.py b/tubearchivist/api/views.py index a92cf60..88b8a0e 100644 --- a/tubearchivist/api/views.py +++ b/tubearchivist/api/views.py @@ -1,6 +1,7 @@ """all API views""" from api.src.search_processor import SearchProcess +from api.src.task_processor import TaskHandler from home.src.download.queue import PendingInteract from home.src.es.connect import ElasticWrap from home.src.index.generic import Pagination @@ -446,3 +447,18 @@ class LoginApiView(ObtainAuthToken): print(f"returning token for user with id {user.pk}") return Response({"token": token.key, "user_id": user.pk}) + + +class TaskApiView(ApiBaseView): + """resolves to /api/task/ + POST: start a new background task + """ + + def post(self, request): + """handle post request""" + + data = request.data + print(data) + response = TaskHandler(data).run_task() + + return Response(response)