api extension, #build

changed:
- add localhost to allowed hosts, flag to disable cors
- API: pagination
- API: sort and filter download list
- API: add task view
This commit is contained in:
simon 2022-04-23 20:53:51 +07:00
commit fa9d6df406
No known key found for this signature in database
GPG Key ID: 2C15AA5E89985DD4
8 changed files with 249 additions and 38 deletions

View File

@ -134,7 +134,9 @@ chown 1000:0 /path/to/mount/point
This will match the permissions with the **UID** and **GID** of elasticsearch within the container and should fix the issue. This will match the permissions with the **UID** and **GID** of elasticsearch within the container and should fix the issue.
### Disk usage ### Disk usage
The Elasticsearch index will turn to *read only* if the disk usage of the container goes above 95% until the usage drops below 90% again. Similar to that, TubeArchivist will become all sorts of messed up when running out of disk space. There are some error messages in the logs when that happens, but it's best to make sure to have enough disk space before starting to download. The Elasticsearch index will turn to *read only* if the disk usage of the container goes above 95% until the usage drops below 90% again, you will see error messages like `disk usage exceeded flood-stage watermark`, [link](https://github.com/tubearchivist/tubearchivist#disk-usage).
Similar to that, TubeArchivist will become all sorts of messed up when running out of disk space. There are some error messages in the logs when that happens, but it's best to make sure to have enough disk space before starting to download.
## Getting Started ## Getting Started
1. Go through the **settings** page and look at the available options. Particularly set *Download Format* to your desired video quality before downloading. **Tube Archivist** downloads the best available quality by default. To support iOS or MacOS and some other browsers a compatible format must be specified. For example: 1. Go through the **settings** page and look at the available options. Particularly set *Download Format* to your desired video quality before downloading. **Tube Archivist** downloads the best available quality by default. To support iOS or MacOS and some other browsers a compatible format must be specified. For example:

View File

@ -20,6 +20,19 @@ headers = {"Authorization": "Token xxxxxxxxxx"}
response = requests.get(url, headers=headers) response = requests.get(url, headers=headers)
``` ```
## Pagination
The list views return a paginate object with the following keys:
- page_size: int current page size set in config
- page_from: int first result idx
- prev_pages: array of ints of previous pages, if available
- current_page: int current page from query
- max_hits: bool, true if the max of 10k results is reached
- last_page: int of last page link
- next_pages: array of ints of next pages
- total_hits: int total results
Pass the page number as a query parameter: `page=2`. Defaults to *0*; `page=1` is redundant and falls back to *0*. If a page query doesn't return any results, you'll get `HTTP 404 Not Found`.
## Login View ## Login View
Return token and user ID for username and password: Return token and user ID for username and password:
POST /api/login POST /api/login
@ -136,7 +149,10 @@ POST /api/channel/
/api/playlist/\<playlist_id>/video/ /api/playlist/\<playlist_id>/video/
## Download Queue List View ## Download Queue List View
/api/download/ GET /api/download/
Parameter:
- filter: pending, ignore
### Add list of videos to download queue ### Add list of videos to download queue
POST /api/download/ POST /api/download/
@ -148,9 +164,30 @@ POST /api/download/
} }
``` ```
## Download Queue Item View ### Delete download queue items by filter
/api/download/\<video_id>/ DELETE /api/download/?filter=ignore
DELETE /api/download/?filter=pending
## Download Queue Item View
GET /api/download/\<video_id>/
POST /api/download/\<video_id>/
Ignore video in download queue:
```json
{
"status": "ignore"
}
```
Add a previously ignored video back to the queue:
```json
{
"status": "pending"
}
```
DELETE /api/download/\<video_id>/
Forget or delete from download queue
## Ping View ## Ping View
Validate your connection with the API Validate your connection with the API
@ -163,3 +200,16 @@ When valid returns message with user id:
"user": 1 "user": 1
} }
``` ```
## Task View
Start a background task
POST /api/task/
```json
{
"run": "task_name"
}
```
List of valid task names:
- **download_pending**: Start the download queue
- **rescan_pending**: Rescan your subscriptions

View File

@ -0,0 +1,54 @@
"""
Functionality:
- process tasks from API
- validate
- handover to celery
"""
from home.src.ta.ta_redis import RedisArchivist
from home.tasks import download_pending, update_subscribed
class TaskHandler:
    """Validate a task request from the API and hand it over to celery.

    ``data`` is the request payload. It is expected to carry the task
    name under the ``run`` key, e.g. ``{"run": "download_pending"}``.
    """

    def __init__(self, data):
        self.data = data

    def run_task(self):
        """Look up the requested task, start it and return its response.

        Returns the dict produced by the task starter, extended with a
        ``task`` key holding the requested task name.

        Raises ValueError when the payload has no usable ``run`` key or
        names a task that is not registered in ``exec_map``.
        """
        try:
            task_name = self.data["run"]
        except (KeyError, TypeError) as err:
            # payload without "run", or payload that is not a mapping
            print("missing task name")
            raise ValueError("missing task name") from err

        try:
            to_run = self.exec_map(task_name)
        except (KeyError, TypeError) as err:
            # TypeError covers unhashable names such as a list or dict
            print(f"invalid task name {task_name}")
            raise ValueError(f"invalid task name {task_name}") from err

        response = to_run()
        response.update({"task": task_name})
        return response

    def exec_map(self, task_name):
        """Return the function registered for ``task_name``.

        Raises KeyError for an unknown task name.
        """
        tasks = {
            "download_pending": self._download_pending,
            "rescan_pending": self._rescan_pending,
        }
        return tasks[task_name]

    @staticmethod
    def _rescan_pending():
        """look for new items in subscribed channels"""
        print("rescan subscribed channels")
        update_subscribed.delay()
        return {"success": True}

    @staticmethod
    def _download_pending():
        """start the download queue"""
        print("download pending")
        running = download_pending.delay()
        print("set task id: " + running.id)
        # store the id of the started task under the dl_queue_id message
        RedisArchivist().set_message("dl_queue_id", running.id, expire=False)
        return {"success": True}

View File

@ -11,6 +11,7 @@ from api.views import (
PlaylistApiListView, PlaylistApiListView,
PlaylistApiVideoView, PlaylistApiVideoView,
PlaylistApiView, PlaylistApiView,
TaskApiView,
VideoApiListView, VideoApiListView,
VideoApiView, VideoApiView,
VideoProgressView, VideoProgressView,
@ -81,4 +82,9 @@ urlpatterns = [
DownloadApiView.as_view(), DownloadApiView.as_view(),
name="api-download", name="api-download",
), ),
path(
"task/",
TaskApiView.as_view(),
name="api-task",
),
] ]

View File

@ -1,11 +1,14 @@
"""all API views""" """all API views"""
from api.src.search_processor import SearchProcess from api.src.search_processor import SearchProcess
from api.src.task_processor import TaskHandler
from home.src.download.queue import PendingInteract
from home.src.es.connect import ElasticWrap from home.src.es.connect import ElasticWrap
from home.src.index.generic import Pagination
from home.src.index.video import SponsorBlock from home.src.index.video import SponsorBlock
from home.src.ta.config import AppConfig from home.src.ta.config import AppConfig
from home.src.ta.helper import UrlListParser from home.src.ta.helper import UrlListParser
from home.src.ta.ta_redis import RedisArchivist from home.src.ta.ta_redis import RedisArchivist, RedisQueue
from home.tasks import extrac_dl, subscribe_to from home.tasks import extrac_dl, subscribe_to
from rest_framework.authentication import ( from rest_framework.authentication import (
SessionAuthentication, SessionAuthentication,
@ -24,12 +27,15 @@ class ApiBaseView(APIView):
authentication_classes = [SessionAuthentication, TokenAuthentication] authentication_classes = [SessionAuthentication, TokenAuthentication]
permission_classes = [IsAuthenticated] permission_classes = [IsAuthenticated]
search_base = False search_base = False
data = False
def __init__(self): def __init__(self):
super().__init__() super().__init__()
self.response = {"data": False, "config": AppConfig().config} self.response = {"data": False, "config": AppConfig().config}
self.data = {"query": {"match_all": {}}}
self.status_code = False self.status_code = False
self.context = False self.context = False
self.pagination_handler = False
def get_document(self, document_id): def get_document(self, document_id):
"""get single document from es""" """get single document from es"""
@ -43,20 +49,33 @@ class ApiBaseView(APIView):
self.response["data"] = False self.response["data"] = False
self.status_code = status_code self.status_code = status_code
def get_paginate(self): def initiate_pagination(self, request):
"""add pagination detail to response""" """set initial pagination values"""
self.response["paginate"] = False user_id = request.user.id
page_get = int(request.GET.get("page", 0))
self.pagination_handler = Pagination(page_get, user_id)
self.data.update(
{
"size": self.pagination_handler.pagination["page_size"],
"from": self.pagination_handler.pagination["page_from"],
}
)
def get_document_list(self, data): def get_document_list(self, request):
"""get a list of results""" """get a list of results"""
print(self.search_base) print(self.search_base)
response, status_code = ElasticWrap(self.search_base).get(data=data) self.initiate_pagination(request)
es_handler = ElasticWrap(self.search_base)
response, status_code = es_handler.get(data=self.data)
self.response["data"] = SearchProcess(response).process() self.response["data"] = SearchProcess(response).process()
if self.response["data"]: if self.response["data"]:
self.status_code = status_code self.status_code = status_code
else: else:
self.status_code = 404 self.status_code = 404
self.pagination_handler.validate(response["hits"]["total"]["value"])
self.response["paginate"] = self.pagination_handler.pagination
class VideoApiView(ApiBaseView): class VideoApiView(ApiBaseView):
"""resolves to /api/video/<video_id>/ """resolves to /api/video/<video_id>/
@ -80,11 +99,9 @@ class VideoApiListView(ApiBaseView):
search_base = "ta_video/_search/" search_base = "ta_video/_search/"
def get(self, request): def get(self, request):
# pylint: disable=unused-argument
"""get request""" """get request"""
data = {"query": {"match_all": {}}} self.data.update({"sort": [{"published": {"order": "desc"}}]})
self.get_document_list(data) self.get_document_list(request)
self.get_paginate()
return Response(self.response) return Response(self.response)
@ -199,11 +216,11 @@ class ChannelApiListView(ApiBaseView):
search_base = "ta_channel/_search/" search_base = "ta_channel/_search/"
def get(self, request):
    """Return one page of channels, sorted by channel name ascending."""
    # the sort has to be in self.data before get_document_list sends
    # the query, otherwise it is never part of the search request
    self.data.update(
        {"sort": [{"channel_name.keyword": {"order": "asc"}}]}
    )
    self.get_document_list(request)

    return Response(self.response)
@ -233,13 +250,16 @@ class ChannelApiVideoView(ApiBaseView):
search_base = "ta_video/_search/" search_base = "ta_video/_search/"
def get(self, request, channel_id): def get(self, request, channel_id):
# pylint: disable=unused-argument
"""handle get request""" """handle get request"""
data = { self.data.update(
"query": {"term": {"channel.channel_id": {"value": channel_id}}} {
} "query": {
self.get_document_list(data) "term": {"channel.channel_id": {"value": channel_id}}
self.get_paginate() },
"sort": [{"published": {"order": "desc"}}],
}
)
self.get_document_list(request)
return Response(self.response, status=self.status_code) return Response(self.response, status=self.status_code)
@ -252,11 +272,11 @@ class PlaylistApiListView(ApiBaseView):
search_base = "ta_playlist/_search/" search_base = "ta_playlist/_search/"
def get(self, request): def get(self, request):
# pylint: disable=unused-argument
"""handle get request""" """handle get request"""
data = {"query": {"match_all": {}}} self.data.update(
self.get_document_list(data) {"sort": [{"playlist_name.keyword": {"order": "asc"}}]}
self.get_paginate() )
self.get_document_list(request)
return Response(self.response) return Response(self.response)
@ -282,22 +302,25 @@ class PlaylistApiVideoView(ApiBaseView):
search_base = "ta_video/_search/" search_base = "ta_video/_search/"
def get(self, request, playlist_id): def get(self, request, playlist_id):
# pylint: disable=unused-argument
"""handle get request""" """handle get request"""
data = { self.data["query"] = {
"query": {"term": {"playlist.keyword": {"value": playlist_id}}} "term": {"playlist.keyword": {"value": playlist_id}}
} }
self.get_document_list(data) self.data.update({"sort": [{"published": {"order": "desc"}}]})
self.get_paginate()
self.get_document_list(request)
return Response(self.response, status=self.status_code) return Response(self.response, status=self.status_code)
class DownloadApiView(ApiBaseView): class DownloadApiView(ApiBaseView):
"""resolves to /api/download/<video_id>/ """resolves to /api/download/<video_id>/
GET: returns metadata dict of an item in the download queue GET: returns metadata dict of an item in the download queue
POST: update status of item to pending or ignore
DELETE: forget from download queue
""" """
search_base = "ta_download/_doc/" search_base = "ta_download/_doc/"
valid_status = ["pending", "ignore"]
def get(self, request, video_id): def get(self, request, video_id):
# pylint: disable=unused-argument # pylint: disable=unused-argument
@ -305,21 +328,53 @@ class DownloadApiView(ApiBaseView):
self.get_document(video_id) self.get_document(video_id)
return Response(self.response, status=self.status_code) return Response(self.response, status=self.status_code)
def post(self, request, video_id):
    """Change the status of a single item in the download queue.

    Expects a ``status`` key in the request body whose value is one of
    ``valid_status``. Anything else, including a missing key, is
    answered with HTTP 400.
    """
    # .get so a body without "status" is rejected below instead of
    # raising KeyError
    item_status = request.data.get("status")
    if item_status not in self.valid_status:
        message = f"{video_id}: invalid status {item_status}"
        print(message)
        return Response({"message": message}, status=400)

    print(f"{video_id}: change status to {item_status}")
    PendingInteract(video_id=video_id, status=item_status).update_status()
    RedisQueue().clear_item(video_id)

    return Response(request.data)
@staticmethod
def delete(request, video_id):
    # pylint: disable=unused-argument
    """forget a single video from the download queue"""
    print(f"{video_id}: delete from queue")
    pending = PendingInteract(video_id=video_id)
    pending.delete_item()

    payload = {"success": True}
    return Response(payload)
class DownloadApiListView(ApiBaseView): class DownloadApiListView(ApiBaseView):
"""resolves to /api/download/ """resolves to /api/download/
GET: returns latest videos in the download queue GET: returns latest videos in the download queue
POST: add a list of videos to download queue POST: add a list of videos to download queue
DELETE: remove items based on query filter
""" """
search_base = "ta_download/_search/" search_base = "ta_download/_search/"
valid_filter = ["pending", "ignore"]
def get(self, request):
    """Return one page of the download queue, sorted by timestamp ascending.

    The optional ``filter`` query parameter limits the result to one of
    ``valid_filter``. Any other value is answered with HTTP 400.
    """
    query_filter = request.GET.get("filter", False)
    self.data.update({"sort": [{"timestamp": {"order": "asc"}}]})
    if query_filter:
        if query_filter not in self.valid_filter:
            message = f"invalid url query filter: {query_filter}"
            print(message)
            return Response({"message": message}, status=400)

        # replace the default query with a term query on the status field
        self.data["query"] = {"term": {"status": {"value": query_filter}}}

    self.get_document_list(request)

    return Response(self.response)
@staticmethod @staticmethod
@ -347,6 +402,20 @@ class DownloadApiListView(ApiBaseView):
return Response(data) return Response(data)
def delete(self, request):
    """delete download queue items selected by the filter query parameter"""
    query_filter = request.GET.get("filter", False)
    if query_filter in self.valid_filter:
        message = f"delete queue by status: {query_filter}"
        print(message)
        PendingInteract(status=query_filter).delete_by_status()
        return Response({"message": message})

    # missing or unknown filter value
    message = f"invalid url query filter: {query_filter}"
    print(message)
    return Response({"message": message}, status=400)
class PingView(ApiBaseView): class PingView(ApiBaseView):
"""resolves to /api/ping/ """resolves to /api/ping/
@ -378,3 +447,18 @@ class LoginApiView(ObtainAuthToken):
print(f"returning token for user with id {user.pk}") print(f"returning token for user with id {user.pk}")
return Response({"token": token.key, "user_id": user.pk}) return Response({"token": token.key, "user_id": user.pk})
class TaskApiView(ApiBaseView):
    """resolves to /api/task/
    POST: start a new background task
    """

    def post(self, request):
        """Start the task named in the request body.

        Answers with HTTP 400 when TaskHandler rejects the request with
        a ValueError, instead of letting it surface as a server error.
        """
        data = request.data
        print(data)
        try:
            response = TaskHandler(data).run_task()
        except ValueError:
            message = "invalid task request"
            print(message)
            return Response({"message": message}, status=400)

        return Response(response)

View File

@ -146,7 +146,16 @@ LOGOUT_REDIRECT_URL = "/login/"
# Cors needed for browser extension # Cors needed for browser extension
# background.js makes the request so HTTP_ORIGIN will be from extension # background.js makes the request so HTTP_ORIGIN will be from extension
CORS_ALLOWED_ORIGIN_REGEXES = [r"moz-extension://*", r"chrome-extension://*"] if environ.get("DISABLE_CORS"):
# disable cors
CORS_ORIGIN_ALLOW_ALL = True
else:
CORS_ALLOWED_ORIGIN_REGEXES = [
r"moz-extension://*",
r"chrome-extension://*",
]
CORS_ALLOWED_ORIGINS = ["http://localhost:3000"]
CORS_ALLOW_HEADERS = list(default_headers) + [ CORS_ALLOW_HEADERS = list(default_headers) + [
"mode", "mode",

View File

@ -147,3 +147,4 @@ class Pagination:
] ]
self.pagination["next_pages"] = next_pages self.pagination["next_pages"] = next_pages
self.pagination["total_hits"] = total_hits

View File

@ -195,6 +195,11 @@ class SubtitleParser:
if flatten: if flatten:
# fix overlapping retiming issue # fix overlapping retiming issue
if "dDurationMs" not in flatten[-1]:
# some events won't have a duration
print(f"failed to parse event without duration: {event}")
continue
last_end = flatten[-1]["tStartMs"] + flatten[-1]["dDurationMs"] last_end = flatten[-1]["tStartMs"] + flatten[-1]["dDurationMs"]
if event["tStartMs"] < last_end: if event["tStartMs"] < last_end:
joined = flatten[-1]["segs"][0]["utf8"] + "\n" + text joined = flatten[-1]["segs"][0]["utf8"] + "\n" + text