New task backend, #build

Changed:
- major refactor of how tasks are handled in the backend
- major refactor of how notifications are returned to UI
- [API] remove old task endpoints
- [API] add endpoints for task-name and task-id
- [API] add token delete endpoint
simon 2023-03-31 22:46:53 +07:00
commit 20a39895f9
36 changed files with 1909 additions and 1552 deletions

View File

@ -11,7 +11,7 @@
![home screenshot](assets/tube-archivist-screenshot-home.png?raw=true "Tube Archivist Home")
## Table of contents:
* [Wiki](https://github.com/tubearchivist/tubearchivist/wiki) with [FAQ](https://github.com/tubearchivist/tubearchivist/wiki/FAQ)
* Docs: [Wiki](https://github.com/tubearchivist/tubearchivist/wiki), [FAQ](https://github.com/tubearchivist/tubearchivist/wiki/FAQ) and [API](https://github.com/tubearchivist/tubearchivist/tree/master/tubearchivist/api)
* [Core functionality](#core-functionality)
* [Resources](#resources)
* [Installing and updating](#installing-and-updating)

View File

@ -87,7 +87,7 @@ function validate {
echo "running black"
black --exclude "migrations/*" --diff --color --check -l 79 "$check_path"
echo "running codespell"
codespell --skip="./.git,./package.json,./package-lock.json,./node_modules" "$check_path"
codespell --skip="./.git,./package.json,./package-lock.json,./node_modules,./.mypy_cache" "$check_path"
echo "running flake8"
flake8 "$check_path" --exclude "migrations" --count --max-complexity=10 \
--max-line-length=79 --show-source --statistics

View File

@ -2,8 +2,9 @@
Documentation of available API endpoints.
Note:
- This is very early stages and will change!
- Check the commit history to see if a documented feature is already in your release
- This has changed in the past and will change again while building out additional integrations and functionality.
- All changes to the API are marked with the `[API]` keyword for easy searching; for example, search the [commits](https://github.com/tubearchivist/tubearchivist/search?o=desc&q=%5Bapi%5D&s=committer-date&type=commits). You'll find the same keyword in the [release notes](https://github.com/tubearchivist/tubearchivist/releases).
- Check the commit history and release notes to see if a documented feature is already in your release.
## Table of contents
- [Authentication](#authentication)
@ -35,9 +36,13 @@ Note:
- [Snapshot List](#snapshot-list-view)
- [Snapshot Single](#snapshot-item-view)
**Task management**
- [Task Name List](#task-name-list-view)
- [Task Name Single](#task-name-item-view)
- [Task ID](#task-id-view)
**Additional**
- [Login](#login-view)
- [Task](#task-view) WIP
- [Refresh](#refresh-view)
- [Cookie](#cookie-view)
- [Search](#search-view)
@ -251,7 +256,7 @@ POST /api/snapshot/
Create a new snapshot now. The call returns immediately with the snapshot name; the snapshot task runs async in the background:
```json
{
"snapshot_name": "ta_daily_<random-id>
"snapshot_name": "ta_daily_<random-id>"
}
```
@ -260,7 +265,7 @@ GET /api/snapshot/\<snapshot-id>/
Return metadata of a single snapshot
```json
{
"id": "ta_daily_<random-id>,
"id": "ta_daily_<random-id>",
"state": "SUCCESS",
"es_version": "0.0.0",
"start_date": "date_str",
@ -276,6 +281,29 @@ Restore this snapshot
DELETE /api/snapshot/\<snapshot-id>/
Remove this snapshot from index
## Task Name List View
GET /api/task-name/
Return all task results
## Task Name Item View
GET /api/task-name/\<task-name>/
Return all task results by task name
POST /api/task-name/\<task-name>/
Start a new task by task name. Only tasks without arguments can be started this way; see `home.tasks.BaseTask.TASK_CONFIG` for details.
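As a minimal sketch, starting one of these tasks with token authentication could look like the following; the base URL, token, and the `update_subscribed` task name are placeholder assumptions, not part of this commit:
```python
import requests

# hypothetical instance URL and API token
URL = "http://localhost:8000/api/task-name/update_subscribed/"
HEADERS = {"Authorization": "Token xxxxxxxxxx"}

# 404 for unknown task names, 400 for tasks that can't be
# started through this endpoint, otherwise a status message
response = requests.post(URL, headers=HEADERS)
print(response.json())  # {"message": "..."}
```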
## Task ID View
GET /api/task-id/\<task-id>/
Return task status by task ID
POST /api/task-id/\<task-id>/
```json
{
"command": "stop|kill"
}
```
Send command to a task, valid commands: `stop` and `kill`.
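A comparable sketch for stopping a running task; the token and task ID are placeholders:
```python
import requests

# hypothetical token and celery task ID
URL = "http://localhost:8000/api/task-id/0d32d4fa-1234-5678-9abc-def012345678/"
HEADERS = {"Authorization": "Token xxxxxxxxxx"}

# 400 for invalid commands or tasks that don't support stopping,
# 404 for unknown task IDs, otherwise {"message": "command sent"}
response = requests.post(URL, headers=HEADERS, json={"command": "stop"})
print(response.status_code, response.json())
```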
## Login View
Return token and user ID for username and password:
POST /api/login/
@ -294,33 +322,6 @@ after successful login returns
}
```
## Task View
GET /api/task/
POST /api/task/
Check if there is an ongoing task:
GET /api/task/
Returns:
```json
{
"rescan": false,
"downloading": false
}
```
Start a background task
POST /api/task/
```json
{
"run": "task_name"
}
```
List of valid task names:
- **download_pending**: Start the download queue
- **rescan_pending**: Rescan your subscriptions
## Refresh View
GET /api/refresh/
parameters:

View File

@ -1,54 +0,0 @@
"""
Functionality:
- process tasks from API
- validate
- handover to celery
"""
from home.src.ta.ta_redis import RedisArchivist
from home.tasks import download_pending, update_subscribed
class TaskHandler:
"""handle tasks from api"""
def __init__(self, data):
self.data = data
def run_task(self):
"""map data and run"""
task_name = self.data["run"]
try:
to_run = self.exec_map(task_name)
except KeyError as err:
print(f"invalid task name {task_name}")
raise ValueError from err
response = to_run()
response.update({"task": task_name})
return response
def exec_map(self, task_name):
"""map dict key and return function to execute"""
exec_map = {
"download_pending": self._download_pending,
"rescan_pending": self._rescan_pending,
}
return exec_map[task_name]
@staticmethod
def _rescan_pending():
"""look for new items in subscribed channels"""
print("rescan subscribed channels")
update_subscribed.delay()
return {"success": True}
@staticmethod
def _download_pending():
"""start the download queue"""
print("download pending")
running = download_pending.delay()
print("set task id: " + running.id)
RedisArchivist().set_message("dl_queue_id", running.id)
return {"success": True}

View File

@ -1,138 +1,134 @@
"""all api urls"""
from api.views import (
ChannelApiListView,
ChannelApiVideoView,
ChannelApiView,
CookieView,
DownloadApiListView,
DownloadApiView,
LoginApiView,
PingView,
PlaylistApiListView,
PlaylistApiVideoView,
PlaylistApiView,
RefreshView,
SearchView,
SnapshotApiListView,
SnapshotApiView,
TaskApiView,
VideoApiListView,
VideoApiView,
VideoCommentView,
VideoProgressView,
VideoSimilarView,
VideoSponsorView,
WatchedView,
)
from api import views
from django.urls import path
urlpatterns = [
path("ping/", PingView.as_view(), name="ping"),
path("login/", LoginApiView.as_view(), name="api-login"),
path("ping/", views.PingView.as_view(), name="ping"),
path("login/", views.LoginApiView.as_view(), name="api-login"),
path(
"video/",
VideoApiListView.as_view(),
views.VideoApiListView.as_view(),
name="api-video-list",
),
path(
"video/<slug:video_id>/",
VideoApiView.as_view(),
views.VideoApiView.as_view(),
name="api-video",
),
path(
"video/<slug:video_id>/progress/",
VideoProgressView.as_view(),
views.VideoProgressView.as_view(),
name="api-video-progress",
),
path(
"video/<slug:video_id>/comment/",
VideoCommentView.as_view(),
views.VideoCommentView.as_view(),
name="api-video-comment",
),
path(
"video/<slug:video_id>/similar/",
VideoSimilarView.as_view(),
views.VideoSimilarView.as_view(),
name="api-video-similar",
),
path(
"video/<slug:video_id>/sponsor/",
VideoSponsorView.as_view(),
views.VideoSponsorView.as_view(),
name="api-video-sponsor",
),
path(
"channel/",
ChannelApiListView.as_view(),
views.ChannelApiListView.as_view(),
name="api-channel-list",
),
path(
"channel/<slug:channel_id>/",
ChannelApiView.as_view(),
views.ChannelApiView.as_view(),
name="api-channel",
),
path(
"channel/<slug:channel_id>/video/",
ChannelApiVideoView.as_view(),
views.ChannelApiVideoView.as_view(),
name="api-channel-video",
),
path(
"playlist/",
PlaylistApiListView.as_view(),
views.PlaylistApiListView.as_view(),
name="api-playlist-list",
),
path(
"playlist/<slug:playlist_id>/",
PlaylistApiView.as_view(),
views.PlaylistApiView.as_view(),
name="api-playlist",
),
path(
"playlist/<slug:playlist_id>/video/",
PlaylistApiVideoView.as_view(),
views.PlaylistApiVideoView.as_view(),
name="api-playlist-video",
),
path(
"download/",
DownloadApiListView.as_view(),
views.DownloadApiListView.as_view(),
name="api-download-list",
),
path(
"download/<slug:video_id>/",
DownloadApiView.as_view(),
views.DownloadApiView.as_view(),
name="api-download",
),
path(
"refresh/",
RefreshView.as_view(),
views.RefreshView.as_view(),
name="api-refresh",
),
path(
"task/",
TaskApiView.as_view(),
name="api-task",
),
path(
"snapshot/",
SnapshotApiListView.as_view(),
views.SnapshotApiListView.as_view(),
name="api-snapshot-list",
),
path(
"snapshot/<slug:snapshot_id>/",
SnapshotApiView.as_view(),
views.SnapshotApiView.as_view(),
name="api-snapshot",
),
path(
"task-name/",
views.TaskListView.as_view(),
name="api-task-list",
),
path(
"task-name/<slug:task_name>/",
views.TaskNameListView.as_view(),
name="api-task-name-list",
),
path(
"task-id/<slug:task_id>/",
views.TaskIDView.as_view(),
name="api-task-id",
),
path(
"cookie/",
CookieView.as_view(),
views.CookieView.as_view(),
name="api-cookie",
),
path(
"watched/",
WatchedView.as_view(),
views.WatchedView.as_view(),
name="api-watched",
),
path(
"search/",
SearchView.as_view(),
views.SearchView.as_view(),
name="api-search",
),
path(
"token/",
views.TokenView.as_view(),
name="api-token",
),
path(
"notification/",
views.NotificationView.as_view(),
name="api-notification",
),
]

View File

@ -1,7 +1,6 @@
"""all API views"""
from api.src.search_processor import SearchProcess
from api.src.task_processor import TaskHandler
from home.src.download.queue import PendingInteract
from home.src.download.yt_dlp_base import CookieHandler
from home.src.es.connect import ElasticWrap
@ -14,8 +13,15 @@ from home.src.index.reindex import ReindexProgress
from home.src.index.video import SponsorBlock, YoutubeVideo
from home.src.ta.config import AppConfig
from home.src.ta.ta_redis import RedisArchivist, RedisQueue
from home.src.ta.task_manager import TaskCommand, TaskManager
from home.src.ta.urlparser import Parser
from home.tasks import check_reindex, download_single, extrac_dl, subscribe_to
from home.tasks import (
BaseTask,
check_reindex,
download_pending,
extrac_dl,
subscribe_to,
)
from rest_framework.authentication import (
SessionAuthentication,
TokenAuthentication,
@ -424,14 +430,15 @@ class DownloadApiView(ApiBaseView):
print(message)
return Response({"message": message}, status=400)
pending_video, status_code = PendingInteract(video_id).get_item()
_, status_code = PendingInteract(video_id).get_item()
if status_code == 404:
message = f"{video_id}: item not found {status_code}"
return Response({"message": message}, status=404)
print(f"{video_id}: change status to {item_status}")
if item_status == "priority":
download_single.delay(pending_video)
PendingInteract(youtube_id=video_id).prioritize()
download_pending.delay(from_queue=False)
else:
PendingInteract(video_id, item_status).update_status()
RedisQueue(queue_name="dl_queue").clear_item(video_id)
@ -554,29 +561,6 @@ class LoginApiView(ObtainAuthToken):
return Response({"token": token.key, "user_id": user.pk})
class TaskApiView(ApiBaseView):
"""resolves to /api/task/
GET: check if ongoing background task
POST: start a new background task
"""
@staticmethod
def get(request):
"""handle get request"""
# pylint: disable=unused-argument
response = {"rescan": False, "downloading": False}
for key in response.keys():
response[key] = RedisArchivist().is_locked(key)
return Response(response)
def post(self, request):
"""handle post request"""
response = TaskHandler(request.data).run_task()
return Response(response)
class SnapshotApiListView(ApiBaseView):
"""resolves to /api/snapshot/
GET: returns snapshot config plus list of existing snapshots
@ -641,6 +625,108 @@ class SnapshotApiView(ApiBaseView):
return Response(response)
class TaskListView(ApiBaseView):
"""resolves to /api/task-name/
GET: return a list of all stored task results
"""
def get(self, request):
"""handle get request"""
# pylint: disable=unused-argument
all_results = TaskManager().get_all_results()
return Response(all_results)
class TaskNameListView(ApiBaseView):
"""resolves to /api/task-name/<task-name>/
GET: return a list of stored results for this task name
POST: start new background process
"""
def get(self, request, task_name):
"""handle get request"""
# pylint: disable=unused-argument
if task_name not in BaseTask.TASK_CONFIG:
message = {"message": "invalid task name"}
return Response(message, status=404)
all_results = TaskManager().get_tasks_by_name(task_name)
return Response(all_results)
def post(self, request, task_name):
"""
handle post request
404 for invalid task_name
400 if the task can't be started through this endpoint
"""
# pylint: disable=unused-argument
task_config = BaseTask.TASK_CONFIG.get(task_name)
if not task_config:
message = {"message": "invalid task name"}
return Response(message, status=404)
if not task_config.get("api-start"):
message = {"message": "can not start task through this endpoint"}
return Response(message, status=400)
message = TaskCommand().start(task_name)
return Response({"message": message})
class TaskIDView(ApiBaseView):
"""resolves to /api/task-id/<task-id>/
GET: return details of task id
"""
valid_commands = ["stop", "kill"]
def get(self, request, task_id):
"""handle get request"""
# pylint: disable=unused-argument
task_result = TaskManager().get_task(task_id)
if not task_result:
message = {"message": "task id not found"}
return Response(message, status=404)
return Response(task_result)
def post(self, request, task_id):
"""post command to task"""
command = request.data.get("command")
if not command or command not in self.valid_commands:
message = {"message": "no valid command found"}
return Response(message, status=400)
task_result = TaskManager().get_task(task_id)
if not task_result:
message = {"message": "task id not found"}
return Response(message, status=404)
task_conf = BaseTask.TASK_CONFIG.get(task_result.get("name"))
if command == "stop":
if not task_conf.get("api-stop"):
message = {"message": "task can not be stopped"}
return Response(message, status=400)
message_key = self._build_message_key(task_conf, task_id)
TaskCommand().stop(task_id, message_key)
if command == "kill":
if not task_conf.get("api-stop"):
message = {"message": "task can not be killed"}
return Response(message, status=400)
TaskCommand().kill(task_id)
return Response({"message": "command sent"})
def _build_message_key(self, task_conf, task_id):
"""build message key to forward command to notification"""
return f"message:{task_conf.get('group')}:{task_id.split('-')[0]}"
class RefreshView(ApiBaseView):
"""resolves to /api/refresh/
GET: get refresh progress
@ -760,3 +846,33 @@ class SearchView(ApiBaseView):
search_results = SearchForm().multi_search(search_query)
return Response(search_results)
class TokenView(ApiBaseView):
"""resolves to /api/token/
DELETE: revoke the token
"""
@staticmethod
def delete(request):
print("revoke API token")
request.user.auth_token.delete()
return Response({"success": True})
class NotificationView(ApiBaseView):
"""resolves to /api/notification/
GET: returns a list of notifications
filter query to filter messages by group
"""
valid_filters = ["download", "settings", "channel"]
def get(self, request):
"""get all notifications"""
query = "message"
filter_by = request.GET.get("filter", None)
if filter_by in self.valid_filters:
query = f"{query}:{filter_by}"
return Response(RedisArchivist().list_items(query))
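The two new views above translate to straightforward API calls; a hedged sketch, with instance URL and token as placeholder assumptions:
```python
import requests

BASE = "http://localhost:8000/api"  # hypothetical instance URL
HEADERS = {"Authorization": "Token xxxxxxxxxx"}  # placeholder token

# list notifications, optionally filtered by group;
# valid filters in this commit: download, settings, channel
notifications = requests.get(
    f"{BASE}/notification/", params={"filter": "download"}, headers=HEADERS
)
print(notifications.json())

# revoke the API token; subsequent requests with it will be rejected
revoked = requests.delete(f"{BASE}/token/", headers=HEADERS)
print(revoked.json())  # {"success": true}
```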

View File

@ -43,7 +43,6 @@ class Command(BaseCommand):
self.stdout.write("[1] connect to Redis")
redis_conn = RedisArchivist().conn
for _ in range(5):
sleep(2)
try:
pong = redis_conn.execute_command("PING")
if pong:
@ -54,10 +53,12 @@ class Command(BaseCommand):
except Exception: # pylint: disable=broad-except
self.stdout.write(" ... retry Redis connection")
sleep(2)
message = " 🗙 Redis connection failed"
self.stdout.write(self.style.ERROR(f"{message}"))
RedisArchivist().exec("PING")
sleep(60)
raise CommandError(message)
def _redis_config_set(self):
@ -75,13 +76,13 @@ class Command(BaseCommand):
self.stdout.write("[3] connect to Elastic Search")
total = self.TIMEOUT // 5
for i in range(total):
sleep(5)
self.stdout.write(f" ... waiting for ES [{i}/{total}]")
try:
_, status_code = ElasticWrap("/").get(
timeout=1, print_error=False
)
except requests.exceptions.ConnectionError:
sleep(5)
continue
if status_code and status_code == 200:
@ -98,6 +99,7 @@ class Command(BaseCommand):
self.stdout.write(self.style.ERROR(f"{message}"))
self.stdout.write(f" error message: {response}")
self.stdout.write(f" status code: {status_code}")
sleep(60)
raise CommandError(message)
def _es_version_check(self):
@ -118,6 +120,7 @@ class Command(BaseCommand):
+ f"Expected {self.MIN_MAJOR}.{self.MIN_MINOR} but got {version}"
)
self.stdout.write(self.style.ERROR(f"{message}"))
sleep(60)
raise CommandError(message)
def _es_path_check(self):
@ -137,4 +140,5 @@ class Command(BaseCommand):
+ " path.repo=/usr/share/elasticsearch/data/snapshot"
)
self.stdout.write(self.style.ERROR(f"{message}"))
sleep(60)
raise CommandError(message)

View File

@ -14,6 +14,7 @@ from home.src.es.snapshot import ElasticSnapshot
from home.src.ta.config import AppConfig, ReleaseVersion
from home.src.ta.helper import clear_dl_cache
from home.src.ta.ta_redis import RedisArchivist
from home.src.ta.task_manager import TaskManager
TOPIC = """
@ -35,6 +36,7 @@ class Command(BaseCommand):
self._sync_redis_state()
self._make_folders()
self._release_locks()
self._clear_tasks()
self._clear_dl_cache()
self._version_check()
self._mig_index_setup()
@ -96,9 +98,23 @@ class Command(BaseCommand):
if not has_changed:
self.stdout.write(self.style.SUCCESS(" no locks found"))
def _clear_tasks(self):
"""clear tasks and messages"""
self.stdout.write("[4] clear task leftovers")
TaskManager().fail_pending()
redis_con = RedisArchivist()
to_delete = redis_con.list_keys("message:")
if to_delete:
for key in to_delete:
redis_con.del_message(key)
self.stdout.write(
self.style.SUCCESS(f" ✓ cleared {len(to_delete)} messages")
)
def _clear_dl_cache(self):
"""clear leftover files from dl cache"""
self.stdout.write("[4] clear leftover files from dl cache")
self.stdout.write("[5] clear leftover files from dl cache")
config = AppConfig().config
leftover_files = clear_dl_cache(config)
if leftover_files:
@ -110,7 +126,7 @@ class Command(BaseCommand):
def _version_check(self):
"""remove new release key if updated now"""
self.stdout.write("[5] check for first run after update")
self.stdout.write("[6] check for first run after update")
new_version = ReleaseVersion().is_updated()
if new_version:
self.stdout.write(
@ -175,4 +191,5 @@ class Command(BaseCommand):
message = f" 🗙 {index_name} vid_type update failed"
self.stdout.write(self.style.ERROR(message))
self.stdout.write(response)
sleep(60)
raise CommandError(message)

View File

@ -265,4 +265,4 @@ CORS_ALLOW_HEADERS = list(default_headers) + [
# TA application settings
TA_UPSTREAM = "https://github.com/tubearchivist/tubearchivist"
TA_VERSION = "v0.3.4"
TA_VERSION = "v0.3.5-unstable"

View File

@ -18,7 +18,7 @@ from home.src.index.playlist import YoutubePlaylist
from home.src.index.video_constants import VideoTypeEnum
from home.src.ta.config import AppConfig
from home.src.ta.helper import DurationConverter, is_shorts
from home.src.ta.ta_redis import RedisArchivist
from home.src.ta.ta_redis import RedisQueue
class PendingIndex:
@ -117,6 +117,16 @@ class PendingInteract:
path = f"ta_download/_update/{self.youtube_id}"
_, _ = ElasticWrap(path).post(data=data)
def prioritize(self):
"""prioritize pending item in redis queue"""
pending_video, _ = self.get_item()
vid_type = pending_video.get("vid_type", VideoTypeEnum.VIDEOS.value)
to_add = {
"youtube_id": pending_video["youtube_id"],
"vid_type": vid_type,
}
RedisQueue(queue_name="dl_queue").add_priority(to_add)
def get_item(self):
"""return pending item dict"""
path = f"ta_download/_doc/{self.youtube_id}"
@ -151,12 +161,14 @@ class PendingList(PendingIndex):
"noplaylist": True,
"writethumbnail": True,
"simulate": True,
"check_formats": None,
}
def __init__(self, youtube_ids=False):
def __init__(self, youtube_ids=False, task=False):
super().__init__()
self.config = AppConfig().config
self.youtube_ids = youtube_ids
self.task = task
self.to_skip = False
self.missing_videos = False
@ -165,16 +177,16 @@ class PendingList(PendingIndex):
self.missing_videos = []
self.get_download()
self.get_indexed()
for entry in self.youtube_ids:
# notify
mess_dict = {
"status": "message:add",
"level": "info",
"title": "Adding to download queue.",
"message": "Extracting lists",
}
RedisArchivist().set_message("message:add", mess_dict, expire=True)
total = len(self.youtube_ids)
for idx, entry in enumerate(self.youtube_ids):
self._process_entry(entry)
if not self.task:
continue
self.task.send_progress(
message_lines=[f"Extracting items {idx + 1}/{total}"],
progress=(idx + 1) / total,
)
def _process_entry(self, entry):
"""process single entry from url list"""
@ -228,9 +240,13 @@ class PendingList(PendingIndex):
self.get_channels()
bulk_list = []
total = len(self.missing_videos)
for idx, (youtube_id, vid_type) in enumerate(self.missing_videos):
print(f"{youtube_id} ({vid_type}): add to download queue")
self._notify_add(idx)
if self.task and self.task.is_stopped():
break
print(f"{youtube_id}: [{idx + 1}/{total}]: add to queue")
self._notify_add(idx, total)
video_details = self.get_youtube_details(youtube_id, vid_type)
if not video_details:
continue
@ -243,29 +259,34 @@ class PendingList(PendingIndex):
url = video_details["vid_thumb_url"]
ThumbManager(youtube_id).download_video_thumb(url)
if bulk_list:
# add last newline
bulk_list.append("\n")
query_str = "\n".join(bulk_list)
_, _ = ElasticWrap("_bulk").post(query_str, ndjson=True)
if len(bulk_list) >= 20:
self._ingest_bulk(bulk_list)
bulk_list = []
def _notify_add(self, idx):
self._ingest_bulk(bulk_list)
def _ingest_bulk(self, bulk_list):
"""add items to queue in bulk"""
if not bulk_list:
return
# add last newline
bulk_list.append("\n")
query_str = "\n".join(bulk_list)
_, _ = ElasticWrap("_bulk").post(query_str, ndjson=True)
def _notify_add(self, idx, total):
"""send notification for adding videos to download queue"""
progress = f"{idx + 1}/{len(self.missing_videos)}"
mess_dict = {
"status": "message:add",
"level": "info",
"title": "Adding new videos to download queue.",
"message": "Progress: " + progress,
}
if idx + 1 == len(self.missing_videos):
expire = 4
else:
expire = True
if not self.task:
return
RedisArchivist().set_message("message:add", mess_dict, expire=expire)
if idx + 1 % 25 == 0:
print("adding to queue progress: " + progress)
self.task.send_progress(
message_lines=[
"Adding new videos to download queue.",
f"Extracting items {idx + 1}/{total}",
],
progress=(idx + 1) / total,
)
def get_youtube_details(self, youtube_id, vid_type=VideoTypeEnum.VIDEOS):
"""get details from youtubedl for single pending video"""

View File

@ -13,13 +13,15 @@ from home.src.index.playlist import YoutubePlaylist
from home.src.index.video_constants import VideoTypeEnum
from home.src.ta.config import AppConfig
from home.src.ta.ta_redis import RedisArchivist
from home.src.ta.urlparser import Parser
class ChannelSubscription:
"""manage the list of channels subscribed"""
def __init__(self):
def __init__(self, task=False):
self.config = AppConfig().config
self.task = task
@staticmethod
def get_channels(subscribed_only=True):
@ -44,7 +46,7 @@ class ChannelSubscription:
last_videos = []
for vid_type, limit_amount in queries:
for vid_type_enum, limit_amount in queries:
obs = {
"skip_download": True,
"extract_flat": True,
@ -52,9 +54,9 @@ class ChannelSubscription:
if limit:
obs["playlistend"] = limit_amount
path = vid_type.value
vid_type = vid_type_enum.value
channel = YtWrap(obs, self.config).extract(
f"https://www.youtube.com/channel/{channel_id}/{path}"
f"https://www.youtube.com/channel/{channel_id}/{vid_type}"
)
if not channel:
continue
@ -101,12 +103,16 @@ class ChannelSubscription:
def find_missing(self):
"""add missing videos from subscribed channels to pending"""
all_channels = self.get_channels()
if not all_channels:
return False
pending = queue.PendingList()
pending.get_download()
pending.get_indexed()
missing_videos = []
total = len(all_channels)
for idx, channel in enumerate(all_channels):
channel_id = channel["channel_id"]
print(f"{channel_id}: find missing videos.")
@ -116,21 +122,19 @@ class ChannelSubscription:
for video_id, _, vid_type in last_videos:
if video_id not in pending.to_skip:
missing_videos.append((video_id, vid_type))
# notify
message = {
"status": "message:rescan",
"level": "info",
"title": "Scanning channels: Looking for new videos.",
"message": f"Progress: {idx + 1}/{len(all_channels)}",
}
if idx + 1 == len(all_channels):
expire = 4
else:
expire = True
RedisArchivist().set_message(
"message:rescan", message=message, expire=expire
)
if not self.task:
continue
if self.task:
if self.task.is_stopped():
self.task.send_progress(["Received Stop signal."])
break
self.task.send_progress(
message_lines=[f"Scanning Channel {idx + 1}/{total}"],
progress=(idx + 1) / total,
)
return missing_videos
@ -147,8 +151,9 @@ class ChannelSubscription:
class PlaylistSubscription:
"""manage the playlist download functionality"""
def __init__(self):
def __init__(self, task=False):
self.config = AppConfig().config
self.task = task
@staticmethod
def get_playlists(subscribed_only=True):
@ -232,14 +237,18 @@ class PlaylistSubscription:
def find_missing(self):
"""find videos in subscribed playlists not downloaded yet"""
all_playlists = [i["playlist_id"] for i in self.get_playlists()]
if not all_playlists:
return False
to_ignore = self.get_to_ignore()
missing_videos = []
total = len(all_playlists)
for idx, playlist_id in enumerate(all_playlists):
size_limit = self.config["subscriptions"]["channel_size"]
playlist = YoutubePlaylist(playlist_id)
playlist.update_playlist()
if not playlist:
is_active = playlist.update_playlist()
if not is_active:
playlist.deactivate()
continue
@ -249,19 +258,123 @@ class PlaylistSubscription:
all_missing = [i for i in playlist_entries if not i["downloaded"]]
message = {
"status": "message:rescan",
"level": "info",
"title": "Scanning playlists: Looking for new videos.",
"message": f"Progress: {idx + 1}/{len(all_playlists)}",
}
RedisArchivist().set_message(
"message:rescan", message=message, expire=True
)
for video in all_missing:
youtube_id = video["youtube_id"]
if youtube_id not in to_ignore:
missing_videos.append(youtube_id)
if not self.task:
continue
if self.task:
self.task.send_progress(
message_lines=[f"Scanning Playlists {idx + 1}/{total}"],
progress=(idx + 1) / total,
)
if self.task.is_stopped():
self.task.send_progress(["Received Stop signal."])
break
return missing_videos
class SubscriptionScanner:
"""add missing videos to queue"""
def __init__(self, task=False):
self.task = task
self.missing_videos = False
def scan(self):
"""scan channels and playlists"""
if self.task:
self.task.send_progress(["Rescanning channels and playlists."])
self.missing_videos = []
self.scan_channels()
if self.task and not self.task.is_stopped():
self.scan_playlists()
return self.missing_videos
def scan_channels(self):
"""get missing from channels"""
channel_handler = ChannelSubscription(task=self.task)
missing = channel_handler.find_missing()
if not missing:
return
for vid_id, vid_type in missing:
self.missing_videos.append(
{"type": "video", "vid_type": vid_type, "url": vid_id}
)
def scan_playlists(self):
"""get missing from playlists"""
playlist_handler = PlaylistSubscription(task=self.task)
missing = playlist_handler.find_missing()
if not missing:
return
for i in missing:
self.missing_videos.append(
{
"type": "video",
"vid_type": VideoTypeEnum.VIDEOS.value,
"url": i,
}
)
class SubscriptionHandler:
"""subscribe to channels and playlists from url_str"""
def __init__(self, url_str, task=False):
self.url_str = url_str
self.task = task
self.to_subscribe = False
def subscribe(self):
"""subscribe to url_str items"""
if self.task:
self.task.send_progress(["Processing form content."])
self.to_subscribe = Parser(self.url_str).parse()
total = len(self.to_subscribe)
for idx, item in enumerate(self.to_subscribe):
if self.task:
self._notify(idx, item, total)
self.subscribe_type(item)
def subscribe_type(self, item):
"""process single item"""
if item["type"] == "playlist":
PlaylistSubscription().process_url_str([item])
return
if item["type"] == "video":
# extract channel id from video
vid = queue.PendingList().get_youtube_details(item["url"])
channel_id = vid["channel_id"]
elif item["type"] == "channel":
channel_id = item["url"]
else:
raise ValueError("failed to subscribe to: " + item["url"])
self._subscribe(channel_id)
def _subscribe(self, channel_id):
"""subscribe to channel"""
ChannelSubscription().change_subscribe(
channel_id, channel_subscribed=True
)
def _notify(self, idx, item, total):
"""send notification message to redis"""
subscribe_type = item["type"].title()
message_lines = [
f"Subscribe to {subscribe_type}",
f"Progress: {idx + 1}/{total}",
]
self.task.send_progress(message_lines, progress=(idx + 1) / total)

View File

@ -10,8 +10,7 @@ from io import BytesIO
from time import sleep
import requests
from home.src.download import queue # partial import
from home.src.es.connect import IndexPaginate
from home.src.es.connect import ElasticWrap, IndexPaginate
from home.src.ta.config import AppConfig
from mutagen.mp4 import MP4, MP4Cover
from PIL import Image, ImageFile, ImageFilter, UnidentifiedImageError
@ -272,102 +271,121 @@ class ValidatorCallback:
class ThumbValidator:
"""validate thumbnails"""
def download_missing(self):
"""download all missing artwork"""
self.download_missing_videos()
self.download_missing_channels()
self.download_missing_playlists()
def download_missing_videos(self):
"""get all missing video thumbnails"""
data = {
"query": {"term": {"active": {"value": True}}},
"sort": [{"youtube_id": {"order": "asc"}}],
"_source": ["vid_thumb_url", "youtube_id"],
}
paginate = IndexPaginate(
"ta_video", data, size=5000, callback=ValidatorCallback
)
_ = paginate.get_results()
def download_missing_channels(self):
"""get all missing channel thumbnails"""
data = {
"query": {"term": {"channel_active": {"value": True}}},
"sort": [{"channel_id": {"order": "asc"}}],
"_source": {
"excludes": ["channel_description", "channel_overwrites"]
INDEX = [
{
"data": {
"query": {"term": {"active": {"value": True}}},
"_source": ["vid_thumb_url", "youtube_id"],
},
}
paginate = IndexPaginate(
"ta_channel", data, callback=ValidatorCallback
)
_ = paginate.get_results()
"name": "ta_video",
},
{
"data": {
"query": {"term": {"channel_active": {"value": True}}},
"_source": {
"excludes": ["channel_description", "channel_overwrites"]
},
},
"name": "ta_channel",
},
{
"data": {
"query": {"term": {"playlist_active": {"value": True}}},
"_source": ["playlist_id", "playlist_thumbnail"],
},
"name": "ta_playlist",
},
]
def download_missing_playlists(self):
"""get all missing playlist artwork"""
data = {
"query": {"term": {"playlist_active": {"value": True}}},
"sort": [{"playlist_id": {"order": "asc"}}],
"_source": ["playlist_id", "playlist_thumbnail"],
}
paginate = IndexPaginate(
"ta_playlist", data, callback=ValidatorCallback
)
_ = paginate.get_results()
def __init__(self, task):
self.task = task
def validate(self):
"""validate all indexes"""
for index in self.INDEX:
total = self._get_total(index["name"])
if not total:
continue
paginate = IndexPaginate(
index_name=index["name"],
data=index["data"],
size=1000,
callback=ValidatorCallback,
task=self.task,
total=total,
)
_ = paginate.get_results()
@staticmethod
def _get_total(index_name):
"""get total documents in index"""
path = f"{index_name}/_count"
response, _ = ElasticWrap(path).get()
return response.get("count")
class ThumbFilesystem:
"""filesystem tasks for thumbnails"""
"""sync thumbnail files to media files"""
INDEX_NAME = "ta_video"
def __init__(self, task=False):
self.task = task
def embed(self):
"""entry point"""
data = {
"query": {"match_all": {}},
"_source": ["media_url", "youtube_id"],
}
paginate = IndexPaginate(
index_name=self.INDEX_NAME,
data=data,
callback=EmbedCallback,
task=self.task,
total=self._get_total(),
)
_ = paginate.get_results()
def _get_total(self):
"""get total documents in index"""
path = f"{self.INDEX_NAME}/_count"
response, _ = ElasticWrap(path).get()
return response.get("count")
class EmbedCallback:
"""callback class to embed thumbnails"""
CONFIG = AppConfig().config
CACHE_DIR = CONFIG["application"]["cache_dir"]
MEDIA_DIR = CONFIG["application"]["videos"]
VIDEO_DIR = os.path.join(CACHE_DIR, "videos")
FORMAT = MP4Cover.FORMAT_JPEG
def sync(self):
"""embed thumbnails to mediafiles"""
video_list = self.get_thumb_list()
self._embed_thumbs(video_list)
def __init__(self, source, index_name):
self.source = source
self.index_name = index_name
def get_thumb_list(self):
"""get list of mediafiles and matching thumbnails"""
pending = queue.PendingList()
pending.get_download()
pending.get_indexed()
video_list = []
for video in pending.all_videos:
video_id = video["youtube_id"]
media_url = os.path.join(self.MEDIA_DIR, video["media_url"])
def run(self):
"""run embed"""
for video in self.source:
video_id = video["_source"]["youtube_id"]
media_url = os.path.join(
self.MEDIA_DIR, video["_source"]["media_url"]
)
thumb_path = os.path.join(
self.CACHE_DIR, ThumbManager(video_id).vid_thumb_path()
)
video_list.append(
{
"media_url": media_url,
"thumb_path": thumb_path,
}
)
if os.path.exists(thumb_path):
self.embed(media_url, thumb_path)
return video_list
def embed(self, media_url, thumb_path):
"""embed thumb in single media file"""
video = MP4(media_url)
with open(thumb_path, "rb") as f:
video["covr"] = [MP4Cover(f.read(), imageformat=self.FORMAT)]
@staticmethod
def _embed_thumbs(video_list):
"""rewrite the thumbnail into media file"""
counter = 1
for video in video_list:
# loop through all videos
media_url = video["media_url"]
thumb_path = video["thumb_path"]
mutagen_vid = MP4(media_url)
with open(thumb_path, "rb") as f:
mutagen_vid["covr"] = [
MP4Cover(f.read(), imageformat=MP4Cover.FORMAT_JPEG)
]
mutagen_vid.save()
if counter % 50 == 0:
print(f"thumbnail write progress {counter}/{len(video_list)}")
counter = counter + 1
video.save()

View File

@ -22,7 +22,7 @@ from home.src.index.video import YoutubeVideo, index_new_video
from home.src.index.video_constants import VideoTypeEnum
from home.src.ta.config import AppConfig
from home.src.ta.helper import clean_string, ignore_filelist
from home.src.ta.ta_redis import RedisArchivist, RedisQueue
from home.src.ta.ta_redis import RedisQueue
class DownloadPostProcess:
@ -96,7 +96,7 @@ class DownloadPostProcess:
def validate_playlists(self):