yt-dlp uploader id fix, #build

Changed:
- Fix for yt-dlp parsing error [2023.02.17]
- Fix for various thumbnail extraction errors
- Fix download queue filter
- [API] Add filter for subscribed channels
This commit is contained in:
simon 2023-02-17 20:58:19 +07:00
commit 9e1316b543
No known key found for this signature in database
GPG Key ID: 2C15AA5E89985DD4
10 changed files with 77 additions and 24 deletions

View File

@ -1,5 +1,5 @@
# Video Page # Video Page
Every video downloaded gets a dedicated page accessible at `/video/<video-id>/` of your Tube Archivist. Every video downloaded gets a dedicated page accessible at `/video/<video-id>/` of your Tube Archivist. Throughout the interface, click on a video title to access the video page.
Clicking on the channel name or the channel icon will bring you to the dedicated channel detail [page](Channels#channel-detail). Clicking on the channel name or the channel icon will bring you to the dedicated channel detail [page](Channels#channel-detail).

View File

@ -155,6 +155,9 @@ Timestamps either *int* or *float*, end time can't be before start time.
## Channel List View ## Channel List View
/api/channel/ /api/channel/
Parameter:
- filter: subscribed
### Subscribe to a list of channels ### Subscribe to a list of channels
POST /api/channel/ POST /api/channel/
```json ```json

View File

@ -289,14 +289,27 @@ class ChannelApiListView(ApiBaseView):
""" """
search_base = "ta_channel/_search/" search_base = "ta_channel/_search/"
valid_filter = ["subscribed"]
def get(self, request): def get(self, request):
"""get request""" """get request"""
self.get_document_list(request)
self.data.update( self.data.update(
{"sort": [{"channel_name.keyword": {"order": "asc"}}]} {"sort": [{"channel_name.keyword": {"order": "asc"}}]}
) )
query_filter = request.GET.get("filter", False)
must_list = []
if query_filter:
if query_filter not in self.valid_filter:
message = f"invalid url query filder: {query_filter}"
print(message)
return Response({"message": message}, status=400)
must_list.append({"term": {"channel_subscribed": {"value": True}}})
self.data["query"] = {"bool": {"must": must_list}}
self.get_document_list(request)
return Response(self.response) return Response(self.response)
@staticmethod @staticmethod
@ -430,7 +443,7 @@ class DownloadApiView(ApiBaseView):
# pylint: disable=unused-argument # pylint: disable=unused-argument
"""delete single video from queue""" """delete single video from queue"""
print(f"{video_id}: delete from queue") print(f"{video_id}: delete from queue")
PendingInteract(video_id=video_id).delete_item() PendingInteract(video_id).delete_item()
return Response({"success": True}) return Response({"success": True})

View File

@ -96,13 +96,13 @@ class PendingIndex:
class PendingInteract: class PendingInteract:
"""interact with items in download queue""" """interact with items in download queue"""
def __init__(self, video_id=False, status=False): def __init__(self, youtube_id=False, status=False):
self.video_id = video_id self.youtube_id = youtube_id
self.status = status self.status = status
def delete_item(self): def delete_item(self):
"""delete single item from pending""" """delete single item from pending"""
path = f"ta_download/_doc/{self.video_id}" path = f"ta_download/_doc/{self.youtube_id}"
_, _ = ElasticWrap(path).delete(refresh=True) _, _ = ElasticWrap(path).delete(refresh=True)
def delete_by_status(self): def delete_by_status(self):
@ -114,15 +114,35 @@ class PendingInteract:
def update_status(self): def update_status(self):
"""update status field of pending item""" """update status field of pending item"""
data = {"doc": {"status": self.status}} data = {"doc": {"status": self.status}}
path = f"ta_download/_update/{self.video_id}" path = f"ta_download/_update/{self.youtube_id}"
_, _ = ElasticWrap(path).post(data=data) _, _ = ElasticWrap(path).post(data=data)
def get_item(self): def get_item(self):
"""return pending item dict""" """return pending item dict"""
path = f"ta_download/_doc/{self.video_id}" path = f"ta_download/_doc/{self.youtube_id}"
response, status_code = ElasticWrap(path).get() response, status_code = ElasticWrap(path).get()
return response["_source"], status_code return response["_source"], status_code
def get_channel(self):
"""
get channel metadata from queue to not depend on channel to be indexed
"""
data = {
"size": 1,
"query": {"term": {"channel_id": {"value": self.youtube_id}}},
}
response, _ = ElasticWrap("ta_download/_search").get(data=data)
hits = response["hits"]["hits"]
if not hits:
channel_name = "NA"
else:
channel_name = hits[0]["_source"].get("channel_name", "NA")
return {
"channel_id": self.youtube_id,
"channel_name": channel_name,
}
class PendingList(PendingIndex): class PendingList(PendingIndex):
"""manage the pending videos list""" """manage the pending videos list"""
@ -264,6 +284,7 @@ class PendingList(PendingIndex):
return False return False
# stop if video is streaming live now # stop if video is streaming live now
if vid["live_status"] in ["is_upcoming", "is_live"]: if vid["live_status"] in ["is_upcoming", "is_live"]:
print(f"{youtube_id}: skip is_upcoming or is_live")
return False return False
if vid["live_status"] == "was_live": if vid["live_status"] == "was_live":

View File

@ -43,8 +43,12 @@ class ThumbManagerBase:
response = requests.get(url, stream=True, timeout=5) response = requests.get(url, stream=True, timeout=5)
if response.ok: if response.ok:
try: try:
return Image.open(response.raw) img = Image.open(response.raw)
except UnidentifiedImageError: if isinstance(img, Image.Image):
return img
return self.get_fallback()
except (UnidentifiedImageError, OSError):
print(f"failed to open thumbnail: {url}") print(f"failed to open thumbnail: {url}")
return self.get_fallback() return self.get_fallback()
@ -59,6 +63,7 @@ class ThumbManagerBase:
def get_fallback(self): def get_fallback(self):
"""get fallback thumbnail if not available""" """get fallback thumbnail if not available"""
print(f"{self.item_id}: failed to extract thumbnail, use fallback")
if self.fallback: if self.fallback:
img_raw = Image.open(self.fallback) img_raw = Image.open(self.fallback)
return img_raw return img_raw

View File

@ -109,12 +109,20 @@ class Comments:
if comments_raw: if comments_raw:
for comment in comments_raw: for comment in comments_raw:
cleaned_comment = self.clean_comment(comment) cleaned_comment = self.clean_comment(comment)
if not cleaned_comment:
continue
comments.append(cleaned_comment) comments.append(cleaned_comment)
self.comments_format = comments self.comments_format = comments
def clean_comment(self, comment): def clean_comment(self, comment):
"""parse metadata from comment for indexing""" """parse metadata from comment for indexing"""
if not comment.get("text"):
# comment text can be empty
print(f"{self.youtube_id}: Failed to extract text, {comment}")
return False
time_text_datetime = datetime.utcfromtimestamp(comment["timestamp"]) time_text_datetime = datetime.utcfromtimestamp(comment["timestamp"])
if time_text_datetime.hour == 0 and time_text_datetime.minute == 0: if time_text_datetime.hour == 0 and time_text_datetime.minute == 0:

View File

@ -111,9 +111,10 @@ def download_pending():
def download_single(pending_video): def download_single(pending_video):
"""start download single video now""" """start download single video now"""
queue = RedisQueue(queue_name="dl_queue") queue = RedisQueue(queue_name="dl_queue")
to_add = { to_add = {
"youtube_id": pending_video["youtube_id"], "youtube_id": pending_video["youtube_id"],
"vid_type": pending_video["vid_type"], "vid_type": pending_video.get("vid_type", VideoTypeEnum.VIDEOS.value),
} }
queue.add_priority(json.dumps(to_add)) queue.add_priority(json.dumps(to_add))
print(f"Added to queue with priority: {to_add}") print(f"Added to queue with priority: {to_add}")

View File

@ -15,6 +15,7 @@ from django.contrib.auth.forms import AuthenticationForm
from django.http import JsonResponse from django.http import JsonResponse
from django.shortcuts import redirect, render from django.shortcuts import redirect, render
from django.views import View from django.views import View
from home.src.download.queue import PendingInteract
from home.src.download.yt_dlp_base import CookieHandler from home.src.download.yt_dlp_base import CookieHandler
from home.src.es.backup import ElasticBackup from home.src.es.backup import ElasticBackup
from home.src.es.connect import ElasticWrap from home.src.es.connect import ElasticWrap
@ -32,7 +33,7 @@ from home.src.frontend.forms import (
UserSettingsForm, UserSettingsForm,
) )
from home.src.frontend.searching import SearchHandler from home.src.frontend.searching import SearchHandler
from home.src.index.channel import YoutubeChannel, channel_overwrites from home.src.index.channel import channel_overwrites
from home.src.index.generic import Pagination from home.src.index.generic import Pagination
from home.src.index.playlist import YoutubePlaylist from home.src.index.playlist import YoutubePlaylist
from home.src.index.reindex import ReindexProgress from home.src.index.reindex import ReindexProgress
@ -375,13 +376,13 @@ class DownloadView(ArchivistResultsView):
def get(self, request): def get(self, request):
"""handle get request""" """handle get request"""
self.initiate_vars(request) self.initiate_vars(request)
self._update_view_data(request) filter_view = self._update_view_data(request)
self.find_results() self.find_results()
self.context.update( self.context.update(
{ {
"title": "Downloads", "title": "Downloads",
"add_form": AddToQueueForm(), "add_form": AddToQueueForm(),
"channel_agg_list": self._get_channel_agg(), "channel_agg_list": self._get_channel_agg(filter_view),
} }
) )
return render(request, "home/downloads.html", self.context) return render(request, "home/downloads.html", self.context)
@ -401,12 +402,11 @@ class DownloadView(ArchivistResultsView):
{"term": {"channel_id": {"value": channel_filter}}} {"term": {"channel_id": {"value": channel_filter}}}
) )
channel = YoutubeChannel(channel_filter) channel = PendingInteract(channel_filter).get_channel()
channel.get_from_es()
self.context.update( self.context.update(
{ {
"channel_filter_id": channel_filter, "channel_filter_id": channel.get("channel_id"),
"channel_filter_name": channel.json_data["channel_name"], "channel_filter_name": channel.get("channel_name"),
} }
) )
@ -417,11 +417,13 @@ class DownloadView(ArchivistResultsView):
} }
) )
def _get_channel_agg(self): return filter_view
def _get_channel_agg(self, filter_view):
"""get pending channel with count""" """get pending channel with count"""
data = { data = {
"size": 0, "size": 0,
"query": {"term": {"status": {"value": "pending"}}}, "query": {"term": {"status": {"value": filter_view}}},
"aggs": { "aggs": {
"channel_downloads": { "channel_downloads": {
"multi_terms": { "multi_terms": {

View File

@ -1,13 +1,13 @@
beautifulsoup4==4.11.2 beautifulsoup4==4.11.2
celery==5.2.7 celery==5.2.7
Django==4.1.6 Django==4.1.7
django-auth-ldap==4.1.0 django-auth-ldap==4.1.0
django-cors-headers==3.13.0 django-cors-headers==3.13.0
djangorestframework==3.14.0 djangorestframework==3.14.0
Pillow==9.4.0 Pillow==9.4.0
redis==4.4.2 redis==4.5.1
requests==2.28.2 requests==2.28.2
ryd-client==0.0.6 ryd-client==0.0.6
uWSGI==2.0.21 uWSGI==2.0.21
whitenoise==6.3.0 whitenoise==6.3.0
yt_dlp==2023.1.6 yt_dlp==2023.2.17

View File

@ -142,7 +142,7 @@ function toggleCheckbox(checkbox) {
let payload = JSON.stringify(payloadDict); let payload = JSON.stringify(payloadDict);
sendPost(payload); sendPost(payload);
setTimeout(function () { setTimeout(function () {
let currPage = window.location.pathname + window.location.search; let currPage = window.location.pathname;
window.location.replace(currPage); window.location.replace(currPage);
}, 500); }, 500);
} }