yt-dlp uploader id fix, #build

Changed:
- Fix for yt-dlp parsing error [2023.02.17]
- Fix for various thumbnail extraction errors
- Fix download queue filter
- [API] Add filter for subscribed channels
This commit is contained in:
simon 2023-02-17 20:58:19 +07:00
commit 9e1316b543
No known key found for this signature in database
GPG Key ID: 2C15AA5E89985DD4
10 changed files with 77 additions and 24 deletions

View File

@ -1,5 +1,5 @@
# Video Page
Every video downloaded gets a dedicated page accessible at `/video/<video-id>/` of your Tube Archivist.
Every video downloaded gets a dedicated page accessible at `/video/<video-id>/` of your Tube Archivist. Throughout the interface, click on a video title to access the video page.
Clicking on the channel name or the channel icon will bring you to the dedicated channel detail [page](Channels#channel-detail).

View File

@ -155,6 +155,9 @@ Timestamps either *int* or *float*, end time can't be before start time.
## Channel List View
/api/channel/
Parameter:
- filter: subscribed
### Subscribe to a list of channels
POST /api/channel/
```json

View File

@ -289,14 +289,27 @@ class ChannelApiListView(ApiBaseView):
"""
search_base = "ta_channel/_search/"
valid_filter = ["subscribed"]
def get(self, request):
    """Return the channel list, optionally narrowed by ``?filter=``.

    Channels are sorted by name in ascending order. The ``filter`` url
    parameter is optional; when present it must be one of
    ``self.valid_filter``, otherwise a 400 response carrying an
    explanatory message is returned and no query is run.
    """
    self.data.update(
        {"sort": [{"channel_name.keyword": {"order": "asc"}}]}
    )

    query_filter = request.GET.get("filter", False)
    must_list = []
    if query_filter:
        if query_filter not in self.valid_filter:
            message = f"invalid url query filter: {query_filter}"
            print(message)
            return Response({"message": message}, status=400)

        # "subscribed" is the only entry in valid_filter
        must_list.append({"term": {"channel_subscribed": {"value": True}}})

    # must_list stays empty when no filter was requested
    self.data["query"] = {"bool": {"must": must_list}}
    # run the query once, after sort and filter have been set on self.data
    self.get_document_list(request)

    return Response(self.response)
@staticmethod
@ -430,7 +443,7 @@ class DownloadApiView(ApiBaseView):
# pylint: disable=unused-argument
"""delete single video from queue"""
print(f"{video_id}: delete from queue")
PendingInteract(video_id=video_id).delete_item()
PendingInteract(video_id).delete_item()
return Response({"success": True})

View File

@ -96,13 +96,13 @@ class PendingIndex:
class PendingInteract:
"""interact with items in download queue"""
def __init__(self, youtube_id=False, status=False):
    # youtube_id: used as the ta_download document id by the item
    # methods and as the channel id by get_channel
    self.youtube_id = youtube_id
    # status: value written to the item by update_status
    self.status = status
def delete_item(self):
    """delete single item from pending, addressed by self.youtube_id"""
    path = f"ta_download/_doc/{self.youtube_id}"
    # response and status code are deliberately discarded
    _, _ = ElasticWrap(path).delete(refresh=True)
def delete_by_status(self):
@ -114,15 +114,35 @@ class PendingInteract:
def update_status(self):
    """update status field of pending item to self.status"""
    data = {"doc": {"status": self.status}}
    path = f"ta_download/_update/{self.youtube_id}"
    # response and status code are deliberately discarded
    _, _ = ElasticWrap(path).post(data=data)
def get_item(self):
    """return tuple of pending item dict and response status code"""
    path = f"ta_download/_doc/{self.youtube_id}"
    response, status_code = ElasticWrap(path).get()
    # NOTE(review): assumes the response always carries "_source";
    # confirm what ElasticWrap returns for a missing document
    return response["_source"], status_code
def get_channel(self):
    """
    build channel_id / channel_name dict from a download queue item,
    so the lookup does not depend on the channel being indexed,
    channel_name is "NA" if no queue item matches
    """
    query = {
        "size": 1,
        "query": {"term": {"channel_id": {"value": self.youtube_id}}},
    }
    response, _ = ElasticWrap("ta_download/_search").get(data=query)
    matches = response["hits"]["hits"]

    channel_name = "NA"
    if matches:
        # first hit is enough, only the channel name is needed
        channel_name = matches[0]["_source"].get("channel_name", "NA")

    return {"channel_id": self.youtube_id, "channel_name": channel_name}
class PendingList(PendingIndex):
"""manage the pending videos list"""
@ -264,6 +284,7 @@ class PendingList(PendingIndex):
return False
# stop if video is streaming live now
if vid["live_status"] in ["is_upcoming", "is_live"]:
print(f"{youtube_id}: skip is_upcoming or is_live")
return False
if vid["live_status"] == "was_live":

View File

@ -43,8 +43,12 @@ class ThumbManagerBase:
response = requests.get(url, stream=True, timeout=5)
if response.ok:
try:
return Image.open(response.raw)
except UnidentifiedImageError:
img = Image.open(response.raw)
if isinstance(img, Image.Image):
return img
return self.get_fallback()
except (UnidentifiedImageError, OSError):
print(f"failed to open thumbnail: {url}")
return self.get_fallback()
@ -59,6 +63,7 @@ class ThumbManagerBase:
def get_fallback(self):
"""get fallback thumbnail if not available"""
print(f"{self.item_id}: failed to extract thumbnail, use fallback")
if self.fallback:
img_raw = Image.open(self.fallback)
return img_raw

View File

@ -109,12 +109,20 @@ class Comments:
if comments_raw:
for comment in comments_raw:
cleaned_comment = self.clean_comment(comment)
if not cleaned_comment:
continue
comments.append(cleaned_comment)
self.comments_format = comments
def clean_comment(self, comment):
"""parse metadata from comment for indexing"""
if not comment.get("text"):
# comment text can be empty
print(f"{self.youtube_id}: Failed to extract text, {comment}")
return False
time_text_datetime = datetime.utcfromtimestamp(comment["timestamp"])
if time_text_datetime.hour == 0 and time_text_datetime.minute == 0:

View File

@ -111,9 +111,10 @@ def download_pending():
def download_single(pending_video):
    """start download single video now

    Adds a JSON message with the video's youtube_id and vid_type to the
    "dl_queue" redis queue with priority. Items without a "vid_type" key
    fall back to VideoTypeEnum.VIDEOS.value.
    """
    queue = RedisQueue(queue_name="dl_queue")
    to_add = {
        "youtube_id": pending_video["youtube_id"],
        # single key with .get(): a plain subscript would raise KeyError
        # for items that have no vid_type
        "vid_type": pending_video.get("vid_type", VideoTypeEnum.VIDEOS.value),
    }
    queue.add_priority(json.dumps(to_add))
    print(f"Added to queue with priority: {to_add}")

View File

@ -15,6 +15,7 @@ from django.contrib.auth.forms import AuthenticationForm
from django.http import JsonResponse
from django.shortcuts import redirect, render
from django.views import View
from home.src.download.queue import PendingInteract
from home.src.download.yt_dlp_base import CookieHandler
from home.src.es.backup import ElasticBackup
from home.src.es.connect import ElasticWrap
@ -32,7 +33,7 @@ from home.src.frontend.forms import (
UserSettingsForm,
)
from home.src.frontend.searching import SearchHandler
from home.src.index.channel import YoutubeChannel, channel_overwrites
from home.src.index.channel import channel_overwrites
from home.src.index.generic import Pagination
from home.src.index.playlist import YoutubePlaylist
from home.src.index.reindex import ReindexProgress
@ -375,13 +376,13 @@ class DownloadView(ArchivistResultsView):
def get(self, request):
    """handle get request, render the downloads page"""
    self.initiate_vars(request)
    # called once; the returned filter view is reused for the
    # channel aggregation below
    filter_view = self._update_view_data(request)
    self.find_results()
    self.context.update(
        {
            "title": "Downloads",
            "add_form": AddToQueueForm(),
            "channel_agg_list": self._get_channel_agg(filter_view),
        }
    )
    return render(request, "home/downloads.html", self.context)
@ -401,12 +402,11 @@ class DownloadView(ArchivistResultsView):
{"term": {"channel_id": {"value": channel_filter}}}
)
channel = YoutubeChannel(channel_filter)
channel.get_from_es()
channel = PendingInteract(channel_filter).get_channel()
self.context.update(
{
"channel_filter_id": channel_filter,
"channel_filter_name": channel.json_data["channel_name"],
"channel_filter_id": channel.get("channel_id"),
"channel_filter_name": channel.get("channel_name"),
}
)
@ -417,11 +417,13 @@ class DownloadView(ArchivistResultsView):
}
)
def _get_channel_agg(self):
return filter_view
def _get_channel_agg(self, filter_view):
"""get pending channel with count"""
data = {
"size": 0,
"query": {"term": {"status": {"value": "pending"}}},
"query": {"term": {"status": {"value": filter_view}}},
"aggs": {
"channel_downloads": {
"multi_terms": {

View File

@ -1,13 +1,13 @@
beautifulsoup4==4.11.2
celery==5.2.7
Django==4.1.6
Django==4.1.7
django-auth-ldap==4.1.0
django-cors-headers==3.13.0
djangorestframework==3.14.0
Pillow==9.4.0
redis==4.4.2
redis==4.5.1
requests==2.28.2
ryd-client==0.0.6
uWSGI==2.0.21
whitenoise==6.3.0
yt_dlp==2023.1.6
yt_dlp==2023.2.17

View File

@ -142,7 +142,7 @@ function toggleCheckbox(checkbox) {
let payload = JSON.stringify(payloadDict);
sendPost(payload);
setTimeout(function () {
let currPage = window.location.pathname + window.location.search;
let currPage = window.location.pathname;
window.location.replace(currPage);
}, 500);
}