Lives and shorts (#395)

* Initial shot at adding in live and short videos

* Fix for typo on config variable names

* Remove was_live youtube flag

* Adding startup script to support setting vid_type on ta_video for videos created prior to support for live and shorts

* Linting updates

* More linting fixes

* Removing title variable from loop tuple unpacking
This commit is contained in:
Chance Turner 2023-01-01 21:11:06 -06:00 committed by GitHub
parent 7f603cb06a
commit 98f5b66826
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 214 additions and 41 deletions

View File

@ -36,6 +36,7 @@ class StartupCheck:
clear_dl_cache(self.config_handler.config)
self.snapshot_check()
self.ta_version_check()
self.es_set_vid_type()
self.set_has_run()
def get_has_run(self):
@ -126,6 +127,17 @@ class StartupCheck:
"""remove key if updated now"""
ReleaseVersion().is_updated()
def es_set_vid_type(self):
    """migration for the 0.3.0 to 0.3.1 update

    Sends an update-by-query to ta_video that writes vid_type = 'video'
    into every document that does not have a vid_type field yet, then
    prints the response.
    """
    has_vid_type = {"exists": {"field": "vid_type"}}
    set_default_script = {"source": "ctx._source['vid_type'] = 'video'"}
    data = {
        # must_not + exists: only documents lacking the field are touched
        "query": {"bool": {"must_not": [has_vid_type]}},
        "script": set_default_script,
    }
    path = "ta_video/_update_by_query"
    response, _ = ElasticWrap(path).post(data=data)
    print(f"ta_video vid_type index update ran: {response}")
class HomeConfig(AppConfig):
"""call startup funcs"""

View File

@ -14,7 +14,9 @@
"subscriptions": {
"auto_search": false,
"auto_download": false,
"channel_size": 50
"channel_size": 50,
"live_channel_size": 0,
"shorts_channel_size": 0
},
"downloads": {
"limit_count": false,

View File

@ -15,6 +15,7 @@ from home.src.download.thumbnails import ThumbManager
from home.src.download.yt_dlp_base import YtWrap
from home.src.es.connect import ElasticWrap, IndexPaginate
from home.src.index.playlist import YoutubePlaylist
from home.src.index.video_constants import VideoTypeEnum
from home.src.ta.config import AppConfig
from home.src.ta.helper import DurationConverter
from home.src.ta.ta_redis import RedisArchivist
@ -156,7 +157,8 @@ class PendingList(PendingIndex):
def _process_entry(self, entry):
"""process single entry from url list"""
if entry["type"] == "video":
self._add_video(entry["url"])
vid_type = entry.get("vid_type", VideoTypeEnum.VIDEO)
self._add_video(entry["url"], vid_type)
elif entry["type"] == "channel":
self._parse_channel(entry["url"])
elif entry["type"] == "playlist":
@ -165,10 +167,10 @@ class PendingList(PendingIndex):
else:
raise ValueError(f"invalid url_type: {entry}")
def _add_video(self, url, vid_type=VideoTypeEnum.VIDEO):
    """add video to list

    Queue ``url`` together with its ``vid_type`` unless the same url is
    already queued in ``self.missing_videos`` or listed in ``self.to_skip``.
    """
    # missing_videos holds (url, vid_type) tuples, so a bare
    # `url in self.missing_videos` can never match a queued entry and
    # duplicates would slip through; compare against the id part instead
    # (plain-string entries are still matched as-is)
    already_queued = any(
        (queued[0] if isinstance(queued, tuple) else queued) == url
        for queued in self.missing_videos
    )
    if already_queued or url in self.to_skip:
        print(f"{url}: skipped adding already indexed video to download.")
        return

    self.missing_videos.append((url, vid_type))
@ -177,9 +179,8 @@ class PendingList(PendingIndex):
video_results = ChannelSubscription().get_last_youtube_videos(
url, limit=False
)
youtube_ids = [i[0] for i in video_results]
for video_id in youtube_ids:
self._add_video(video_id)
for video_id, _, vid_type in video_results:
self._add_video(video_id, vid_type)
def _parse_playlist(self, url):
"""add all videos of playlist to list"""
@ -188,16 +189,18 @@ class PendingList(PendingIndex):
video_results = playlist.json_data.get("playlist_entries")
youtube_ids = [i["youtube_id"] for i in video_results]
for video_id in youtube_ids:
self._add_video(video_id)
# FIXME: This will need to be adjusted to support Live/Shorts
# from playlists
self._add_video(video_id, VideoTypeEnum.VIDEO)
def add_to_pending(self, status="pending"):
"""add missing videos to pending list"""
self.get_channels()
bulk_list = []
for idx, youtube_id in enumerate(self.missing_videos):
print(f"{youtube_id}: add to download queue")
video_details = self.get_youtube_details(youtube_id)
for idx, (youtube_id, vid_type) in enumerate(self.missing_videos):
print(f"{youtube_id} ({vid_type}): add to download queue")
video_details = self.get_youtube_details(youtube_id, vid_type)
if not video_details:
continue
@ -235,7 +238,7 @@ class PendingList(PendingIndex):
# report progress on every 25th item (1-based); the parentheses matter:
# `idx + 1 % 25` parses as `idx + (1 % 25)` and is never equal to 0
if (idx + 1) % 25 == 0:
    print("adding to queue progress: " + progress)
def get_youtube_details(self, youtube_id):
def get_youtube_details(self, youtube_id, vid_type=VideoTypeEnum.VIDEO):
"""get details from youtubedl for single pending video"""
vid = YtWrap(self.yt_obs, self.config).extract(youtube_id)
if not vid:
@ -249,9 +252,9 @@ class PendingList(PendingIndex):
if vid["live_status"] in ["is_upcoming", "is_live"]:
return False
return self._parse_youtube_details(vid)
return self._parse_youtube_details(vid, vid_type)
def _parse_youtube_details(self, vid):
def _parse_youtube_details(self, vid, vid_type=VideoTypeEnum.VIDEO):
"""parse response"""
vid_id = vid.get("id")
duration_str = DurationConverter.get_str(vid["duration"])
@ -271,6 +274,8 @@ class PendingList(PendingIndex):
"duration": duration_str,
"published": published,
"timestamp": int(datetime.now().timestamp()),
# Pulling enum value out so it is serializable
"vid_type": vid_type.value,
}
if self.all_channels:
youtube_details.update(

View File

@ -10,6 +10,7 @@ from home.src.download.yt_dlp_base import YtWrap
from home.src.es.connect import IndexPaginate
from home.src.index.channel import YoutubeChannel
from home.src.index.playlist import YoutubePlaylist
from home.src.index.video_constants import VideoTypeEnum
from home.src.ta.config import AppConfig
from home.src.ta.ta_redis import RedisArchivist
@ -37,19 +38,44 @@ class ChannelSubscription:
def get_last_youtube_videos(self, channel_id, limit=True):
"""get a list of last videos from channel"""
obs = {
"skip_download": True,
"extract_flat": True,
}
if limit:
obs["playlistend"] = self.config["subscriptions"]["channel_size"]
url = f"https://www.youtube.com/channel/{channel_id}/videos"
channel = YtWrap(obs, self.config).extract(url)
if not channel:
return False
queries = [
(
VideoTypeEnum.VIDEO,
"videos",
self.config["subscriptions"]["channel_size"],
),
(
VideoTypeEnum.LIVE,
"streams",
self.config["subscriptions"]["live_channel_size"],
),
(
VideoTypeEnum.SHORT,
"shorts",
self.config["subscriptions"]["shorts_channel_size"],
),
]
last_videos = []
for vid_type, url, limit_amount in queries:
obs = {
"skip_download": True,
"extract_flat": True,
}
if limit:
obs["playlistend"] = limit_amount
channel = YtWrap(obs, self.config).extract(
f"https://www.youtube.com/channel/{channel_id}/{url}"
)
if not channel:
continue
last_videos.extend(
[(i["id"], i["title"], vid_type) for i in channel["entries"]]
)
last_videos = [(i["id"], i["title"]) for i in channel["entries"]]
return last_videos
def find_missing(self):
@ -67,9 +93,9 @@ class ChannelSubscription:
last_videos = self.get_last_youtube_videos(channel_id)
if last_videos:
for video in last_videos:
if video[0] not in pending.to_skip:
missing_videos.append(video[0])
for video_id, _, vid_type in last_videos:
if video_id not in pending.to_skip:
missing_videos.append((video_id, vid_type))
# notify
message = {
"status": "message:rescan",

View File

@ -6,6 +6,7 @@ functionality:
- move to archive
"""
import json
import os
import shutil
from datetime import datetime
@ -18,6 +19,7 @@ from home.src.index.channel import YoutubeChannel
from home.src.index.comments import CommentList
from home.src.index.playlist import YoutubePlaylist
from home.src.index.video import YoutubeVideo, index_new_video
from home.src.index.video_constants import VideoTypeEnum
from home.src.ta.config import AppConfig
from home.src.ta.helper import clean_string, ignore_filelist
from home.src.ta.ta_redis import RedisArchivist, RedisQueue
@ -174,10 +176,23 @@ class VideoDownloader:
queue.trim(limit_queue - 1)
while True:
youtube_id = queue.get_next()
if not youtube_id:
youtube_data = queue.get_next()
if not youtube_data:
break
try:
    youtube_data = json.loads(youtube_data)
except json.JSONDecodeError: # This may not be necessary; skips queue items that are not valid JSON
    continue
youtube_id = youtube_data.get("youtube_id")
tmp_vid_type = youtube_data.get(
"vid_type", VideoTypeEnum.VIDEO.value
)
video_type = VideoTypeEnum(tmp_vid_type)
print(f"Downloading type: {video_type}")
success = self._dl_single_vid(youtube_id)
if not success:
continue
@ -191,7 +206,9 @@ class VideoDownloader:
RedisArchivist().set_message(self.MSG, mess_dict, expire=60)
vid_dict = index_new_video(
youtube_id, video_overwrites=self.video_overwrites
youtube_id,
video_overwrites=self.video_overwrites,
video_type=video_type,
)
self.channels.add(vid_dict["channel"]["channel_id"])
self.videos.add(vid_dict["youtube_id"])
@ -245,7 +262,17 @@ class VideoDownloader:
RedisArchivist().set_message(self.MSG, mess_dict, expire=True)
pending = PendingList()
pending.get_download()
to_add = [i["youtube_id"] for i in pending.all_pending]
to_add = [
json.dumps(
{
"youtube_id": i["youtube_id"],
# Using .value in default val to match what would be
# decoded when parsing json if not set
"vid_type": i.get("vid_type", VideoTypeEnum.VIDEO.value),
}
)
for i in pending.all_pending
]
if not to_add:
# there is nothing pending
print("download queue is empty")

View File

@ -178,6 +178,9 @@
"youtube_id": {
"type": "keyword"
},
"vid_type": {
"type": "keyword"
},
"published": {
"type": "date"
},
@ -324,6 +327,9 @@
},
"youtube_id": {
"type": "keyword"
},
"vid_type": {
"type": "keyword"
}
},
"expected_set": {

View File

@ -101,6 +101,12 @@ class ApplicationSettingsForm(forms.Form):
subscriptions_channel_size = forms.IntegerField(
required=False, min_value=1
)
subscriptions_live_channel_size = forms.IntegerField(
required=False, min_value=0
)
subscriptions_shorts_channel_size = forms.IntegerField(
required=False, min_value=0
)
downloads_limit_count = forms.IntegerField(required=False)
downloads_limit_speed = forms.IntegerField(required=False)
downloads_throttledratelimit = forms.IntegerField(required=False)

View File

@ -15,6 +15,7 @@ from home.src.index import comments as ta_comments
from home.src.index import playlist as ta_playlist
from home.src.index.generic import YouTubeItem
from home.src.index.subtitle import YoutubeSubtitle
from home.src.index.video_constants import VideoTypeEnum
from home.src.ta.helper import DurationConverter, clean_string, randomizor
from home.src.ta.ta_redis import RedisArchivist
from ryd_client import ryd_client
@ -123,10 +124,16 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):
index_name = "ta_video"
yt_base = "https://www.youtube.com/watch?v="
def __init__(self, youtube_id, video_overwrites=False):
def __init__(
self,
youtube_id,
video_overwrites=False,
video_type=VideoTypeEnum.VIDEO,
):
super().__init__(youtube_id)
self.channel_id = False
self.video_overwrites = video_overwrites
self.video_type = video_type
self.es_path = f"{self.index_name}/_doc/{youtube_id}"
self.offline_import = False
@ -189,6 +196,8 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):
"vid_last_refresh": last_refresh,
"date_downloaded": last_refresh,
"youtube_id": self.youtube_id,
# Using .value to make json encodable
"vid_type": self.video_type.value,
"active": True,
}
@ -396,9 +405,13 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):
_, _ = ElasticWrap(path).post(data=data)
def index_new_video(youtube_id, video_overwrites=False):
def index_new_video(
youtube_id, video_overwrites=False, video_type=VideoTypeEnum.VIDEO
):
"""combined classes to create new video in index"""
video = YoutubeVideo(youtube_id, video_overwrites=video_overwrites)
video = YoutubeVideo(
youtube_id, video_overwrites=video_overwrites, video_type=video_type
)
video.build_json()
if not video.json_data:
raise ValueError("failed to get metadata for " + youtube_id)

View File

@ -0,0 +1,7 @@
import enum
@enum.unique
class VideoTypeEnum(enum.Enum):
    """Kinds of video the application distinguishes.

    The member values are the plain strings written to the ``vid_type``
    field; use ``.value`` wherever the type has to be JSON serialized and
    ``VideoTypeEnum(value)`` to turn a stored string back into a member.
    ``enum.unique`` makes an accidental duplicate value fail at import
    time instead of silently creating an alias.
    """

    VIDEO = "video"
    LIVE = "live"
    SHORT = "short"

View File

@ -22,6 +22,7 @@ from home.src.es.index_setup import ElasitIndexWrap
from home.src.index.channel import YoutubeChannel
from home.src.index.filesystem import ImportFolderScanner, scan_filesystem
from home.src.index.reindex import Reindex, ReindexManual, ReindexOutdated
from home.src.index.video_constants import VideoTypeEnum
from home.src.ta.config import AppConfig, ReleaseVersion, ScheduleBuilder
from home.src.ta.helper import UrlListParser, clear_dl_cache
from home.src.ta.ta_redis import RedisArchivist, RedisQueue
@ -58,10 +59,22 @@ def update_subscribed():
missing_from_channels = channel_handler.find_missing()
playlist_handler = PlaylistSubscription()
missing_from_playlists = playlist_handler.find_missing()
missing = missing_from_channels + missing_from_playlists
if missing:
youtube_ids = [{"type": "video", "url": i} for i in missing]
pending_handler = PendingList(youtube_ids=youtube_ids)
if missing_from_channels or missing_from_playlists:
channel_videos = [
{"type": "video", "vid_type": vid_type, "url": vid_id}
for vid_id, vid_type in missing_from_channels
]
playlist_videos = [
{
"type": "video",
"vid_type": VideoTypeEnum.VIDEO,
"url": i,
}
for i in missing_from_playlists
]
pending_handler = PendingList(
youtube_ids=channel_videos + playlist_videos
)
pending_handler.parse_url_list()
pending_handler.add_to_pending()

View File

@ -8,6 +8,8 @@
</div>
<div class="info-box-item channel-nav">
<a href="{% url 'channel_id' channel_info.channel_id %}"><h3>Videos</h3></a>
<a href="{% url 'channel_id_live' channel_info.channel_id %}"><h3>Live</h3></a>
<a href="{% url 'channel_id_shorts' channel_info.channel_id %}"><h3>Shorts</h3></a>
<a href="{% url 'channel_id_playlist' channel_info.channel_id %}"><h3>Playlists</h3></a>
<a href="{% url 'channel_id_about' channel_info.channel_id %}"><h3>About</h3></a>
{% if has_pending %}

View File

@ -8,6 +8,8 @@
</div>
<div class="info-box-item channel-nav">
<a href="{% url 'channel_id' channel_info.channel_id %}"><h3>Videos</h3></a>
<a href="{% url 'channel_id_live' channel_info.channel_id %}"><h3>Live</h3></a>
<a href="{% url 'channel_id_shorts' channel_info.channel_id %}"><h3>Shorts</h3></a>
<a href="{% url 'channel_id_playlist' channel_info.channel_id %}"><h3>Playlists</h3></a>
<a href="{% url 'channel_id_about' channel_info.channel_id %}"><h3>About</h3></a>
{% if has_pending %}

View File

@ -8,6 +8,8 @@
</div>
<div class="info-box-item channel-nav">
<a href="{% url 'channel_id' channel_info.channel_id %}"><h3>Videos</h3></a>
<a href="{% url 'channel_id_live' channel_info.channel_id %}"><h3>Live</h3></a>
<a href="{% url 'channel_id_shorts' channel_info.channel_id %}"><h3>Shorts</h3></a>
<a href="{% url 'channel_id_playlist' channel_info.channel_id %}"><h3>Playlists</h3></a>
<a href="{% url 'channel_id_about' channel_info.channel_id %}"><h3>About</h3></a>
{% if has_pending %}

View File

@ -38,6 +38,16 @@
<i>Videos to scan to find new items for the <b>Rescan subscriptions</b> task, max recommended 50.</i><br>
{{ app_form.subscriptions_channel_size }}
</div>
<div class="settings-item">
<p>YouTube Live page size: <span class="settings-current">{{ config.subscriptions.live_channel_size }}</span></p>
<i>Live Videos to scan to find new items for the <b>Rescan subscriptions</b> task, max recommended 50, 0 to disable.</i><br>
{{ app_form.subscriptions_live_channel_size }}
</div>
<div class="settings-item">
<p>YouTube Shorts page size: <span class="settings-current">{{ config.subscriptions.shorts_channel_size }}</span></p>
<i>Shorts Videos to scan to find new items for the <b>Rescan subscriptions</b> task, max recommended 50, 0 to disable.</i><br>
{{ app_form.subscriptions_shorts_channel_size }}
</div>
</div>
<div class="settings-group">
<h2 id="downloads">Downloads</h2>

View File

@ -7,7 +7,9 @@ from django.urls import path
from home.views import (
AboutView,
ChannelIdAboutView,
ChannelIdLiveView,
ChannelIdPlaylistView,
ChannelIdShortsView,
ChannelIdView,
ChannelView,
DownloadView,
@ -44,6 +46,16 @@ urlpatterns = [
login_required(ChannelIdView.as_view()),
name="channel_id",
),
path(
"channel/<slug:channel_id>/live/",
login_required(ChannelIdLiveView.as_view()),
name="channel_id_live",
),
path(
"channel/<slug:channel_id>/shorts/",
login_required(ChannelIdShortsView.as_view()),
name="channel_id_shorts",
),
path(
"channel/<slug:channel_id>/about/",
login_required(ChannelIdAboutView.as_view()),

View File

@ -3,7 +3,7 @@ Functionality:
- all views for home app
- holds base classes to inherit from
"""
import enum
import json
import urllib.parse
from time import sleep
@ -36,6 +36,7 @@ from home.src.index.channel import YoutubeChannel, channel_overwrites
from home.src.index.generic import Pagination
from home.src.index.playlist import YoutubePlaylist
from home.src.index.reindex import ReindexProgress
from home.src.index.video_constants import VideoTypeEnum
from home.src.ta.config import AppConfig, ReleaseVersion, ScheduleBuilder
from home.src.ta.helper import UrlListParser, time_parser
from home.src.ta.ta_redis import RedisArchivist
@ -513,6 +514,7 @@ class ChannelIdView(ChannelIdBaseView):
view_origin = "home"
es_search = "ta_video/_search"
video_types = [VideoTypeEnum.VIDEO]
def get(self, request, channel_id):
"""get request"""
@ -542,10 +544,20 @@ class ChannelIdView(ChannelIdBaseView):
def _update_view_data(self, channel_id):
"""update view specific data dict"""
vid_type_terms = []
for t in self.video_types:
if t and isinstance(t, enum.Enum):
vid_type_terms.append(t.value)
else:
print(
"Invalid value passed into video_types on "
+ f"ChannelIdView: {t}"
)
self.data["query"] = {
"bool": {
"must": [
{"term": {"channel.channel_id": {"value": channel_id}}}
{"term": {"channel.channel_id": {"value": channel_id}}},
{"terms": {"vid_type": vid_type_terms}},
]
}
}
@ -571,6 +583,22 @@ class ChannelIdView(ChannelIdBaseView):
return redirect("channel_id", channel_id, permanent=True)
class ChannelIdLiveView(ChannelIdView):
    """resolves to /channel/<channel-id>/live/
    display single channel page from channel_id,
    limited to videos with vid_type live
    """

    # overrides the parent's video_types, which ChannelIdView turns into
    # the vid_type terms filter of the search query
    video_types = [VideoTypeEnum.LIVE]
class ChannelIdShortsView(ChannelIdView):
    """resolves to /channel/<channel-id>/shorts/
    display single channel page from channel_id,
    limited to videos with vid_type short
    """

    # overrides the parent's video_types, which ChannelIdView turns into
    # the vid_type terms filter of the search query
    video_types = [VideoTypeEnum.SHORT]
class ChannelIdAboutView(ChannelIdBaseView):
"""resolves to /channel/<channel-id>/about/
show metadata, handle per channel conf