Filesystem organization, #build
Changed: - Channged filesystem to static ids - Improved error handling for download process - Lots of fixes and improvements
This commit is contained in:
commit
cd25eadd1c
|
@ -1,5 +1,6 @@
|
|||
# python testing cache
|
||||
__pycache__
|
||||
.venv
|
||||
|
||||
# django testing db
|
||||
db.sqlite3
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
|
||||
# First stage to build python wheel
|
||||
FROM python:3.10.9-slim-bullseye AS builder
|
||||
FROM python:3.11.3-slim-bullseye AS builder
|
||||
ARG TARGETPLATFORM
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
|
@ -14,7 +14,7 @@ COPY ./tubearchivist/requirements.txt /requirements.txt
|
|||
RUN pip install --user -r requirements.txt
|
||||
|
||||
# build final image
|
||||
FROM python:3.10.9-slim-bullseye as tubearchivist
|
||||
FROM python:3.11.3-slim-bullseye as tubearchivist
|
||||
|
||||
ARG TARGETPLATFORM
|
||||
ARG INSTALL_DEBUG
|
||||
|
|
|
@ -49,6 +49,7 @@ function sync_test {
|
|||
--exclude ".gitignore" \
|
||||
--exclude "**/cache" \
|
||||
--exclude "**/__pycache__/" \
|
||||
--exclude ".venv" \
|
||||
--exclude "db.sqlite3" \
|
||||
--exclude ".mypy_cache" \
|
||||
. -e ssh "$host":tubearchivist
|
||||
|
@ -87,14 +88,14 @@ function validate {
|
|||
# note: this logic is duplicated in the `./github/workflows/lint_python.yml` config
|
||||
# if you update this file, you should update that as well
|
||||
echo "running black"
|
||||
black --exclude "migrations/*" --diff --color --check -l 79 "$check_path"
|
||||
black --force-exclude "migrations/*" --diff --color --check -l 79 "$check_path"
|
||||
echo "running codespell"
|
||||
codespell --skip="./.git,./package.json,./package-lock.json,./node_modules,./.mypy_cache" "$check_path"
|
||||
codespell --skip="./.git,./.venv,./package.json,./package-lock.json,./node_modules,./.mypy_cache" "$check_path"
|
||||
echo "running flake8"
|
||||
flake8 "$check_path" --exclude "migrations" --count --max-complexity=10 \
|
||||
flake8 "$check_path" --exclude "migrations,.venv" --count --max-complexity=10 \
|
||||
--max-line-length=79 --show-source --statistics
|
||||
echo "running isort"
|
||||
isort --skip "migrations" --check-only --diff --profile black -l 79 "$check_path"
|
||||
isort --skip "migrations" --skip ".venv" --check-only --diff --profile black -l 79 "$check_path"
|
||||
printf " \n> all validations passed\n"
|
||||
|
||||
}
|
||||
|
|
|
@ -14,6 +14,7 @@ fi
|
|||
python manage.py ta_envcheck
|
||||
python manage.py ta_connection
|
||||
python manage.py ta_startup
|
||||
python manage.py ta_migpath
|
||||
|
||||
# start all tasks
|
||||
nginx &
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -11,7 +11,7 @@ from home.src.index.channel import YoutubeChannel
|
|||
from home.src.index.generic import Pagination
|
||||
from home.src.index.reindex import ReindexProgress
|
||||
from home.src.index.video import SponsorBlock, YoutubeVideo
|
||||
from home.src.ta.config import AppConfig
|
||||
from home.src.ta.config import AppConfig, ReleaseVersion
|
||||
from home.src.ta.ta_redis import RedisArchivist
|
||||
from home.src.ta.task_manager import TaskCommand, TaskManager
|
||||
from home.src.ta.urlparser import Parser
|
||||
|
@ -189,7 +189,7 @@ class VideoCommentView(ApiBaseView):
|
|||
|
||||
class VideoSimilarView(ApiBaseView):
|
||||
"""resolves to /api/video/<video-id>/similar/
|
||||
GET: return max 3 videos similar to this
|
||||
GET: return max 6 videos similar to this
|
||||
"""
|
||||
|
||||
search_base = "ta_video/_search/"
|
||||
|
@ -535,7 +535,11 @@ class PingView(ApiBaseView):
|
|||
@staticmethod
|
||||
def get(request):
|
||||
"""get pong"""
|
||||
data = {"response": "pong", "user": request.user.id}
|
||||
data = {
|
||||
"response": "pong",
|
||||
"user": request.user.id,
|
||||
"version": ReleaseVersion().get_local_version(),
|
||||
}
|
||||
return Response(data)
|
||||
|
||||
|
||||
|
|
|
@ -86,6 +86,8 @@ class Command(BaseCommand):
|
|||
continue
|
||||
|
||||
if status_code and status_code == 200:
|
||||
path = "_cluster/health?wait_for_status=yellow&timeout=30s"
|
||||
_, _ = ElasticWrap(path).get()
|
||||
self.stdout.write(
|
||||
self.style.SUCCESS(" ✓ ES connection established")
|
||||
)
|
||||
|
@ -116,7 +118,7 @@ class Command(BaseCommand):
|
|||
return
|
||||
|
||||
message = (
|
||||
" 🗙 ES connection failed. "
|
||||
" 🗙 ES version check failed. "
|
||||
+ f"Expected {self.MIN_MAJOR}.{self.MIN_MINOR} but got {version}"
|
||||
)
|
||||
self.stdout.write(self.style.ERROR(f"{message}"))
|
||||
|
|
|
@ -0,0 +1,171 @@
|
|||
"""filepath migration from v0.3.6 to v0.3.7"""
|
||||
|
||||
import json
|
||||
import os
|
||||
|
||||
from django.core.management.base import BaseCommand
|
||||
from home.src.es.connect import ElasticWrap, IndexPaginate
|
||||
from home.src.ta.config import AppConfig
|
||||
from home.src.ta.helper import ignore_filelist
|
||||
|
||||
TOPIC = """
|
||||
|
||||
########################
|
||||
# Filesystem Migration #
|
||||
########################
|
||||
|
||||
"""
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
"""command framework"""
|
||||
|
||||
# pylint: disable=no-member
|
||||
|
||||
def handle(self, *args, **options):
|
||||
"""run commands"""
|
||||
self.stdout.write(TOPIC)
|
||||
|
||||
handler = FolderMigration()
|
||||
to_migrate = handler.get_to_migrate()
|
||||
if not to_migrate:
|
||||
self.stdout.write(
|
||||
self.style.SUCCESS(" no channel migration needed\n")
|
||||
)
|
||||
return
|
||||
|
||||
self.stdout.write(self.style.SUCCESS(" migrating channels"))
|
||||
total_channels = handler.create_folders(to_migrate)
|
||||
self.stdout.write(
|
||||
self.style.SUCCESS(f" created {total_channels} channels")
|
||||
)
|
||||
|
||||
self.stdout.write(
|
||||
self.style.SUCCESS(f" migrating {len(to_migrate)} videos")
|
||||
)
|
||||
handler.migrate_videos(to_migrate)
|
||||
self.stdout.write(self.style.SUCCESS(" update videos in index"))
|
||||
handler.send_bulk()
|
||||
|
||||
self.stdout.write(self.style.SUCCESS(" cleanup old folders"))
|
||||
handler.delete_old()
|
||||
|
||||
self.stdout.write(self.style.SUCCESS(" ✓ migration completed\n"))
|
||||
|
||||
|
||||
class FolderMigration:
|
||||
"""migrate video archive folder"""
|
||||
|
||||
def __init__(self):
|
||||
self.config = AppConfig().config
|
||||
self.videos = self.config["application"]["videos"]
|
||||
self.bulk_list = []
|
||||
|
||||
def get_to_migrate(self):
|
||||
"""get videos to migrate"""
|
||||
script = (
|
||||
"doc['media_url'].value == "
|
||||
+ "doc['channel.channel_id'].value + '/'"
|
||||
+ " + doc['youtube_id'].value + '.mp4'"
|
||||
)
|
||||
data = {
|
||||
"query": {"bool": {"must_not": [{"script": {"script": script}}]}},
|
||||
"_source": [
|
||||
"youtube_id",
|
||||
"media_url",
|
||||
"channel.channel_id",
|
||||
"subtitles",
|
||||
],
|
||||
}
|
||||
response = IndexPaginate("ta_video", data).get_results()
|
||||
|
||||
return response
|
||||
|
||||
def create_folders(self, to_migrate):
|
||||
"""create required channel folders"""
|
||||
host_uid = self.config["application"]["HOST_UID"]
|
||||
host_gid = self.config["application"]["HOST_GID"]
|
||||
all_channel_ids = {i["channel"]["channel_id"] for i in to_migrate}
|
||||
|
||||
for channel_id in all_channel_ids:
|
||||
new_folder = os.path.join(self.videos, channel_id)
|
||||
os.makedirs(new_folder, exist_ok=True)
|
||||
if host_uid and host_gid:
|
||||
os.chown(new_folder, host_uid, host_gid)
|
||||
|
||||
return len(all_channel_ids)
|
||||
|
||||
def migrate_videos(self, to_migrate):
|
||||
"""migrate all videos of channel"""
|
||||
for video in to_migrate:
|
||||
new_media_url = self._move_video_file(video)
|
||||
if not new_media_url:
|
||||
continue
|
||||
|
||||
all_subtitles = self._move_subtitles(video)
|
||||
action = {
|
||||
"update": {"_id": video["youtube_id"], "_index": "ta_video"}
|
||||
}
|
||||
source = {"doc": {"media_url": new_media_url}}
|
||||
if all_subtitles:
|
||||
source["doc"].update({"subtitles": all_subtitles})
|
||||
|
||||
self.bulk_list.append(json.dumps(action))
|
||||
self.bulk_list.append(json.dumps(source))
|
||||
|
||||
def _move_video_file(self, video):
|
||||
"""move video file to new location"""
|
||||
old_path = os.path.join(self.videos, video["media_url"])
|
||||
if not os.path.exists(old_path):
|
||||
print(f"did not find expected video at {old_path}")
|
||||
return False
|
||||
|
||||
new_media_url = os.path.join(
|
||||
video["channel"]["channel_id"], video["youtube_id"] + ".mp4"
|
||||
)
|
||||
new_path = os.path.join(self.videos, new_media_url)
|
||||
os.rename(old_path, new_path)
|
||||
|
||||
return new_media_url
|
||||
|
||||
def _move_subtitles(self, video):
|
||||
"""move subtitle files to new location"""
|
||||
all_subtitles = video.get("subtitles")
|
||||
if not all_subtitles:
|
||||
return False
|
||||
|
||||
for subtitle in all_subtitles:
|
||||
old_path = os.path.join(self.videos, subtitle["media_url"])
|
||||
if not os.path.exists(old_path):
|
||||
print(f"did not find expected subtitle at {old_path}")
|
||||
continue
|
||||
|
||||
new_media_url = os.path.join(
|
||||
video["channel"]["channel_id"],
|
||||
f"{video.get('youtube_id')}.{subtitle.get('lang')}.vtt",
|
||||
)
|
||||
new_path = os.path.join(self.videos, new_media_url)
|
||||
os.rename(old_path, new_path)
|
||||
subtitle["media_url"] = new_media_url
|
||||
|
||||
return all_subtitles
|
||||
|
||||
def send_bulk(self):
|
||||
"""send bulk request to update index with new urls"""
|
||||
if not self.bulk_list:
|
||||
print("nothing to update")
|
||||
return
|
||||
|
||||
self.bulk_list.append("\n")
|
||||
data = "\n".join(self.bulk_list)
|
||||
response, status = ElasticWrap("_bulk").post(data=data, ndjson=True)
|
||||
if not status == 200:
|
||||
print(response)
|
||||
|
||||
def delete_old(self):
|
||||
"""delete old empty folders"""
|
||||
all_folders = ignore_filelist(os.listdir(self.videos))
|
||||
for folder in all_folders:
|
||||
folder_path = os.path.join(self.videos, folder)
|
||||
if not ignore_filelist(os.listdir(folder_path)):
|
||||
os.rmdir(folder_path)
|
|
@ -256,4 +256,4 @@ CORS_ALLOW_HEADERS = list(default_headers) + [
|
|||
|
||||
# TA application settings
|
||||
TA_UPSTREAM = "https://github.com/tubearchivist/tubearchivist"
|
||||
TA_VERSION = "v0.3.6"
|
||||
TA_VERSION = "v0.3.7-unstable"
|
||||
|
|
|
@ -114,7 +114,13 @@ class PendingInteract:
|
|||
def update_status(self):
|
||||
"""update status of pending item"""
|
||||
if self.status == "priority":
|
||||
data = {"doc": {"status": "pending", "auto_start": True}}
|
||||
data = {
|
||||
"doc": {
|
||||
"status": "pending",
|
||||
"auto_start": True,
|
||||
"message": None,
|
||||
}
|
||||
}
|
||||
else:
|
||||
data = {"doc": {"status": self.status}}
|
||||
|
||||
|
|
|
@ -270,7 +270,7 @@ class ValidatorCallback:
|
|||
urls = (
|
||||
channel["_source"]["channel_thumb_url"],
|
||||
channel["_source"]["channel_banner_url"],
|
||||
channel["_source"]["channel_tvart_url"],
|
||||
channel["_source"].get("channel_tvart_url", False),
|
||||
)
|
||||
handler = ThumbManager(channel["_source"]["channel_id"])
|
||||
handler.download_channel_art(urls, skip_existing=True)
|
||||
|
|
|
@ -48,11 +48,14 @@ class YtWrap:
|
|||
with yt_dlp.YoutubeDL(self.obs) as ydl:
|
||||
try:
|
||||
ydl.download([url])
|
||||
except yt_dlp.utils.DownloadError:
|
||||
print(f"{url}: failed to download.")
|
||||
return False
|
||||
except yt_dlp.utils.DownloadError as err:
|
||||
print(f"{url}: failed to download with message {err}")
|
||||
if "Temporary failure in name resolution" in str(err):
|
||||
raise ConnectionError("lost the internet, abort!") from err
|
||||
|
||||
return True
|
||||
return False, str(err)
|
||||
|
||||
return True, True
|
||||
|
||||
def extract(self, url):
|
||||
"""make extract request"""
|
||||
|
@ -61,8 +64,17 @@ class YtWrap:
|
|||
except cookiejar.LoadError:
|
||||
print("cookie file is invalid")
|
||||
return False
|
||||
except (yt_dlp.utils.ExtractorError, yt_dlp.utils.DownloadError):
|
||||
print(f"{url}: failed to get info from youtube")
|
||||
except yt_dlp.utils.ExtractorError as err:
|
||||
print(f"{url}: failed to extract with message: {err}, continue...")
|
||||
return False
|
||||
except yt_dlp.utils.DownloadError as err:
|
||||
if "This channel does not have a" in str(err):
|
||||
return False
|
||||
|
||||
print(f"{url}: failed to get info from youtube with message {err}")
|
||||
if "Temporary failure in name resolution" in str(err):
|
||||
raise ConnectionError("lost the internet, abort!") from err
|
||||
|
||||
return False
|
||||
|
||||
return response
|
||||
|
|
|
@ -20,7 +20,7 @@ from home.src.index.playlist import YoutubePlaylist
|
|||
from home.src.index.video import YoutubeVideo, index_new_video
|
||||
from home.src.index.video_constants import VideoTypeEnum
|
||||
from home.src.ta.config import AppConfig
|
||||
from home.src.ta.helper import clean_string, ignore_filelist
|
||||
from home.src.ta.helper import ignore_filelist
|
||||
|
||||
|
||||
class DownloadPostProcess:
|
||||
|
@ -203,12 +203,13 @@ class VideoDownloader:
|
|||
def _get_next(self, auto_only):
|
||||
"""get next item in queue"""
|
||||
must_list = [{"term": {"status": {"value": "pending"}}}]
|
||||
must_not_list = [{"exists": {"field": "message"}}]
|
||||
if auto_only:
|
||||
must_list.append({"term": {"auto_start": {"value": True}}})
|
||||
|
||||
data = {
|
||||
"size": 1,
|
||||
"query": {"bool": {"must": must_list}},
|
||||
"query": {"bool": {"must": must_list, "must_not": must_not_list}},
|
||||
"sort": [
|
||||
{"auto_start": {"order": "desc"}},
|
||||
{"timestamp": {"order": "asc"}},
|
||||
|
@ -344,7 +345,9 @@ class VideoDownloader:
|
|||
if youtube_id in file_name:
|
||||
obs["outtmpl"] = os.path.join(dl_cache, file_name)
|
||||
|
||||
success = YtWrap(obs, self.config).download(youtube_id)
|
||||
success, message = YtWrap(obs, self.config).download(youtube_id)
|
||||
if not success:
|
||||
self._handle_error(youtube_id, message)
|
||||
|
||||
if self.obs["writethumbnail"]:
|
||||
# webp files don't get cleaned up automatically
|
||||
|
@ -356,28 +359,27 @@ class VideoDownloader:
|
|||
|
||||
return success
|
||||
|
||||
@staticmethod
|
||||
def _handle_error(youtube_id, message):
|
||||
"""store error message"""
|
||||
data = {"doc": {"message": message}}
|
||||
_, _ = ElasticWrap(f"ta_download/_update/{youtube_id}").post(data=data)
|
||||
|
||||
def move_to_archive(self, vid_dict):
|
||||
"""move downloaded video from cache to archive"""
|
||||
videos = self.config["application"]["videos"]
|
||||
host_uid = self.config["application"]["HOST_UID"]
|
||||
host_gid = self.config["application"]["HOST_GID"]
|
||||
channel_name = clean_string(vid_dict["channel"]["channel_name"])
|
||||
if len(channel_name) <= 3:
|
||||
# fall back to channel id
|
||||
channel_name = vid_dict["channel"]["channel_id"]
|
||||
# make archive folder with correct permissions
|
||||
new_folder = os.path.join(videos, channel_name)
|
||||
if not os.path.exists(new_folder):
|
||||
os.makedirs(new_folder)
|
||||
if host_uid and host_gid:
|
||||
os.chown(new_folder, host_uid, host_gid)
|
||||
# find real filename
|
||||
# make folder
|
||||
folder = os.path.join(videos, vid_dict["channel"]["channel_id"])
|
||||
if not os.path.exists(folder):
|
||||
os.makedirs(folder)
|
||||
if host_uid and host_gid:
|
||||
os.chown(folder, host_uid, host_gid)
|
||||
# move media file
|
||||
media_file = vid_dict["youtube_id"] + ".mp4"
|
||||
cache_dir = self.config["application"]["cache_dir"]
|
||||
all_cached = ignore_filelist(os.listdir(cache_dir + "/download/"))
|
||||
for file_str in all_cached:
|
||||
if vid_dict["youtube_id"] in file_str:
|
||||
old_file = file_str
|
||||
old_path = os.path.join(cache_dir, "download", old_file)
|
||||
old_path = os.path.join(cache_dir, "download", media_file)
|
||||
new_path = os.path.join(videos, vid_dict["media_url"])
|
||||
# move media file and fix permission
|
||||
shutil.move(old_path, new_path, copy_function=shutil.copyfile)
|
||||
|
|
|
@ -127,6 +127,12 @@ class IndexPaginate:
|
|||
|
||||
def validate_data(self):
|
||||
"""add pit and size to data"""
|
||||
if not self.data:
|
||||
self.data = {}
|
||||
|
||||
if "query" not in self.data.keys():
|
||||
self.data.update({"query": {"match_all": {}}})
|
||||
|
||||
if "sort" not in self.data.keys():
|
||||
self.data.update({"sort": [{"_doc": {"order": "desc"}}]})
|
||||
|
||||
|
|
|
@ -380,6 +380,9 @@
|
|||
},
|
||||
"auto_start": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"message": {
|
||||
"type": "text"
|
||||
}
|
||||
},
|
||||
"expected_set": {
|
||||
|
|
|
@ -14,7 +14,6 @@ from home.src.download.yt_dlp_base import YtWrap
|
|||
from home.src.es.connect import ElasticWrap, IndexPaginate
|
||||
from home.src.index.generic import YouTubeItem
|
||||
from home.src.index.playlist import YoutubePlaylist
|
||||
from home.src.ta.helper import clean_string
|
||||
|
||||
|
||||
class YoutubeChannel(YouTubeItem):
|
||||
|
@ -177,12 +176,10 @@ class YoutubeChannel(YouTubeItem):
|
|||
|
||||
def get_folder_path(self):
|
||||
"""get folder where media files get stored"""
|
||||
channel_name = self.json_data["channel_name"]
|
||||
folder_name = clean_string(channel_name)
|
||||
if len(folder_name) <= 3:
|
||||
# fall back to channel id
|
||||
folder_name = self.json_data["channel_id"]
|
||||
folder_path = os.path.join(self.app_conf["videos"], folder_name)
|
||||
folder_path = os.path.join(
|
||||
self.app_conf["videos"],
|
||||
self.json_data["channel_id"],
|
||||
)
|
||||
return folder_path
|
||||
|
||||
def delete_es_videos(self):
|
||||
|
|
|
@ -120,7 +120,9 @@ class Comments:
|
|||
"comment_timestamp": comment["timestamp"],
|
||||
"comment_time_text": time_text,
|
||||
"comment_likecount": comment["like_count"],
|
||||
"comment_is_favorited": comment["is_favorited"],
|
||||
"comment_is_favorited": comment.get(
|
||||
"is_favorited"
|
||||
), # temporary fix for yt-dlp upstream issue 7389
|
||||
"comment_author": comment["author"],
|
||||
"comment_author_id": comment["author_id"],
|
||||
"comment_author_thumbnail": comment["author_thumbnail"],
|
||||
|
|
|
@ -1,198 +1,85 @@
|
|||
"""
|
||||
Functionality:
|
||||
- reindexing old documents
|
||||
- syncing updated values between indexes
|
||||
- scan the filesystem to delete or index
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
|
||||
from home.src.download.queue import PendingList
|
||||
from home.src.es.connect import ElasticWrap
|
||||
from home.src.es.connect import ElasticWrap, IndexPaginate
|
||||
from home.src.index.comments import CommentList
|
||||
from home.src.index.video import index_new_video
|
||||
from home.src.index.video import YoutubeVideo, index_new_video
|
||||
from home.src.ta.config import AppConfig
|
||||
from home.src.ta.helper import clean_string, ignore_filelist
|
||||
from PIL import ImageFile
|
||||
|
||||
ImageFile.LOAD_TRUNCATED_IMAGES = True
|
||||
from home.src.ta.helper import ignore_filelist
|
||||
|
||||
|
||||
class ScannerBase:
|
||||
"""scan the filesystem base class"""
|
||||
class Scanner:
|
||||
"""scan index and filesystem"""
|
||||
|
||||
CONFIG = AppConfig().config
|
||||
VIDEOS = CONFIG["application"]["videos"]
|
||||
|
||||
def __init__(self):
|
||||
self.to_index = False
|
||||
self.to_delete = False
|
||||
self.mismatch = False
|
||||
self.to_rename = False
|
||||
|
||||
def scan(self):
|
||||
"""entry point, scan and compare"""
|
||||
all_downloaded = self._get_all_downloaded()
|
||||
all_indexed = self._get_all_indexed()
|
||||
self.list_comarison(all_downloaded, all_indexed)
|
||||
|
||||
def _get_all_downloaded(self):
|
||||
"""get a list of all video files downloaded"""
|
||||
channels = os.listdir(self.VIDEOS)
|
||||
all_channels = ignore_filelist(channels)
|
||||
all_channels.sort()
|
||||
all_downloaded = []
|
||||
for channel_name in all_channels:
|
||||
channel_path = os.path.join(self.VIDEOS, channel_name)
|
||||
channel_files = os.listdir(channel_path)
|
||||
channel_files_clean = ignore_filelist(channel_files)
|
||||
all_videos = [i for i in channel_files_clean if i.endswith(".mp4")]
|
||||
for video in all_videos:
|
||||
youtube_id = video[9:20]
|
||||
all_downloaded.append((channel_name, video, youtube_id))
|
||||
|
||||
return all_downloaded
|
||||
|
||||
@staticmethod
|
||||
def _get_all_indexed():
|
||||
"""get a list of all indexed videos"""
|
||||
index_handler = PendingList()
|
||||
index_handler.get_download()
|
||||
index_handler.get_indexed()
|
||||
|
||||
all_indexed = []
|
||||
for video in index_handler.all_videos:
|
||||
youtube_id = video["youtube_id"]
|
||||
media_url = video["media_url"]
|
||||
published = video["published"]
|
||||
title = video["title"]
|
||||
all_indexed.append((youtube_id, media_url, published, title))
|
||||
return all_indexed
|
||||
|
||||
def list_comarison(self, all_downloaded, all_indexed):
|
||||
"""compare the lists to figure out what to do"""
|
||||
self._find_unindexed(all_downloaded, all_indexed)
|
||||
self._find_missing(all_downloaded, all_indexed)
|
||||
self._find_bad_media_url(all_downloaded, all_indexed)
|
||||
|
||||
def _find_unindexed(self, all_downloaded, all_indexed):
|
||||
"""find video files without a matching document indexed"""
|
||||
all_indexed_ids = [i[0] for i in all_indexed]
|
||||
self.to_index = []
|
||||
for downloaded in all_downloaded:
|
||||
if downloaded[2] not in all_indexed_ids:
|
||||
self.to_index.append(downloaded)
|
||||
|
||||
def _find_missing(self, all_downloaded, all_indexed):
|
||||
"""find indexed videos without matching media file"""
|
||||
all_downloaded_ids = [i[2] for i in all_downloaded]
|
||||
self.to_delete = []
|
||||
for video in all_indexed:
|
||||
youtube_id = video[0]
|
||||
if youtube_id not in all_downloaded_ids:
|
||||
self.to_delete.append(video)
|
||||
|
||||
def _find_bad_media_url(self, all_downloaded, all_indexed):
|
||||
"""rename media files not matching the indexed title"""
|
||||
self.mismatch = []
|
||||
self.to_rename = []
|
||||
|
||||
for downloaded in all_downloaded:
|
||||
channel, filename, downloaded_id = downloaded
|
||||
# find in indexed
|
||||
for indexed in all_indexed:
|
||||
indexed_id, media_url, published, title = indexed
|
||||
if indexed_id == downloaded_id:
|
||||
# found it
|
||||
pub = published.replace("-", "")
|
||||
expected = f"{pub}_{indexed_id}_{clean_string(title)}.mp4"
|
||||
new_url = os.path.join(channel, expected)
|
||||
if expected != filename:
|
||||
# file to rename
|
||||
self.to_rename.append((channel, filename, expected))
|
||||
if media_url != new_url:
|
||||
# media_url to update in es
|
||||
self.mismatch.append((indexed_id, new_url))
|
||||
|
||||
break
|
||||
|
||||
|
||||
class Filesystem(ScannerBase):
|
||||
"""handle scanning and fixing from filesystem"""
|
||||
VIDEOS = AppConfig().config["application"]["videos"]
|
||||
|
||||
def __init__(self, task=False):
|
||||
super().__init__()
|
||||
self.task = task
|
||||
self.to_delete = False
|
||||
self.to_index = False
|
||||
|
||||
def process(self):
|
||||
"""entry point"""
|
||||
def scan(self):
|
||||
"""scan the filesystem"""
|
||||
downloaded = self._get_downloaded()
|
||||
indexed = self._get_indexed()
|
||||
self.to_index = downloaded - indexed
|
||||
self.to_delete = indexed - downloaded
|
||||
|
||||
def _get_downloaded(self):
|
||||
"""get downloaded ids"""
|
||||
if self.task:
|
||||
self.task.send_progress(["Scanning your archive and index."])
|
||||
self.scan()
|
||||
self.rename_files()
|
||||
self.send_mismatch_bulk()
|
||||
self.delete_from_index()
|
||||
self.add_missing()
|
||||
self.task.send_progress(["Scan your filesystem for videos."])
|
||||
|
||||
def rename_files(self):
|
||||
"""rename media files as identified by find_bad_media_url"""
|
||||
if not self.to_rename:
|
||||
return
|
||||
downloaded = set()
|
||||
channels = ignore_filelist(os.listdir(self.VIDEOS))
|
||||
for channel in channels:
|
||||
folder = os.path.join(self.VIDEOS, channel)
|
||||
files = ignore_filelist(os.listdir(folder))
|
||||
downloaded.update({i.split(".")[0] for i in files})
|
||||
|
||||
total = len(self.to_rename)
|
||||
return downloaded
|
||||
|
||||
def _get_indexed(self):
|
||||
"""get all indexed ids"""
|
||||
if self.task:
|
||||
self.task.send_progress([f"Rename {total} media files."])
|
||||
for bad_filename in self.to_rename:
|
||||
channel, filename, expected_filename = bad_filename
|
||||
print(f"renaming [{filename}] to [{expected_filename}]")
|
||||
old_path = os.path.join(self.VIDEOS, channel, filename)
|
||||
new_path = os.path.join(self.VIDEOS, channel, expected_filename)
|
||||
os.rename(old_path, new_path)
|
||||
self.task.send_progress(["Get all videos indexed."])
|
||||
|
||||
def send_mismatch_bulk(self):
|
||||
"""build bulk update"""
|
||||
if not self.mismatch:
|
||||
return
|
||||
data = {"query": {"match_all": {}}, "_source": ["youtube_id"]}
|
||||
response = IndexPaginate("ta_video", data).get_results()
|
||||
return {i["youtube_id"] for i in response}
|
||||
|
||||
total = len(self.mismatch)
|
||||
if self.task:
|
||||
self.task.send_progress([f"Fix media urls for {total} files"])
|
||||
bulk_list = []
|
||||
for video_mismatch in self.mismatch:
|
||||
youtube_id, media_url = video_mismatch
|
||||
print(f"{youtube_id}: fixing media url {media_url}")
|
||||
action = {"update": {"_id": youtube_id, "_index": "ta_video"}}
|
||||
source = {"doc": {"media_url": media_url}}
|
||||
bulk_list.append(json.dumps(action))
|
||||
bulk_list.append(json.dumps(source))
|
||||
# add last newline
|
||||
bulk_list.append("\n")
|
||||
data = "\n".join(bulk_list)
|
||||
_, _ = ElasticWrap("_bulk").post(data=data, ndjson=True)
|
||||
def apply(self):
|
||||
"""apply all changes"""
|
||||
self.delete()
|
||||
self.index()
|
||||
self.url_fix()
|
||||
|
||||
def delete_from_index(self):
|
||||
"""find indexed but deleted mediafile"""
|
||||
def delete(self):
|
||||
"""delete videos from index"""
|
||||
if not self.to_delete:
|
||||
print("nothing to delete")
|
||||
return
|
||||
|
||||
total = len(self.to_delete)
|
||||
if self.task:
|
||||
self.task.send_progress([f"Clean up {total} items from index."])
|
||||
for indexed in self.to_delete:
|
||||
youtube_id = indexed[0]
|
||||
print(f"deleting {youtube_id} from index")
|
||||
path = f"ta_video/_doc/{youtube_id}"
|
||||
_, _ = ElasticWrap(path).delete()
|
||||
self.task.send_progress(
|
||||
[f"Remove {len(self.to_delete)} videos from index."]
|
||||
)
|
||||
|
||||
def add_missing(self):
|
||||
"""add missing videos to index"""
|
||||
video_ids = [i[2] for i in self.to_index]
|
||||
if not video_ids:
|
||||
for youtube_id in self.to_delete:
|
||||
YoutubeVideo(youtube_id).delete_media_file()
|
||||
|
||||
def index(self):
|
||||
"""index new"""
|
||||
if not self.to_index:
|
||||
print("nothing to index")
|
||||
return
|
||||
|
||||
total = len(video_ids)
|
||||
for idx, youtube_id in enumerate(video_ids):
|
||||
total = len(self.to_index)
|
||||
for idx, youtube_id in enumerate(self.to_index):
|
||||
if self.task:
|
||||
self.task.send_progress(
|
||||
message_lines=[
|
||||
|
@ -202,4 +89,36 @@ class Filesystem(ScannerBase):
|
|||
)
|
||||
index_new_video(youtube_id)
|
||||
|
||||
CommentList(video_ids, task=self.task).index()
|
||||
CommentList(self.to_index, task=self.task).index()
|
||||
|
||||
def url_fix(self):
|
||||
"""
|
||||
update path v0.3.6 to v0.3.7
|
||||
fix url not matching channel-videoid pattern
|
||||
"""
|
||||
bool_must = (
|
||||
"doc['media_url'].value == "
|
||||
+ "(doc['channel.channel_id'].value + '/' + "
|
||||
+ "doc['youtube_id'].value) + '.mp4'"
|
||||
)
|
||||
to_update = (
|
||||
"ctx._source['media_url'] = "
|
||||
+ "ctx._source.channel['channel_id'] + '/' + "
|
||||
+ "ctx._source['youtube_id'] + '.mp4'"
|
||||
)
|
||||
data = {
|
||||
"query": {
|
||||
"bool": {
|
||||
"must_not": [{"script": {"script": {"source": bool_must}}}]
|
||||
}
|
||||
},
|
||||
"script": {"source": to_update},
|
||||
}
|
||||
response, _ = ElasticWrap("ta_video/_update_by_query").post(data=data)
|
||||
updated = response.get("updates")
|
||||
if updated:
|
||||
print(f"updated {updated} bad media_url")
|
||||
if self.task:
|
||||
self.task.send_progress(
|
||||
[f"Updated {updated} wrong media urls."]
|
||||
)
|
||||
|
|
|
@ -6,7 +6,6 @@ functionality:
|
|||
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
from datetime import datetime
|
||||
from time import sleep
|
||||
|
||||
|
@ -14,7 +13,6 @@ from home.src.download.queue import PendingList
|
|||
from home.src.download.subscriptions import ChannelSubscription
|
||||
from home.src.download.thumbnails import ThumbManager
|
||||
from home.src.download.yt_dlp_base import CookieHandler
|
||||
from home.src.download.yt_dlp_handler import VideoDownloader
|
||||
from home.src.es.connect import ElasticWrap, IndexPaginate
|
||||
from home.src.index.channel import YoutubeChannel
|
||||
from home.src.index.comments import Comments
|
||||
|
@ -54,6 +52,7 @@ class ReindexBase:
|
|||
def __init__(self):
|
||||
self.config = AppConfig().config
|
||||
self.now = int(datetime.now().timestamp())
|
||||
self.total = None
|
||||
|
||||
def populate(self, all_ids, reindex_config):
|
||||
"""add all to reindex ids to redis queue"""
|
||||
|
@ -61,6 +60,7 @@ class ReindexBase:
|
|||
return
|
||||
|
||||
RedisQueue(queue_name=reindex_config["queue_name"]).add_list(all_ids)
|
||||
self.total = None
|
||||
|
||||
|
||||
class ReindexPopulate(ReindexBase):
|
||||
|
@ -238,18 +238,19 @@ class Reindex(ReindexBase):
|
|||
if not RedisQueue(index_config["queue_name"]).has_item():
|
||||
continue
|
||||
|
||||
total = RedisQueue(index_config["queue_name"]).length()
|
||||
self.total = RedisQueue(index_config["queue_name"]).length()
|
||||
while True:
|
||||
has_next = self.reindex_index(name, index_config, total)
|
||||
has_next = self.reindex_index(name, index_config)
|
||||
if not has_next:
|
||||
break
|
||||
|
||||
def reindex_index(self, name, index_config, total):
|
||||
def reindex_index(self, name, index_config):
|
||||
"""reindex all of a single index"""
|
||||
reindex = self.get_reindex_map(index_config["index_name"])
|
||||
youtube_id = RedisQueue(index_config["queue_name"]).get_next()
|
||||
if youtube_id:
|
||||
self._notify(name, index_config, total)
|
||||
if self.task:
|
||||
self._notify(name, index_config)
|
||||
reindex(youtube_id)
|
||||
sleep_interval = self.config["downloads"].get("sleep_interval", 0)
|
||||
sleep(sleep_interval)
|
||||
|
@ -266,23 +267,18 @@ class Reindex(ReindexBase):
|
|||
|
||||
return def_map.get(index_name)
|
||||
|
||||
def _notify(self, name, index_config, total):
|
||||
def _notify(self, name, index_config):
|
||||
"""send notification back to task"""
|
||||
if self.total is None:
|
||||
self.total = RedisQueue(index_config["queue_name"]).length()
|
||||
|
||||
remaining = RedisQueue(index_config["queue_name"]).length()
|
||||
idx = total - remaining
|
||||
message = [f"Reindexing {name.title()}s {idx}/{total}"]
|
||||
progress = idx / total
|
||||
idx = self.total - remaining
|
||||
message = [f"Reindexing {name.title()}s {idx}/{self.total}"]
|
||||
progress = idx / self.total
|
||||
self.task.send_progress(message, progress=progress)
|
||||
|
||||
def _reindex_single_video(self, youtube_id):
|
||||
"""wrapper to handle channel name changes"""
|
||||
try:
|
||||
self._reindex_single_video_call(youtube_id)
|
||||
except FileNotFoundError:
|
||||
ChannelUrlFixer(youtube_id, self.config).run()
|
||||
self._reindex_single_video_call(youtube_id)
|
||||
|
||||
def _reindex_single_video_call(self, youtube_id):
|
||||
"""refresh data for single video"""
|
||||
video = YoutubeVideo(youtube_id)
|
||||
|
||||
|
@ -291,7 +287,10 @@ class Reindex(ReindexBase):
|
|||
es_meta = video.json_data.copy()
|
||||
|
||||
# get new
|
||||
video.build_json()
|
||||
media_url = os.path.join(
|
||||
self.config["application"]["videos"], es_meta["media_url"]
|
||||
)
|
||||
video.build_json(media_path=media_url)
|
||||
if not video.youtube_meta:
|
||||
video.deactivate()
|
||||
return
|
||||
|
@ -466,65 +465,6 @@ class ReindexProgress(ReindexBase):
|
|||
return state_dict
|
||||
|
||||
|
||||
class ChannelUrlFixer:
|
||||
"""fix not matching channel names in reindex"""
|
||||
|
||||
def __init__(self, youtube_id, config):
|
||||
self.youtube_id = youtube_id
|
||||
self.config = config
|
||||
self.video = False
|
||||
|
||||
def run(self):
|
||||
"""check and run if needed"""
|
||||
print(f"{self.youtube_id}: failed to build channel path, try to fix.")
|
||||
video_path_is, video_folder_is = self.get_as_is()
|
||||
if not os.path.exists(video_path_is):
|
||||
print(f"giving up reindex, video in video: {self.video.json_data}")
|
||||
raise ValueError
|
||||
|
||||
_, video_folder_should = self.get_as_should()
|
||||
|
||||
if video_folder_is != video_folder_should:
|
||||
self.process(video_path_is)
|
||||
else:
|
||||
print(f"{self.youtube_id}: skip channel url fixer")
|
||||
|
||||
def get_as_is(self):
|
||||
"""get video object as is"""
|
||||
self.video = YoutubeVideo(self.youtube_id)
|
||||
self.video.get_from_es()
|
||||
video_path_is = os.path.join(
|
||||
self.config["application"]["videos"],
|
||||
self.video.json_data["media_url"],
|
||||
)
|
||||
video_folder_is = os.path.split(video_path_is)[0]
|
||||
|
||||
return video_path_is, video_folder_is
|
||||
|
||||
def get_as_should(self):
|
||||
"""add fresh metadata from remote"""
|
||||
self.video.get_from_youtube()
|
||||
self.video.add_file_path()
|
||||
|
||||
video_path_should = os.path.join(
|
||||
self.config["application"]["videos"],
|
||||
self.video.json_data["media_url"],
|
||||
)
|
||||
video_folder_should = os.path.split(video_path_should)[0]
|
||||
return video_path_should, video_folder_should
|
||||
|
||||
def process(self, video_path_is):
|
||||
"""fix filepath"""
|
||||
print(f"{self.youtube_id}: fixing channel rename.")
|
||||
cache_dir = self.config["application"]["cache_dir"]
|
||||
new_path = os.path.join(
|
||||
cache_dir, "download", self.youtube_id + ".mp4"
|
||||
)
|
||||
shutil.move(video_path_is, new_path, copy_function=shutil.copyfile)
|
||||
VideoDownloader().move_to_archive(self.video.json_data)
|
||||
self.video.update_media_url()
|
||||
|
||||
|
||||
class ChannelFullScan:
|
||||
"""
|
||||
update from v0.3.0 to v0.3.1
|
||||
|
|
|
@ -62,7 +62,12 @@ class YoutubeSubtitle:
|
|||
if not all_formats:
|
||||
return False
|
||||
|
||||
subtitle = [i for i in all_formats if i["ext"] == "json3"][0]
|
||||
subtitle_json3 = [i for i in all_formats if i["ext"] == "json3"]
|
||||
if not subtitle_json3:
|
||||
print(f"{self.video.youtube_id}-{lang}: json3 not processed")
|
||||
return False
|
||||
|
||||
subtitle = subtitle_json3[0]
|
||||
subtitle.update(
|
||||
{"lang": lang, "source": "auto", "media_url": media_url}
|
||||
)
|
||||
|
|
|
@ -20,7 +20,7 @@ from home.src.index.video_streams import (
|
|||
DurationConverter,
|
||||
MediaStreamExtractor,
|
||||
)
|
||||
from home.src.ta.helper import clean_string, randomizor
|
||||
from home.src.ta.helper import randomizor
|
||||
from home.src.ta.ta_redis import RedisArchivist
|
||||
from ryd_client import ryd_client
|
||||
|
||||
|
@ -231,18 +231,24 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):
|
|||
def build_dl_cache_path(self):
|
||||
"""find video path in dl cache"""
|
||||
cache_dir = self.app_conf["cache_dir"]
|
||||
cache_path = f"{cache_dir}/download/"
|
||||
all_cached = os.listdir(cache_path)
|
||||
for file_cached in all_cached:
|
||||
if self.youtube_id in file_cached:
|
||||
vid_path = os.path.join(cache_path, file_cached)
|
||||
return vid_path
|
||||
video_id = self.json_data["youtube_id"]
|
||||
cache_path = f"{cache_dir}/download/{video_id}.mp4"
|
||||
if os.path.exists(cache_path):
|
||||
return cache_path
|
||||
|
||||
channel_path = os.path.join(
|
||||
self.app_conf["videos"],
|
||||
self.json_data["channel"]["channel_id"],
|
||||
f"{video_id}.mp4",
|
||||
)
|
||||
if os.path.exists(channel_path):
|
||||
return channel_path
|
||||
|
||||
raise FileNotFoundError
|
||||
|
||||
def add_player(self, media_path=False):
|
||||
"""add player information for new videos"""
|
||||
vid_path = self._get_vid_path(media_path)
|
||||
vid_path = media_path or self.build_dl_cache_path()
|
||||
|
||||
duration_handler = DurationConverter()
|
||||
duration = duration_handler.get_sec(vid_path)
|
||||
|
@ -259,7 +265,7 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):
|
|||
|
||||
def add_streams(self, media_path=False):
|
||||
"""add stream metadata"""
|
||||
vid_path = self._get_vid_path(media_path)
|
||||
vid_path = media_path or self.build_dl_cache_path()
|
||||
media = MediaStreamExtractor(vid_path)
|
||||
self.json_data.update(
|
||||
{
|
||||
|
@ -268,43 +274,12 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):
|
|||
}
|
||||
)
|
||||
|
||||
def _get_vid_path(self, media_path=False):
|
||||
"""get path of media file"""
|
||||
if media_path:
|
||||
return media_path
|
||||
|
||||
try:
|
||||
# when indexing from download task
|
||||
vid_path = self.build_dl_cache_path()
|
||||
except FileNotFoundError as err:
|
||||
# when reindexing needs to handle title rename
|
||||
channel = os.path.split(self.json_data["media_url"])[0]
|
||||
channel_dir = os.path.join(self.app_conf["videos"], channel)
|
||||
all_files = os.listdir(channel_dir)
|
||||
for file in all_files:
|
||||
if self.youtube_id in file and file.endswith(".mp4"):
|
||||
vid_path = os.path.join(channel_dir, file)
|
||||
break
|
||||
else:
|
||||
raise FileNotFoundError("could not find video file") from err
|
||||
|
||||
return vid_path
|
||||
|
||||
def add_file_path(self):
|
||||
"""build media_url for where file will be located"""
|
||||
channel_name = self.json_data["channel"]["channel_name"]
|
||||
clean_channel_name = clean_string(channel_name)
|
||||
if len(clean_channel_name) <= 3:
|
||||
# fall back to channel id
|
||||
clean_channel_name = self.json_data["channel"]["channel_id"]
|
||||
|
||||
timestamp = self.json_data["published"].replace("-", "")
|
||||
youtube_id = self.json_data["youtube_id"]
|
||||
title = self.json_data["title"]
|
||||
clean_title = clean_string(title)
|
||||
filename = f"{timestamp}_{youtube_id}_{clean_title}.mp4"
|
||||
media_url = os.path.join(clean_channel_name, filename)
|
||||
self.json_data["media_url"] = media_url
|
||||
self.json_data["media_url"] = os.path.join(
|
||||
self.json_data["channel"]["channel_id"],
|
||||
self.json_data["youtube_id"] + ".mp4",
|
||||
)
|
||||
|
||||
def delete_media_file(self):
|
||||
"""delete video file, meta data"""
|
||||
|
|
|
@ -35,24 +35,27 @@ class DurationConverter:
|
|||
return duration_sec
|
||||
|
||||
@staticmethod
|
||||
def get_str(duration_sec):
|
||||
def get_str(seconds):
|
||||
"""takes duration in sec and returns clean string"""
|
||||
if not duration_sec:
|
||||
if not seconds:
|
||||
# failed to extract
|
||||
return "NA"
|
||||
|
||||
hours = int(duration_sec // 3600)
|
||||
minutes = int((duration_sec - (hours * 3600)) // 60)
|
||||
secs = int(duration_sec - (hours * 3600) - (minutes * 60))
|
||||
days = int(seconds // (24 * 3600))
|
||||
hours = int((seconds % (24 * 3600)) // 3600)
|
||||
minutes = int((seconds % 3600) // 60)
|
||||
seconds = int(seconds % 60)
|
||||
|
||||
duration_str = str()
|
||||
if days:
|
||||
duration_str = f"{days}d "
|
||||
if hours:
|
||||
duration_str = str(hours).zfill(2) + ":"
|
||||
duration_str = duration_str + str(hours).zfill(2) + ":"
|
||||
if minutes:
|
||||
duration_str = duration_str + str(minutes).zfill(2) + ":"
|
||||
else:
|
||||
duration_str = duration_str + "00:"
|
||||
duration_str = duration_str + str(secs).zfill(2)
|
||||
duration_str = duration_str + str(seconds).zfill(2)
|
||||
return duration_str
|
||||
|
||||
|
||||
|
|
|
@ -8,6 +8,7 @@ import json
|
|||
import os
|
||||
import re
|
||||
from random import randint
|
||||
from time import sleep
|
||||
|
||||
import requests
|
||||
from celery.schedules import crontab
|
||||
|
@ -67,11 +68,19 @@ class AppConfig:
|
|||
@staticmethod
|
||||
def get_config_redis():
|
||||
"""read config json set from redis to overwrite defaults"""
|
||||
config = RedisArchivist().get_message("config")
|
||||
if not list(config.values())[0]:
|
||||
return False
|
||||
for i in range(10):
|
||||
try:
|
||||
config = RedisArchivist().get_message("config")
|
||||
if not list(config.values())[0]:
|
||||
return False
|
||||
|
||||
return config
|
||||
return config
|
||||
|
||||
except Exception: # pylint: disable=broad-except
|
||||
print(f"... Redis connection failed, retry [{i}/10]")
|
||||
sleep(3)
|
||||
|
||||
raise ConnectionError("failed to connect to redis")
|
||||
|
||||
def update_config(self, form_post):
|
||||
"""update config values from settings form"""
|
||||
|
@ -317,6 +326,10 @@ class ReleaseVersion:
|
|||
RedisArchivist().set_message(self.NEW_KEY, message)
|
||||
print(f"[{self.local_version}]: found new version {new_version}")
|
||||
|
||||
def get_local_version(self):
|
||||
"""read version from local"""
|
||||
return self.local_version
|
||||
|
||||
def get_remote_version(self):
|
||||
"""read version from remote"""
|
||||
self.response = requests.get(self.REMOTE_URL, timeout=20).json()
|
||||
|
|
|
@ -6,25 +6,13 @@ Loose collection of helper functions
|
|||
import json
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
import string
|
||||
import unicodedata
|
||||
from datetime import datetime
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
def clean_string(file_name: str) -> str:
|
||||
"""clean string to only asci characters"""
|
||||
whitelist = "-_.() " + string.ascii_letters + string.digits
|
||||
normalized = unicodedata.normalize("NFKD", file_name)
|
||||
ascii_only = normalized.encode("ASCII", "ignore").decode().strip()
|
||||
white_listed: str = "".join(c for c in ascii_only if c in whitelist)
|
||||
cleaned: str = re.sub(r"[ ]{2,}", " ", white_listed)
|
||||
return cleaned
|
||||
|
||||
|
||||
def ignore_filelist(filelist: list[str]) -> list[str]:
|
||||
"""ignore temp files for os.listdir sanitizer"""
|
||||
to_ignore = ["Icon\r\r", "Temporary Items", "Network Trash Folder"]
|
||||
|
|
|
@ -19,7 +19,7 @@ from home.src.download.yt_dlp_handler import VideoDownloader
|
|||
from home.src.es.backup import ElasticBackup
|
||||
from home.src.es.index_setup import ElasitIndexWrap
|
||||
from home.src.index.channel import YoutubeChannel
|
||||
from home.src.index.filesystem import Filesystem
|
||||
from home.src.index.filesystem import Scanner
|
||||
from home.src.index.manual import ImportFolderScanner
|
||||
from home.src.index.reindex import Reindex, ReindexManual, ReindexPopulate
|
||||
from home.src.ta.config import AppConfig, ReleaseVersion, ScheduleBuilder
|
||||
|
@ -113,7 +113,7 @@ class BaseTask(Task):
|
|||
"""callback for task failure"""
|
||||
print(f"{task_id} Failed callback")
|
||||
message, key = self._build_message(level="error")
|
||||
message.update({"messages": ["Task failed"]})
|
||||
message.update({"messages": [f"Task failed: {exc}"]})
|
||||
RedisArchivist().set_message(key, message, expire=20)
|
||||
|
||||
def on_success(self, retval, task_id, args, kwargs):
|
||||
|
@ -290,7 +290,9 @@ def rescan_filesystem(self):
|
|||
return
|
||||
|
||||
manager.init(self)
|
||||
Filesystem(task=self).process()
|
||||
handler = Scanner(task=self)
|
||||
handler.scan()
|
||||
handler.apply()
|
||||
ThumbValidator(task=self).validate()
|
||||
|
||||
|
||||
|
|
|
@ -97,6 +97,9 @@
|
|||
<a href="https://www.youtube.com/watch?v={{ video.source.youtube_id }}" target="_blank"><h3>{{ video.source.title }}</h3></a>
|
||||
</div>
|
||||
<p>Published: {{ video.source.published }} | Duration: {{ video.source.duration }} | {{ video.source.youtube_id }}</p>
|
||||
{% if video.source.message %}
|
||||
<p class="danger-zone">{{ video.source.message }}</p>
|
||||
{% endif %}
|
||||
<div>
|
||||
{% if show_ignored_only %}
|
||||
<button data-id="{{ video.source.youtube_id }}" onclick="forgetIgnore(this)">Forget</button>
|
||||
|
@ -105,6 +108,9 @@
|
|||
<button data-id="{{ video.source.youtube_id }}" onclick="toIgnore(this)">Ignore</button>
|
||||
<button id="{{ video.source.youtube_id }}" data-id="{{ video.source.youtube_id }}" onclick="downloadNow(this)">Download now</button>
|
||||
{% endif %}
|
||||
{% if video.source.message %}
|
||||
<button class="danger-button" data-id="{{ video.source.youtube_id }}" onclick="forgetIgnore(this)">Delete</button>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
|
|
@ -1,12 +1,12 @@
|
|||
celery==5.2.7
|
||||
Django==4.2.1
|
||||
django-auth-ldap==4.3.0
|
||||
django-cors-headers==3.14.0
|
||||
celery==5.3.1
|
||||
Django==4.2.3
|
||||
django-auth-ldap==4.4.0
|
||||
django-cors-headers==4.2.0
|
||||
djangorestframework==3.14.0
|
||||
Pillow==9.5.0
|
||||
redis==4.5.4
|
||||
requests==2.30.0
|
||||
Pillow==10.0.0
|
||||
redis==4.6.0
|
||||
requests==2.31.0
|
||||
ryd-client==0.0.6
|
||||
uWSGI==2.0.21
|
||||
whitenoise==6.4.0
|
||||
yt_dlp==2023.3.4
|
||||
whitenoise==6.5.0
|
||||
yt_dlp==2023.7.6
|
||||
|
|
|
@ -160,12 +160,12 @@ function dlPending() {
|
|||
}, 500);
|
||||
}
|
||||
|
||||
function addToQueue(autostart=false) {
|
||||
function addToQueue(autostart = false) {
|
||||
let textArea = document.getElementById('id_vid_url');
|
||||
if (textArea.value === '') {
|
||||
return
|
||||
return;
|
||||
}
|
||||
let toPost = {data: [{youtube_id: textArea.value, status: 'pending'}]};
|
||||
let toPost = { data: [{ youtube_id: textArea.value, status: 'pending' }] };
|
||||
let apiEndpoint = '/api/download/';
|
||||
if (autostart) {
|
||||
apiEndpoint = `${apiEndpoint}?autostart=true`;
|
||||
|
|
Loading…
Reference in New Issue