WIP: rewrite ManualImport, identify and archive

simon 2022-08-08 19:18:27 +07:00
parent 8f711d359b
commit b76fa69396
No known key found for this signature in database
GPG Key ID: 2C15AA5E89985DD4
2 changed files with 51 additions and 120 deletions

home/src/index/filesystem.py

@@ -12,10 +12,9 @@
 import shutil
 import subprocess
 from home.src.download.queue import PendingList
 from home.src.download.yt_dlp_handler import VideoDownloader
-from home.src.es.connect import ElasticWrap
 from home.src.index.reindex import Reindex
-from home.src.index.video import index_new_video
+from home.src.index.video import YoutubeVideo, index_new_video
 from home.src.ta.config import AppConfig
 from home.src.ta.helper import clean_string, ignore_filelist
 from home.src.ta.ta_redis import RedisArchivist
@@ -255,6 +254,8 @@ class ImportFolderScanner:
             self._convert_thumb(current_video)
             self._convert_video(current_video)
 
+            ManualImport(current_video, self.CONFIG).run()
+
     def _detect_youtube_id(self, current_video):
         """find video id from filename or json"""
         print(current_video)
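
Note on the new call: `current_video` is the dict that `ImportFolderScanner` builds per identified file. Judging from the keys the new `ManualImport` class reads in the next hunk, its shape is roughly the following sketch (key names taken from the diff, example values hypothetical):

    current_video = {
        "video_id": "dQw4w9WgXcQ",                   # ID found by _detect_youtube_id
        "media": "cache/import/video.mp4",           # media file, converted to mp4
        "metadata": "cache/import/video.info.json",  # info.json path, or False
        "thumb": "cache/import/video.jpg",           # thumbnail to clean up, if any
        "subtitle": ["cache/import/video.en.vtt"],   # subtitle files to clean up
    }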
@@ -450,132 +451,65 @@ class ImportFolderScanner:
         os.remove(current_path)
 
 
-class ManualImportOld:
-    """import and indexing existing video files"""
+class ManualImport:
+    """import single identified video"""
 
-    CONFIG = AppConfig().config
-    CACHE_DIR = CONFIG["application"]["cache_dir"]
-    IMPORT_DIR = os.path.join(CACHE_DIR, "import")
+    def __init__(self, current_video, config):
+        self.current_video = current_video
+        self.config = config
 
-    def __init__(self):
-        self.identified = self.import_folder_parser()
+    def run(self):
+        """run all"""
+        json_data = self.index_metadata()
+        self._move_to_archive(json_data)
+        self._cleanup()
 
-    def import_folder_parser(self):
-        """detect files in import folder"""
-        import_files = os.listdir(self.IMPORT_DIR)
-        to_import = ignore_filelist(import_files)
-        to_import.sort()
-        video_files = [i for i in to_import if not i.endswith(".json")]
+    def index_metadata(self):
+        """get metadata from yt or json"""
+        video = YoutubeVideo(self.current_video["video_id"])
+        video.build_json(
+            youtube_meta_overwrite=self._get_info_json(),
+            media_path=self.current_video["media"],
+        )
+        video.check_subtitles()
+        video.upload_to_es()
 
-        identified = []
+        return video.json_data
 
-        for file_path in video_files:
+    def _get_info_json(self):
+        """read info_json from file"""
+        if not self.current_video["metadata"]:
+            return False
 
-            file_dict = {"video_file": file_path}
-            file_name, _ = os.path.splitext(file_path)
+        with open(self.current_video["metadata"], "r", encoding="utf-8") as f:
+            info_json = json.loads(f.read())
 
-            matching_json = [
-                i
-                for i in to_import
-                if i.startswith(file_name) and i.endswith(".json")
-            ]
-            if matching_json:
-                json_file = matching_json[0]
-                youtube_id = self.extract_id_from_json(json_file)
-                file_dict.update({"json_file": json_file})
-            else:
-                youtube_id = self.extract_id_from_filename(file_name)
-                file_dict.update({"json_file": False})
+        return info_json
 
-            file_dict.update({"youtube_id": youtube_id})
-            identified.append(file_dict)
+    def _move_to_archive(self, json_data):
+        """move identified media file to archive"""
+        videos = self.config["application"]["videos"]
 
-        return identified
+        channel, file = os.path.split(json_data["media_url"])
+        channel_folder = os.path.join(videos, channel)
+        if not os.path.exists(channel_folder):
+            os.makedirs(channel_folder)
 
-    @staticmethod
-    def extract_id_from_filename(file_name):
-        """
-        look at the file name for the youtube id
-        expects filename ending in [<youtube_id>].<ext>
-        """
-        id_search = re.search(r"\[([a-zA-Z0-9_-]{11})\]$", file_name)
-        if id_search:
-            youtube_id = id_search.group(1)
-            return youtube_id
+        old_path = self.current_video["media"]
+        new_path = os.path.join(channel_folder, file)
+        shutil.move(old_path, new_path, copy_function=shutil.copyfile)
 
-        print("failed to extract youtube id for: " + file_name)
-        raise Exception
+    def _cleanup(self):
+        """cleanup leftover files"""
+        if os.path.exists(self.current_video["metadata"]):
+            os.remove(self.current_video["metadata"])
 
-    def extract_id_from_json(self, json_file):
-        """open json file and extract id"""
-        json_path = os.path.join(self.CACHE_DIR, "import", json_file)
-        with open(json_path, "r", encoding="utf-8") as f:
-            json_content = f.read()
+        if os.path.exists(self.current_video["thumb"]):
+            os.remove(self.current_video["thumb"])
 
-        youtube_id = json.loads(json_content)["id"]
-        return youtube_id
-
-    def process_import(self):
-        """go through identified media files"""
-        all_videos_added = []
-
-        for media_file in self.identified:
-            json_file = media_file["json_file"]
-            video_file = media_file["video_file"]
-            youtube_id = media_file["youtube_id"]
-
-            video_path = os.path.join(self.CACHE_DIR, "import", video_file)
-
-            self.move_to_cache(video_path, youtube_id)
-
-            # identify and archive
-            vid_dict = index_new_video(youtube_id)
-            VideoDownloader([youtube_id]).move_to_archive(vid_dict)
-            youtube_id = vid_dict["youtube_id"]
-            thumb_url = vid_dict["vid_thumb_url"]
-            all_videos_added.append((youtube_id, thumb_url))
-
-            # cleanup
-            if os.path.exists(video_path):
-                os.remove(video_path)
-            if json_file:
-                json_path = os.path.join(self.CACHE_DIR, "import", json_file)
-                os.remove(json_path)
-
-        return all_videos_added
-
-    def move_to_cache(self, video_path, youtube_id):
-        """move identified video file to cache, convert to mp4"""
-        file_name = os.path.split(video_path)[-1]
-        video_file, ext = os.path.splitext(file_name)
-
-        # make sure youtube_id is in filename
-        if youtube_id not in video_file:
-            video_file = f"{video_file}_{youtube_id}"
-
-        # move, convert if needed
-        if ext == ".mp4":
-            new_file = video_file + ext
-            dest_path = os.path.join(self.CACHE_DIR, "download", new_file)
-            shutil.move(video_path, dest_path, copy_function=shutil.copyfile)
-        else:
-            print(f"processing with ffmpeg: {video_file}")
-            new_file = video_file + ".mp4"
-            dest_path = os.path.join(self.CACHE_DIR, "download", new_file)
-            subprocess.run(
-                [
-                    "ffmpeg",
-                    "-i",
-                    video_path,
-                    dest_path,
-                    "-loglevel",
-                    "warning",
-                    "-stats",
-                ],
-                check=True,
-            )
+        for subtitle_file in self.current_video["subtitle"]:
+            if os.path.exists(subtitle_file):
+                os.remove(subtitle_file)
 
 
 def scan_filesystem():
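
Two details of the new class worth spelling out. `index_metadata()` builds the video document through `YoutubeVideo.build_json()`, passing any local info.json as `youtube_meta_overwrite`, and uploads it to Elasticsearch. `_move_to_archive()` then relies on `json_data["media_url"]` having the form `<channel>/<filename>`. A minimal standalone sketch of that path handling, with the config value and both paths assumed for illustration:

    import os
    import shutil

    videos = "/youtube"                          # self.config["application"]["videos"]
    media_url = "UC-abc123/example_video.mp4"    # hypothetical json_data["media_url"]
    old_path = "cache/import/example_video.mp4"  # hypothetical current_video["media"]

    channel, file = os.path.split(media_url)
    channel_folder = os.path.join(videos, channel)
    os.makedirs(channel_folder, exist_ok=True)   # same effect as the exists check above

    # copyfile skips metadata copying, which can fail across mounted volumes
    if os.path.exists(old_path):
        shutil.move(old_path, os.path.join(channel_folder, file),
                    copy_function=shutil.copyfile)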

home/tasks.py

@@ -20,7 +20,7 @@ from home.src.download.yt_dlp_handler import VideoDownloader
 from home.src.es.index_setup import backup_all_indexes, restore_from_backup
 from home.src.index.channel import YoutubeChannel
 from home.src.index.filesystem import (
-    ManualImport,
+    ImportFolderScanner,
     reindex_old_documents,
     scan_filesystem,
 )
@@ -150,10 +150,7 @@ def run_manual_import():
     try:
         have_lock = my_lock.acquire(blocking=False)
         if have_lock:
-            import_handler = ManualImport()
-            if import_handler.identified:
-                all_videos_added = import_handler.process_import()
-                ThumbManager().download_vid(all_videos_added)
+            ImportFolderScanner().scan()
         else:
             print("Did not acquire lock for import.")