Mirror of https://github.com/tubearchivist/tubearchivist.git, synced 2025-02-12 19:10:14 +00:00
WIP: rewrite ManualImport, identify and archive
commit b76fa69396
parent 8f711d359b
home/src/index/filesystem.py
@@ -12,10 +12,9 @@ import shutil
-import subprocess

 from home.src.download.queue import PendingList
 from home.src.download.yt_dlp_handler import VideoDownloader
 from home.src.es.connect import ElasticWrap
 from home.src.index.reindex import Reindex
-from home.src.index.video import index_new_video
+from home.src.index.video import YoutubeVideo, index_new_video
 from home.src.ta.config import AppConfig
 from home.src.ta.helper import clean_string, ignore_filelist
 from home.src.ta.ta_redis import RedisArchivist
@@ -255,6 +254,8 @@ class ImportFolderScanner:
         self._convert_thumb(current_video)
         self._convert_video(current_video)

+        ManualImport(current_video, self.CONFIG).run()
+
     def _detect_youtube_id(self, current_video):
         """find video id from filename or json"""
         print(current_video)
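The scanner now finishes its per-file conversion steps and hands each identified video to the new single-video importer. As a minimal sketch of that handoff, here is the shape of the `current_video` dict this call implies; the keys are the ones `ManualImport` reads in the hunk below, while every value and filename is hypothetical:

# a minimal sketch, not part of the commit: plausible dict built by
# ImportFolderScanner for one video, with made-up example values
current_video = {
    "video_id": "dQw4w9WgXcQ",  # found by _detect_youtube_id
    "media": "cache/import/video [dQw4w9WgXcQ].mp4",
    "metadata": "cache/import/video.info.json",  # or False without a sidecar
    "thumb": "cache/import/video.jpg",
    "subtitle": ["cache/import/video.en.vtt"],
}

ManualImport(current_video, AppConfig().config).run()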
@@ -450,132 +451,65 @@ class ImportFolderScanner:
             os.remove(current_path)


-class ManualImportOld:
-    """import and indexing existing video files"""
-
-    CONFIG = AppConfig().config
-    CACHE_DIR = CONFIG["application"]["cache_dir"]
-    IMPORT_DIR = os.path.join(CACHE_DIR, "import")
-
-    def __init__(self):
-        self.identified = self.import_folder_parser()
-
-    def import_folder_parser(self):
-        """detect files in import folder"""
-        import_files = os.listdir(self.IMPORT_DIR)
-        to_import = ignore_filelist(import_files)
-        to_import.sort()
-        video_files = [i for i in to_import if not i.endswith(".json")]
-
-        identified = []
-
-        for file_path in video_files:
-
-            file_dict = {"video_file": file_path}
-            file_name, _ = os.path.splitext(file_path)
-
-            matching_json = [
-                i
-                for i in to_import
-                if i.startswith(file_name) and i.endswith(".json")
-            ]
-            if matching_json:
-                json_file = matching_json[0]
-                youtube_id = self.extract_id_from_json(json_file)
-                file_dict.update({"json_file": json_file})
-            else:
-                youtube_id = self.extract_id_from_filename(file_name)
-                file_dict.update({"json_file": False})
-
-            file_dict.update({"youtube_id": youtube_id})
-            identified.append(file_dict)
-
-        return identified
-
-    @staticmethod
-    def extract_id_from_filename(file_name):
-        """
-        look at the file name for the youtube id
-        expects filename ending in [<youtube_id>].<ext>
-        """
-        id_search = re.search(r"\[([a-zA-Z0-9_-]{11})\]$", file_name)
-        if id_search:
-            youtube_id = id_search.group(1)
-            return youtube_id
-
-        print("failed to extract youtube id for: " + file_name)
-        raise Exception
-
-    def extract_id_from_json(self, json_file):
-        """open json file and extract id"""
-        json_path = os.path.join(self.CACHE_DIR, "import", json_file)
-        with open(json_path, "r", encoding="utf-8") as f:
-            json_content = f.read()
-
-        youtube_id = json.loads(json_content)["id"]
-
-        return youtube_id
-
-    def process_import(self):
-        """go through identified media files"""
-
-        all_videos_added = []
-
-        for media_file in self.identified:
-            json_file = media_file["json_file"]
-            video_file = media_file["video_file"]
-            youtube_id = media_file["youtube_id"]
-
-            video_path = os.path.join(self.CACHE_DIR, "import", video_file)
-
-            self.move_to_cache(video_path, youtube_id)
-
-            # identify and archive
-            vid_dict = index_new_video(youtube_id)
-            VideoDownloader([youtube_id]).move_to_archive(vid_dict)
-            youtube_id = vid_dict["youtube_id"]
-            thumb_url = vid_dict["vid_thumb_url"]
-            all_videos_added.append((youtube_id, thumb_url))
-
-            # cleanup
-            if os.path.exists(video_path):
-                os.remove(video_path)
-            if json_file:
-                json_path = os.path.join(self.CACHE_DIR, "import", json_file)
-                os.remove(json_path)
-
-        return all_videos_added
-
-    def move_to_cache(self, video_path, youtube_id):
-        """move identified video file to cache, convert to mp4"""
-        file_name = os.path.split(video_path)[-1]
-        video_file, ext = os.path.splitext(file_name)
-
-        # make sure youtube_id is in filename
-        if youtube_id not in video_file:
-            video_file = f"{video_file}_{youtube_id}"
-
-        # move, convert if needed
-        if ext == ".mp4":
-            new_file = video_file + ext
-            dest_path = os.path.join(self.CACHE_DIR, "download", new_file)
-            shutil.move(video_path, dest_path, copy_function=shutil.copyfile)
-        else:
-            print(f"processing with ffmpeg: {video_file}")
-            new_file = video_file + ".mp4"
-            dest_path = os.path.join(self.CACHE_DIR, "download", new_file)
-            subprocess.run(
-                [
-                    "ffmpeg",
-                    "-i",
-                    video_path,
-                    dest_path,
-                    "-loglevel",
-                    "warning",
-                    "-stats",
-                ],
-                check=True,
-            )
+class ManualImport:
+    """import single identified video"""
+
+    def __init__(self, current_video, config):
+        self.current_video = current_video
+        self.config = config
+
+    def run(self):
+        """run all"""
+        json_data = self.index_metadata()
+        self._move_to_archive(json_data)
+        self._cleanup()
+
+    def index_metadata(self):
+        """get metadata from yt or json"""
+        video = YoutubeVideo(self.current_video["video_id"])
+        video.build_json(
+            youtube_meta_overwrite=self._get_info_json(),
+            media_path=self.current_video["media"],
+        )
+        video.check_subtitles()
+        video.upload_to_es()
+
+        return video.json_data
+
+    def _get_info_json(self):
+        """read info_json from file"""
+        if not self.current_video["metadata"]:
+            return False
+
+        with open(self.current_video["metadata"], "r", encoding="utf-8") as f:
+            info_json = json.loads(f.read())
+
+        return info_json
+
+    def _move_to_archive(self, json_data):
+        """move identified media file to archive"""
+        videos = self.config["application"]["videos"]
+
+        channel, file = os.path.split(json_data["media_url"])
+        channel_folder = os.path.join(videos, channel)
+        if not os.path.exists(channel_folder):
+            os.makedirs(channel_folder)
+
+        old_path = self.current_video["media"]
+        new_path = os.path.join(channel_folder, file)
+        shutil.move(old_path, new_path, copy_function=shutil.copyfile)
+
+    def _cleanup(self):
+        """cleanup leftover files"""
+        if os.path.exists(self.current_video["metadata"]):
+            os.remove(self.current_video["metadata"])
+
+        if os.path.exists(self.current_video["thumb"]):
+            os.remove(self.current_video["thumb"])
+
+        for subtitle_file in self.current_video["subtitle"]:
+            if os.path.exists(subtitle_file):
+                os.remove(subtitle_file)


 def scan_filesystem():
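The rewritten class indexes first, then derives the archive location from the indexed metadata instead of tracking paths itself. As a worked example of the path handling in `_move_to_archive`, assuming `media_url` keeps the `<channel>/<file>` layout that the `os.path.split` call implies (all values below are made up):

import os

videos = "/youtube"  # assumption: config["application"]["videos"]
media_url = "UC2TXq_t06Hjdr2g_KdKpHQg/dQw4w9WgXcQ_example.mp4"  # hypothetical

channel, file = os.path.split(media_url)
channel_folder = os.path.join(videos, channel)
new_path = os.path.join(channel_folder, file)

print(channel_folder)  # -> /youtube/UC2TXq_t06Hjdr2g_KdKpHQg
print(new_path)        # -> /youtube/UC2TXq_t06Hjdr2g_KdKpHQg/dQw4w9WgXcQ_example.mp4

Since `index_metadata` passes the sidecar info.json to `YoutubeVideo.build_json` as `youtube_meta_overwrite`, the local metadata can apparently stand in for a remote fetch, so the media file only has to be moved, never re-downloaded.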
home/tasks.py
@@ -20,7 +20,7 @@ from home.src.download.yt_dlp_handler import VideoDownloader
 from home.src.es.index_setup import backup_all_indexes, restore_from_backup
 from home.src.index.channel import YoutubeChannel
 from home.src.index.filesystem import (
-    ManualImport,
+    ImportFolderScanner,
     reindex_old_documents,
     scan_filesystem,
 )
@@ -150,10 +150,7 @@ def run_manual_import():
     try:
         have_lock = my_lock.acquire(blocking=False)
         if have_lock:
-            import_handler = ManualImport()
-            if import_handler.identified:
-                all_videos_added = import_handler.process_import()
-                ThumbManager().download_vid(all_videos_added)
+            ImportFolderScanner().scan()
         else:
             print("Did not acquire lock form import.")

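For context on the surrounding task, `my_lock` is acquired non-blocking so an overlapping manual import run is skipped rather than queued. A generic sketch of that pattern written against redis-py directly (the lock name and timeout are assumptions; tubearchivist routes its Redis access through helpers such as the `RedisArchivist` imported in filesystem.py above):

import redis

from home.src.index.filesystem import ImportFolderScanner

conn = redis.Redis()
my_lock = conn.lock("manual_import", timeout=3600)  # hypothetical name/timeout

have_lock = False
try:
    have_lock = my_lock.acquire(blocking=False)  # False if a run is in flight
    if have_lock:
        ImportFolderScanner().scan()  # long-running work, at most one at a time
    else:
        print("Did not acquire lock for import.")
finally:
    if have_lock:
        my_lock.release()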