linting everything in black

simon 2021-09-21 16:25:22 +07:00
parent 69e6e490f4
commit 2433e0e7d8
19 changed files with 1100 additions and 1111 deletions
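
Black is an opinionated formatter rather than a linter: it rewrites source into one canonical style without changing runtime behavior. The hunks below are its standard normalizations: single-quoted strings become double-quoted, padded docstrings like """ text """ are trimmed to """text""", trailing commas are added to multi-line collections, and statements longer than the default 88-character limit are exploded one element per line. In each hunk, the original single-quoted line appears directly above its reformatted replacement. A minimal before/after sketch of the same transformations (hypothetical snippet, not taken from this commit):

# Hypothetical input, before formatting:
message = 'hello'
obs = {'default_search': 'ytsearch', 'quiet': True, 'skip_download': True, 'extract_flat': True}

# The same code after running "black" with default settings:
message = "hello"
obs = {
    "default_search": "ytsearch",
    "quiet": True,
    "skip_download": True,
    "extract_flat": True,
}

Because the pass is purely mechanical, the roughly 1100 changed lines map almost one-to-one between additions and deletions.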

config/asgi.py

@@ -11,6 +11,6 @@ import os
from django.core.asgi import get_asgi_application
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings')
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "config.settings")
application = get_asgi_application()

config/settings.py

@@ -21,67 +21,67 @@ BASE_DIR = Path(__file__).resolve().parent.parent
# See https://docs.djangoproject.com/en/3.2/howto/deployment/checklist/
# SECURITY WARNING: keep the secret key used in production secret!
SECRET_KEY = 'Fvid^aUL6LohRZz*kZFvq85B&JW&kB9o*#jdzWsdWE8*XkCLR8'
SECRET_KEY = "Fvid^aUL6LohRZz*kZFvq85B&JW&kB9o*#jdzWsdWE8*XkCLR8"
# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = bool(environ.get('DJANGO_DEBUG'))
DEBUG = bool(environ.get("DJANGO_DEBUG"))
ALLOWED_HOSTS = ['*']
ALLOWED_HOSTS = ["*"]
# Application definition
INSTALLED_APPS = [
'home.apps.HomeConfig',
'django.contrib.admin',
'django.contrib.auth',
'django.contrib.contenttypes',
'django.contrib.sessions',
'django.contrib.messages',
'whitenoise.runserver_nostatic',
'django.contrib.staticfiles',
'django.contrib.humanize'
"home.apps.HomeConfig",
"django.contrib.admin",
"django.contrib.auth",
"django.contrib.contenttypes",
"django.contrib.sessions",
"django.contrib.messages",
"whitenoise.runserver_nostatic",
"django.contrib.staticfiles",
"django.contrib.humanize",
]
MIDDLEWARE = [
'django.middleware.security.SecurityMiddleware',
'django.contrib.sessions.middleware.SessionMiddleware',
'whitenoise.middleware.WhiteNoiseMiddleware',
'django.middleware.common.CommonMiddleware',
'django.middleware.csrf.CsrfViewMiddleware',
'django.contrib.auth.middleware.AuthenticationMiddleware',
'django.contrib.messages.middleware.MessageMiddleware',
'django.middleware.clickjacking.XFrameOptionsMiddleware',
"django.middleware.security.SecurityMiddleware",
"django.contrib.sessions.middleware.SessionMiddleware",
"whitenoise.middleware.WhiteNoiseMiddleware",
"django.middleware.common.CommonMiddleware",
"django.middleware.csrf.CsrfViewMiddleware",
"django.contrib.auth.middleware.AuthenticationMiddleware",
"django.contrib.messages.middleware.MessageMiddleware",
"django.middleware.clickjacking.XFrameOptionsMiddleware",
]
ROOT_URLCONF = 'config.urls'
ROOT_URLCONF = "config.urls"
TEMPLATES = [
{
'BACKEND': 'django.template.backends.django.DjangoTemplates',
'DIRS': [],
'APP_DIRS': True,
'OPTIONS': {
'context_processors': [
'django.template.context_processors.debug',
'django.template.context_processors.request',
'django.contrib.auth.context_processors.auth',
'django.contrib.messages.context_processors.messages',
"BACKEND": "django.template.backends.django.DjangoTemplates",
"DIRS": [],
"APP_DIRS": True,
"OPTIONS": {
"context_processors": [
"django.template.context_processors.debug",
"django.template.context_processors.request",
"django.contrib.auth.context_processors.auth",
"django.contrib.messages.context_processors.messages",
],
},
},
]
WSGI_APPLICATION = 'config.wsgi.application'
WSGI_APPLICATION = "config.wsgi.application"
# Database
# https://docs.djangoproject.com/en/3.2/ref/settings/#databases
DATABASES = {
'default': {
'ENGINE': 'django.db.backends.sqlite3',
'NAME': BASE_DIR / 'db.sqlite3',
"default": {
"ENGINE": "django.db.backends.sqlite3",
"NAME": BASE_DIR / "db.sqlite3",
}
}
@@ -91,16 +91,16 @@ DATABASES = {
AUTH_PASSWORD_VALIDATORS = [
{
'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator', # noqa: E501
"NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator", # noqa: E501
},
{
'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator', # noqa: E501
"NAME": "django.contrib.auth.password_validation.MinimumLengthValidator", # noqa: E501
},
{
'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator', # noqa: E501
"NAME": "django.contrib.auth.password_validation.CommonPasswordValidator", # noqa: E501
},
{
'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator', # noqa: E501
"NAME": "django.contrib.auth.password_validation.NumericPasswordValidator", # noqa: E501
},
]
@@ -108,9 +108,9 @@ AUTH_PASSWORD_VALIDATORS = [
# Internationalization
# https://docs.djangoproject.com/en/3.2/topics/i18n/
LANGUAGE_CODE = 'en-us'
LANGUAGE_CODE = "en-us"
TIME_ZONE = 'UTC'
TIME_ZONE = "UTC"
USE_I18N = True
@@ -122,7 +122,7 @@ USE_TZ = True
# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/3.2/howto/static-files/
STATIC_URL = '/static/'
STATIC_URL = "/static/"
# STATICFILES_DIRS = [
# str(BASE_DIR.joinpath('static')),
@@ -130,15 +130,15 @@ STATIC_URL = '/static/'
# ]
# STATIC_URL = '/static/'
STATICFILES_DIRS = (str(BASE_DIR.joinpath('static')),)
STATICFILES_DIRS = (str(BASE_DIR.joinpath("static")),)
# MEDIA_ROOT = str(BASE_DIR.joinpath('media'))
# MEDIA_URL = '/media/'
STATIC_ROOT = str(BASE_DIR.joinpath('staticfiles'))
STATIC_ROOT = str(BASE_DIR.joinpath("staticfiles"))
STATICFILES_STORAGE = 'whitenoise.storage.CompressedManifestStaticFilesStorage'
STATICFILES_STORAGE = "whitenoise.storage.CompressedManifestStaticFilesStorage"
# Default primary key field type
# https://docs.djangoproject.com/en/3.2/ref/settings/#default-auto-field
DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField'
DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField"

config/urls.py

@@ -17,6 +17,6 @@ from django.contrib import admin
from django.urls import include, path
urlpatterns = [
path('', include('home.urls')),
path('admin/', admin.site.urls),
path("", include("home.urls")),
path("admin/", admin.site.urls),
]

config/wsgi.py

@@ -11,6 +11,6 @@ import os
from django.core.wsgi import get_wsgi_application
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings')
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "config.settings")
application = get_wsgi_application()

home/__init__.py

@@ -10,24 +10,24 @@ from .tasks import app as celery_app
def sync_redis_state():
""" make sure redis gets the config.json values """
print('sync redis')
"""make sure redis gets the config.json values"""
print("sync redis")
config_handler = AppConfig()
config_handler.load_new_defaults()
config = config_handler.config
sort_order = config['archive']['sort']
set_message('sort_order', sort_order, expire=False)
hide_watched = bool(int(config['archive']['hide_watched']))
set_message('hide_watched', hide_watched, expire=False)
show_subed_only = bool(int(config['archive']['show_subed_only']))
set_message('show_subed_only', show_subed_only, expire=False)
sort_order = config["archive"]["sort"]
set_message("sort_order", sort_order, expire=False)
hide_watched = bool(int(config["archive"]["hide_watched"]))
set_message("hide_watched", hide_watched, expire=False)
show_subed_only = bool(int(config["archive"]["show_subed_only"]))
set_message("show_subed_only", show_subed_only, expire=False)
def make_folders():
""" make needed cache folders here so docker doesn't mess it up """
folders = ['download', 'channels', 'videos', 'import', 'backup']
"""make needed cache folders here so docker doesn't mess it up"""
folders = ["download", "channels", "videos", "import", "backup"]
config = AppConfig().config
cache_dir = config['application']['cache_dir']
cache_dir = config["application"]["cache_dir"]
for folder in folders:
folder_path = os.path.join(cache_dir, folder)
try:
@@ -36,7 +36,7 @@ def make_folders():
continue
__all__ = ('celery_app',)
__all__ = ("celery_app",)
make_folders()
sync_redis_state()
index_check()

home/apps.py

@@ -2,5 +2,5 @@ from django.apps import AppConfig
class HomeConfig(AppConfig):
default_auto_field = 'django.db.models.BigAutoField'
name = 'home'
default_auto_field = "django.db.models.BigAutoField"
name = "home"

home/src/config.py

@@ -12,71 +12,71 @@ from home.src.helper import get_message, set_message
class AppConfig:
""" handle user settings and application variables """
"""handle user settings and application variables"""
def __init__(self):
self.config = self.get_config()
def get_config(self):
""" get config from default file or redis if changed """
"""get config from default file or redis if changed"""
config = self.get_config_redis()
if not config:
config = self.get_config_file()
config['application'].update(self.get_config_env())
config["application"].update(self.get_config_env())
return config
def get_config_file(self):
""" read the defaults from config.json """
with open('home/config.json', 'r', encoding="utf-8") as f:
"""read the defaults from config.json"""
with open("home/config.json", "r", encoding="utf-8") as f:
config_str = f.read()
config_file = json.loads(config_str)
config_file['application'].update(self.get_config_env())
config_file["application"].update(self.get_config_env())
return config_file
@staticmethod
def get_config_env():
""" read environment application variables """
"""read environment application variables"""
application = {
'REDIS_HOST': os.environ.get('REDIS_HOST'),
'es_url': os.environ.get('ES_URL'),
'HOST_UID': int(os.environ.get('HOST_UID')),
'HOST_GID': int(os.environ.get('HOST_GID'))
"REDIS_HOST": os.environ.get("REDIS_HOST"),
"es_url": os.environ.get("ES_URL"),
"HOST_UID": int(os.environ.get("HOST_UID")),
"HOST_GID": int(os.environ.get("HOST_GID")),
}
return application
@staticmethod
def get_config_redis():
""" read config json set from redis to overwrite defaults """
config = get_message('config')
"""read config json set from redis to overwrite defaults"""
config = get_message("config")
if not list(config.values())[0]:
return False
return config
def update_config(self, form_post):
""" update config values from settings form """
"""update config values from settings form"""
config = self.config
for key, value in form_post.items():
to_write = value[0]
if len(to_write):
if to_write == '0':
if to_write == "0":
to_write = False
elif to_write == '1':
elif to_write == "1":
to_write = True
elif to_write.isdigit():
to_write = int(to_write)
config_dict, config_value = key.split('.')
config_dict, config_value = key.split(".")
config[config_dict][config_value] = to_write
set_message('config', config, expire=False)
set_message("config", config, expire=False)
def load_new_defaults(self):
""" check config.json for missing defaults """
"""check config.json for missing defaults"""
default_config = self.get_config_file()
redis_config = self.get_config_redis()
@@ -100,4 +100,4 @@ class AppConfig:
needs_update = True
if needs_update:
set_message('config', redis_config, expire=False)
set_message("config", redis_config, expire=False)

home/src/download.py

@@ -19,15 +19,15 @@ from home.src.index import YoutubeChannel, index_new_video
class PendingList:
""" manage the pending videos list """
"""manage the pending videos list"""
CONFIG = AppConfig().config
ES_URL = CONFIG['application']['es_url']
VIDEOS = CONFIG['application']['videos']
ES_URL = CONFIG["application"]["es_url"]
VIDEOS = CONFIG["application"]["videos"]
@staticmethod
def parse_url_list(youtube_ids):
""" extract youtube ids from list """
"""extract youtube ids from list"""
missing_videos = []
for entry in youtube_ids:
# notify
@@ -35,31 +35,31 @@ class PendingList:
"status": "pending",
"level": "info",
"title": "Adding to download queue.",
"message": 'Extracting lists'
"message": "Extracting lists",
}
set_message('progress:download', mess_dict)
set_message("progress:download", mess_dict)
# extract
url = entry['url']
url_type = entry['type']
if url_type == 'video':
url = entry["url"]
url_type = entry["type"]
if url_type == "video":
missing_videos.append(url)
elif url_type == 'channel':
elif url_type == "channel":
youtube_ids = ChannelSubscription().get_last_youtube_videos(
url, limit=False
)
missing_videos = missing_videos + youtube_ids
elif url_type == 'playlist':
elif url_type == "playlist":
youtube_ids = playlist_extractor(url)
missing_videos = missing_videos + youtube_ids
return missing_videos
def add_to_pending(self, missing_videos):
""" build the bulk json data from pending """
"""build the bulk json data from pending"""
# check if channel is indexed
channel_handler = ChannelSubscription()
all_indexed = channel_handler.get_channels(subscribed_only=False)
all_channel_ids = [i['channel_id'] for i in all_indexed]
all_channel_ids = [i["channel_id"] for i in all_indexed]
# check if already there
all_downloaded = self.get_all_downloaded()
# loop
@@ -77,11 +77,11 @@ class PendingList:
if not video:
continue
if video['channel_id'] in all_channel_ids:
video['channel_indexed'] = True
if video["channel_id"] in all_channel_ids:
video["channel_indexed"] = True
else:
video['channel_indexed'] = False
video['status'] = "pending"
video["channel_indexed"] = False
video["status"] = "pending"
action = {"create": {"_id": youtube_id, "_index": "ta_download"}}
bulk_list.append(json.dumps(action))
bulk_list.append(json.dumps(video))
@@ -90,128 +90,130 @@ class PendingList:
"status": "pending",
"level": "info",
"title": "Adding to download queue.",
"message": 'Processing IDs...'
"message": "Processing IDs...",
}
set_message('progress:download', mess_dict)
set_message("progress:download", mess_dict)
# add last newline
bulk_list.append('\n')
query_str = '\n'.join(bulk_list)
headers = {'Content-type': 'application/x-ndjson'}
url = self.ES_URL + '/_bulk'
bulk_list.append("\n")
query_str = "\n".join(bulk_list)
headers = {"Content-type": "application/x-ndjson"}
url = self.ES_URL + "/_bulk"
request = requests.post(url, data=query_str, headers=headers)
if not request.ok:
print(request)
@staticmethod
def get_youtube_details(youtube_id):
""" get details from youtubedl for single pending video """
"""get details from youtubedl for single pending video"""
obs = {
'default_search': 'ytsearch',
'quiet': True,
'skip_download': True,
"default_search": "ytsearch",
"quiet": True,
"skip_download": True,
}
try:
vid = youtube_dl.YoutubeDL(obs).extract_info(youtube_id)
except youtube_dl.utils.DownloadError:
print('failed to extract info for: ' + youtube_id)
print("failed to extract info for: " + youtube_id)
return False
# parse response
seconds = vid['duration']
seconds = vid["duration"]
duration_str = DurationConverter.get_str(seconds)
upload_date = vid['upload_date']
upload_date = vid["upload_date"]
upload_dt = datetime.strptime(upload_date, "%Y%m%d")
published = upload_dt.strftime("%Y-%m-%d")
# build dict
youtube_details = {
"youtube_id": youtube_id,
"channel_name": vid['channel'],
"vid_thumb_url": vid['thumbnail'],
"title": vid['title'],
"channel_id": vid['channel_id'],
"channel_name": vid["channel"],
"vid_thumb_url": vid["thumbnail"],
"title": vid["title"],
"channel_id": vid["channel_id"],
"duration": duration_str,
"published": published,
"timestamp": int(datetime.now().strftime("%s"))
"timestamp": int(datetime.now().strftime("%s")),
}
return youtube_details
def get_all_pending(self):
""" get a list of all pending videos in ta_download """
headers = {'Content-type': 'application/json'}
"""get a list of all pending videos in ta_download"""
headers = {"Content-type": "application/json"}
# get PIT ID
url = self.ES_URL + '/ta_download/_pit?keep_alive=1m'
url = self.ES_URL + "/ta_download/_pit?keep_alive=1m"
response = requests.post(url)
json_data = json.loads(response.text)
pit_id = json_data['id']
pit_id = json_data["id"]
# query
data = {
"size": 50, "query": {"match_all": {}},
"size": 50,
"query": {"match_all": {}},
"pit": {"id": pit_id, "keep_alive": "1m"},
"sort": [{"timestamp": {"order": "desc"}}]
"sort": [{"timestamp": {"order": "desc"}}],
}
query_str = json.dumps(data)
url = self.ES_URL + '/_search'
url = self.ES_URL + "/_search"
all_pending = []
all_ignore = []
while True:
response = requests.get(url, data=query_str, headers=headers)
json_data = json.loads(response.text)
all_hits = json_data['hits']['hits']
all_hits = json_data["hits"]["hits"]
if all_hits:
for hit in all_hits:
youtube_id = hit['_source']['youtube_id']
status = hit['_source']['status']
if status == 'pending':
all_pending.append(hit['_source'])
elif status == 'ignore':
youtube_id = hit["_source"]["youtube_id"]
status = hit["_source"]["status"]
if status == "pending":
all_pending.append(hit["_source"])
elif status == "ignore":
all_ignore.append(youtube_id)
search_after = hit['sort']
search_after = hit["sort"]
# update search_after with last hit data
data['search_after'] = search_after
data["search_after"] = search_after
query_str = json.dumps(data)
else:
break
# clean up PIT
query_str = json.dumps({"id": pit_id})
requests.delete(self.ES_URL + '/_pit', data=query_str, headers=headers)
requests.delete(self.ES_URL + "/_pit", data=query_str, headers=headers)
return all_pending, all_ignore
def get_all_indexed(self):
""" get a list of all videos indexed """
headers = {'Content-type': 'application/json'}
"""get a list of all videos indexed"""
headers = {"Content-type": "application/json"}
# get PIT ID
url = self.ES_URL + '/ta_video/_pit?keep_alive=1m'
url = self.ES_URL + "/ta_video/_pit?keep_alive=1m"
response = requests.post(url)
json_data = json.loads(response.text)
pit_id = json_data['id']
pit_id = json_data["id"]
# query
data = {
"size": 500, "query": {"match_all": {}},
"size": 500,
"query": {"match_all": {}},
"pit": {"id": pit_id, "keep_alive": "1m"},
"sort": [{"published": {"order": "desc"}}]
"sort": [{"published": {"order": "desc"}}],
}
query_str = json.dumps(data)
url = self.ES_URL + '/_search'
url = self.ES_URL + "/_search"
all_indexed = []
while True:
response = requests.get(url, data=query_str, headers=headers)
json_data = json.loads(response.text)
all_hits = json_data['hits']['hits']
all_hits = json_data["hits"]["hits"]
if all_hits:
for hit in all_hits:
all_indexed.append(hit)
search_after = hit['sort']
search_after = hit["sort"]
# update search_after with last hit data
data['search_after'] = search_after
data["search_after"] = search_after
query_str = json.dumps(data)
else:
break
# clean up PIT
query_str = json.dumps({"id": pit_id})
requests.delete(self.ES_URL + '/_pit', data=query_str, headers=headers)
requests.delete(self.ES_URL + "/_pit", data=query_str, headers=headers)
return all_indexed
def get_all_downloaded(self):
""" get a list of all videos in archive """
"""get a list of all videos in archive"""
all_channel_folders = os.listdir(self.VIDEOS)
all_downloaded = []
for channel_folder in all_channel_folders:
@@ -223,125 +225,131 @@ class PendingList:
return all_downloaded
def delete_from_pending(self, youtube_id):
""" delete the youtube_id from ta_download """
url = f'{self.ES_URL}/ta_download/_doc/{youtube_id}'
"""delete the youtube_id from ta_download"""
url = f"{self.ES_URL}/ta_download/_doc/{youtube_id}"
response = requests.delete(url)
if not response.ok:
print(response.text)
def ignore_from_pending(self, ignore_list):
""" build the bulk query string """
"""build the bulk query string"""
stamp = int(datetime.now().strftime("%s"))
bulk_list = []
for youtube_id in ignore_list:
action = {"update": {"_id": youtube_id, "_index": "ta_download"}}
source = {"doc": {"status": 'ignore', "timestamp": stamp}}
source = {"doc": {"status": "ignore", "timestamp": stamp}}
bulk_list.append(json.dumps(action))
bulk_list.append(json.dumps(source))
# add last newline
bulk_list.append('\n')
query_str = '\n'.join(bulk_list)
bulk_list.append("\n")
query_str = "\n".join(bulk_list)
headers = {'Content-type': 'application/x-ndjson'}
url = self.ES_URL + '/_bulk'
headers = {"Content-type": "application/x-ndjson"}
url = self.ES_URL + "/_bulk"
request = requests.post(url, data=query_str, headers=headers)
mess_dict = {
"status": "ignore",
"level": "info",
"title": "Added to ignore list",
"message": ''
"message": "",
}
set_message('progress:download', mess_dict)
set_message("progress:download", mess_dict)
if not request.ok:
print(request)
class ChannelSubscription:
""" manage the list of channels subscribed """
"""manage the list of channels subscribed"""
def __init__(self):
config = AppConfig().config
self.es_url = config['application']['es_url']
self.channel_size = config['subscriptions']['channel_size']
self.es_url = config["application"]["es_url"]
self.channel_size = config["subscriptions"]["channel_size"]
def get_channels(self, subscribed_only=True):
""" get a list of all channels subscribed to """
headers = {'Content-type': 'application/json'}
"""get a list of all channels subscribed to"""
headers = {"Content-type": "application/json"}
# get PIT ID
url = self.es_url + '/ta_channel/_pit?keep_alive=1m'
url = self.es_url + "/ta_channel/_pit?keep_alive=1m"
response = requests.post(url)
json_data = json.loads(response.text)
pit_id = json_data['id']
pit_id = json_data["id"]
# query
if subscribed_only:
data = {
"query": {"term": {"channel_subscribed": {"value": True}}},
"size": 50, "pit": {"id": pit_id, "keep_alive": "1m"},
"sort": [{"channel_name.keyword": {"order": "asc"}}]
"size": 50,
"pit": {"id": pit_id, "keep_alive": "1m"},
"sort": [{"channel_name.keyword": {"order": "asc"}}],
}
else:
data = {
"query": {"match_all": {}},
"size": 50, "pit": {"id": pit_id, "keep_alive": "1m"},
"sort": [{"channel_name.keyword": {"order": "asc"}}]
"size": 50,
"pit": {"id": pit_id, "keep_alive": "1m"},
"sort": [{"channel_name.keyword": {"order": "asc"}}],
}
query_str = json.dumps(data)
url = self.es_url + '/_search'
url = self.es_url + "/_search"
all_channels = []
while True:
response = requests.get(url, data=query_str, headers=headers)
json_data = json.loads(response.text)
all_hits = json_data['hits']['hits']
all_hits = json_data["hits"]["hits"]
if all_hits:
for hit in all_hits:
source = hit['_source']
search_after = hit['sort']
source = hit["_source"]
search_after = hit["sort"]
all_channels.append(source)
# update search_after with last hit data
data['search_after'] = search_after
data["search_after"] = search_after
query_str = json.dumps(data)
else:
break
# clean up PIT
query_str = json.dumps({"id": pit_id})
requests.delete(self.es_url + '/_pit', data=query_str, headers=headers)
requests.delete(self.es_url + "/_pit", data=query_str, headers=headers)
return all_channels
def get_last_youtube_videos(self, channel_id, limit=True):
""" get a list of last videos from channel """
url = f'https://www.youtube.com/channel/{channel_id}/videos'
"""get a list of last videos from channel"""
url = f"https://www.youtube.com/channel/{channel_id}/videos"
obs = {
'default_search': 'ytsearch', 'quiet': True,
'skip_download': True, 'extract_flat': True
"default_search": "ytsearch",
"quiet": True,
"skip_download": True,
"extract_flat": True,
}
if limit:
obs['playlistend'] = self.channel_size
obs["playlistend"] = self.channel_size
chan = youtube_dl.YoutubeDL(obs).extract_info(url, download=False)
last_videos = [(i['id'], i['title']) for i in chan['entries']]
last_videos = [(i["id"], i["title"]) for i in chan["entries"]]
return last_videos
def find_missing(self):
""" add missing videos from subscribed channels to pending """
"""add missing videos from subscribed channels to pending"""
all_channels = self.get_channels()
pending_handler = PendingList()
all_pending, all_ignore = pending_handler.get_all_pending()
all_pending_ids = [i['youtube_id'] for i in all_pending]
all_pending_ids = [i["youtube_id"] for i in all_pending]
all_downloaded = pending_handler.get_all_downloaded()
to_ignore = all_pending_ids + all_ignore + all_downloaded
missing_videos = []
counter = 1
for channel in all_channels:
channel_id = channel['channel_id']
channel_id = channel["channel_id"]
last_videos = self.get_last_youtube_videos(channel_id)
set_message('progress:download', {
"status": "rescan",
"level": "info",
"title": "Rescanning: Looking for new videos.",
"message": f'Progress: {counter}/{len(all_channels)}'
}
set_message(
"progress:download",
{
"status": "rescan",
"level": "info",
"title": "Rescanning: Looking for new videos.",
"message": f"Progress: {counter}/{len(all_channels)}",
},
)
for video in last_videos:
youtube_id = video[0]
@@ -352,22 +360,22 @@ class ChannelSubscription:
return missing_videos
def change_subscribe(self, channel_id, channel_subscribed):
""" subscribe or unsubscribe from channel and update """
"""subscribe or unsubscribe from channel and update"""
if not isinstance(channel_subscribed, bool):
print('invalid status, should be bool')
print("invalid status, should be bool")
return
headers = {'Content-type': 'application/json'}
headers = {"Content-type": "application/json"}
channel_handler = YoutubeChannel(channel_id)
channel_dict = channel_handler.channel_dict
channel_dict['channel_subscribed'] = channel_subscribed
channel_dict["channel_subscribed"] = channel_subscribed
if channel_subscribed:
# handle subscribe
url = self.es_url + '/ta_channel/_doc/' + channel_id
url = self.es_url + "/ta_channel/_doc/" + channel_id
payload = json.dumps(channel_dict)
print(channel_dict)
else:
url = self.es_url + '/ta_channel/_update/' + channel_id
payload = json.dumps({'doc': channel_dict})
url = self.es_url + "/ta_channel/_update/" + channel_id
payload = json.dumps({"doc": channel_dict})
# update channel
request = requests.post(url, data=payload, headers=headers)
if not request.ok:
@@ -377,27 +385,30 @@ class ChannelSubscription:
def playlist_extractor(playlist_id):
""" return youtube_ids from a playlist_id """
url = 'https://www.youtube.com/playlist?list=' + playlist_id
"""return youtube_ids from a playlist_id"""
url = "https://www.youtube.com/playlist?list=" + playlist_id
obs = {
'default_search': 'ytsearch', 'quiet': True, 'ignoreerrors': True,
'skip_download': True, 'extract_flat': True
"default_search": "ytsearch",
"quiet": True,
"ignoreerrors": True,
"skip_download": True,
"extract_flat": True,
}
playlist = youtube_dl.YoutubeDL(obs).extract_info(url, download=False)
playlist_vids = [(i['id'], i['title']) for i in playlist['entries']]
playlist_vids = [(i["id"], i["title"]) for i in playlist["entries"]]
return playlist_vids
class VideoDownloader:
""" handle the video download functionality """
"""handle the video download functionality"""
def __init__(self, youtube_id_list):
self.youtube_id_list = youtube_id_list
self.config = AppConfig().config
def download_list(self):
""" download the list of youtube_ids """
limit_count = self.config['downloads']['limit_count']
"""download the list of youtube_ids"""
limit_count = self.config["downloads"]["limit_count"]
if limit_count:
self.youtube_id_list = self.youtube_id_list[:limit_count]
@@ -405,112 +416,118 @@ class VideoDownloader:
try:
self.dl_single_vid(youtube_id)
except youtube_dl.utils.DownloadError:
print('failed to download ' + youtube_id)
print("failed to download " + youtube_id)
continue
vid_dict = index_new_video(youtube_id)
self.move_to_archive(vid_dict)
self.delete_from_pending(youtube_id)
if self.config['downloads']['sleep_interval']:
sleep(self.config['downloads']['sleep_interval'])
if self.config["downloads"]["sleep_interval"]:
sleep(self.config["downloads"]["sleep_interval"])
@staticmethod
def progress_hook(response):
""" process the progress_hooks from youtube_dl """
"""process the progress_hooks from youtube_dl"""
# title
filename = response['filename'][12:].replace('_', ' ')
filename = response["filename"][12:].replace("_", " ")
title = "Downloading: " + os.path.split(filename)[-1]
# message
try:
percent = response['_percent_str']
size = response['_total_bytes_str']
speed = response['_speed_str']
eta = response['_eta_str']
message = f'{percent} of {size} at {speed} - time left: {eta}'
percent = response["_percent_str"]
size = response["_total_bytes_str"]
speed = response["_speed_str"]
eta = response["_eta_str"]
message = f"{percent} of {size} at {speed} - time left: {eta}"
except KeyError:
message = ''
message = ""
mess_dict = {
"status": "downloading",
"level": "info",
"title": title,
"message": message
"message": message,
}
set_message('progress:download', mess_dict)
set_message("progress:download", mess_dict)
def dl_single_vid(self, youtube_id):
""" download single video """
"""download single video"""
obs = {
'default_search': 'ytsearch',
'merge_output_format': 'mp4', 'restrictfilenames': True,
'outtmpl': (self.config['application']['cache_dir'] +
'/download/' +
self.config['application']['file_template']),
'progress_hooks': [self.progress_hook],
'quiet': True, 'continuedl': True, 'retries': 3
"default_search": "ytsearch",
"merge_output_format": "mp4",
"restrictfilenames": True,
"outtmpl": (
self.config["application"]["cache_dir"]
+ "/download/"
+ self.config["application"]["file_template"]
),
"progress_hooks": [self.progress_hook],
"quiet": True,
"continuedl": True,
"retries": 3,
}
if self.config['downloads']['format']:
obs['format'] = self.config['downloads']['format']
if self.config['downloads']['limit_speed']:
obs['ratelimit'] = self.config['downloads']['limit_speed'] * 1024
if self.config["downloads"]["format"]:
obs["format"] = self.config["downloads"]["format"]
if self.config["downloads"]["limit_speed"]:
obs["ratelimit"] = self.config["downloads"]["limit_speed"] * 1024
external = False
if external:
obs['external_downloader'] = 'aria2c'
obs["external_downloader"] = "aria2c"
postprocessors = []
if self.config['downloads']['add_metadata']:
postprocessors.append({
'key': 'FFmpegMetadata',
'add_chapters': True,
'add_metadata': True,
})
if self.config["downloads"]["add_metadata"]:
postprocessors.append(
{
"key": "FFmpegMetadata",
"add_chapters": True,
"add_metadata": True,
}
)
obs['postprocessors'] = postprocessors
obs["postprocessors"] = postprocessors
# check if already in cache to continue from there
cache_dir = self.config['application']['cache_dir']
all_cached = os.listdir(cache_dir + '/download/')
cache_dir = self.config["application"]["cache_dir"]
all_cached = os.listdir(cache_dir + "/download/")
for file_name in all_cached:
if youtube_id in file_name:
obs['outtmpl'] = cache_dir + '/download/' + file_name
obs["outtmpl"] = cache_dir + "/download/" + file_name
with youtube_dl.YoutubeDL(obs) as ydl:
try:
ydl.download([youtube_id])
except youtube_dl.utils.DownloadError:
print('retry failed download: ' + youtube_id)
print("retry failed download: " + youtube_id)
sleep(10)
ydl.download([youtube_id])
def move_to_archive(self, vid_dict):
""" move downloaded video from cache to archive """
videos = self.config['application']['videos']
channel_name = vid_dict['channel']['channel_name']
"""move downloaded video from cache to archive"""
videos = self.config["application"]["videos"]
channel_name = vid_dict["channel"]["channel_name"]
channel_name_clean = clean_string(channel_name)
media_url = vid_dict['media_url']
youtube_id = vid_dict['youtube_id']
media_url = vid_dict["media_url"]
youtube_id = vid_dict["youtube_id"]
# make archive folder
videos = self.config['application']['videos']
videos = self.config["application"]["videos"]
new_folder = os.path.join(videos, channel_name_clean)
os.makedirs(new_folder, exist_ok=True)
# find real filename
cache_dir = self.config['application']['cache_dir']
for file_str in os.listdir(cache_dir + '/download'):
cache_dir = self.config["application"]["cache_dir"]
for file_str in os.listdir(cache_dir + "/download"):
if youtube_id in file_str:
old_file = file_str
old_file_path = os.path.join(cache_dir, 'download', old_file)
old_file_path = os.path.join(cache_dir, "download", old_file)
new_file_path = os.path.join(videos, media_url)
# move and fix permission
shutil.move(old_file_path, new_file_path)
os.chown(
new_file_path,
self.config['application']['HOST_UID'],
self.config['application']['HOST_GID']
self.config["application"]["HOST_UID"],
self.config["application"]["HOST_GID"],
)
def delete_from_pending(self, youtube_id):
""" delete downloaded video from pending index if its there """
es_url = self.config['application']['es_url']
url = f'{es_url}/ta_download/_doc/{youtube_id}'
"""delete downloaded video from pending index if its there"""
es_url = self.config["application"]["es_url"]
url = f"{es_url}/ta_download/_doc/{youtube_id}"
response = requests.delete(url)
if not response.ok and not response.status_code == 404:
print(response.text)

home/src/helper.py

@@ -13,53 +13,53 @@ import unicodedata
import redis
import requests
REDIS_HOST = os.environ.get('REDIS_HOST')
REDIS_HOST = os.environ.get("REDIS_HOST")
def get_total_hits(index, es_url, match_field):
""" get total hits from index """
headers = {'Content-type': 'application/json'}
"""get total hits from index"""
headers = {"Content-type": "application/json"}
data = {"query": {"match": {match_field: True}}}
payload = json.dumps(data)
url = f'{es_url}/{index}/_search?filter_path=hits.total'
url = f"{es_url}/{index}/_search?filter_path=hits.total"
request = requests.post(url, data=payload, headers=headers)
if not request.ok:
print(request.text)
total_json = json.loads(request.text)
total_hits = total_json['hits']['total']['value']
total_hits = total_json["hits"]["total"]["value"]
return total_hits
def clean_string(file_name):
""" clean string to only asci characters """
"""clean string to only asci characters"""
whitelist = "-_.() " + string.ascii_letters + string.digits
normalized = unicodedata.normalize('NFKD', file_name)
ascii_only = normalized.encode('ASCII', 'ignore').decode().strip()
white_listed = ''.join(c for c in ascii_only if c in whitelist)
cleaned = re.sub(r'[ ]{2,}', ' ', white_listed)
normalized = unicodedata.normalize("NFKD", file_name)
ascii_only = normalized.encode("ASCII", "ignore").decode().strip()
white_listed = "".join(c for c in ascii_only if c in whitelist)
cleaned = re.sub(r"[ ]{2,}", " ", white_listed)
return cleaned
def process_url_list(url_str):
""" parse url_list to find valid youtube video or channel ids """
to_replace = ['watch?v=', 'playlist?list=']
url_list = re.split('\n+', url_str[0])
"""parse url_list to find valid youtube video or channel ids"""
to_replace = ["watch?v=", "playlist?list="]
url_list = re.split("\n+", url_str[0])
youtube_ids = []
for url in url_list:
url_clean = url.strip().strip('/').split('/')[-1]
url_clean = url.strip().strip("/").split("/")[-1]
for i in to_replace:
url_clean = url_clean.replace(i, '')
url_no_param = url_clean.split('&')[0]
url_clean = url_clean.replace(i, "")
url_no_param = url_clean.split("&")[0]
str_len = len(url_no_param)
if str_len == 11:
link_type = 'video'
link_type = "video"
elif str_len == 24:
link_type = 'channel'
link_type = "channel"
elif str_len == 34:
link_type = 'playlist'
link_type = "playlist"
else:
# unable to parse
raise ValueError('not a valid url: ' + url)
raise ValueError("not a valid url: " + url)
youtube_ids.append({"url": url_no_param, "type": link_type})
@@ -67,19 +67,17 @@ def process_url_list(url_str):
def set_message(key, message, expire=True):
""" write new message to redis """
"""write new message to redis"""
redis_connection = redis.Redis(host=REDIS_HOST)
redis_connection.execute_command(
'JSON.SET', key, '.', json.dumps(message)
)
redis_connection.execute_command("JSON.SET", key, ".", json.dumps(message))
if expire:
redis_connection.execute_command('EXPIRE', key, 20)
redis_connection.execute_command("EXPIRE", key, 20)
def get_message(key):
""" get any message from JSON key """
"""get any message from JSON key"""
redis_connection = redis.Redis(host=REDIS_HOST)
reply = redis_connection.execute_command('JSON.GET', key)
reply = redis_connection.execute_command("JSON.GET", key)
if reply:
json_str = json.loads(reply)
else:
@@ -88,9 +86,9 @@ def get_message(key):
def get_dl_message(cache_dir):
""" get latest message if available """
"""get latest message if available"""
redis_connection = redis.Redis(host=REDIS_HOST)
reply = redis_connection.execute_command('JSON.GET', 'progress:download')
reply = redis_connection.execute_command("JSON.GET", "progress:download")
if reply:
json_str = json.loads(reply)
elif json_str := monitor_cache_dir(cache_dir):
@@ -101,7 +99,7 @@ def get_dl_message(cache_dir):
def get_lock(lock_key):
""" handle lock for task management """
"""handle lock for task management"""
redis_lock = redis.Redis(host=REDIS_HOST).lock(lock_key)
return redis_lock
@@ -110,15 +108,15 @@ def monitor_cache_dir(cache_dir):
"""
look at download cache dir directly as alternative progress info
"""
dl_cache = os.path.join(cache_dir, 'download')
dl_cache = os.path.join(cache_dir, "download")
cache_file = os.listdir(dl_cache)
if cache_file:
filename = cache_file[0][12:].replace('_', ' ').split('.')[0]
filename = cache_file[0][12:].replace("_", " ").split(".")[0]
mess_dict = {
"status": "downloading",
"level": "info",
"title": "Downloading: " + filename,
"message": ""
"message": "",
}
else:
return False
@@ -133,27 +131,37 @@ class DurationConverter:
@staticmethod
def get_sec(file_path):
""" read duration from file """
duration = subprocess.run([
"ffprobe", "-v", "error", "-show_entries", "format=duration",
"-of", "default=noprint_wrappers=1:nokey=1", file_path
], capture_output=True, check=True)
"""read duration from file"""
duration = subprocess.run(
[
"ffprobe",
"-v",
"error",
"-show_entries",
"format=duration",
"-of",
"default=noprint_wrappers=1:nokey=1",
file_path,
],
capture_output=True,
check=True,
)
duration_sec = int(float(duration.stdout.decode().strip()))
return duration_sec
@staticmethod
def get_str(duration_sec):
""" takes duration in sec and returns clean string """
"""takes duration in sec and returns clean string"""
hours = duration_sec // 3600
minutes = (duration_sec - (hours * 3600)) // 60
secs = duration_sec - (hours * 3600) - (minutes * 60)
duration_str = str()
if hours:
duration_str = str(hours).zfill(2) + ':'
duration_str = str(hours).zfill(2) + ":"
if minutes:
duration_str = duration_str + str(minutes).zfill(2) + ':'
duration_str = duration_str + str(minutes).zfill(2) + ":"
else:
duration_str = duration_str + '00:'
duration_str = duration_str + "00:"
duration_str = duration_str + str(secs).zfill(2)
return duration_str

home/src/index.py

@@ -19,11 +19,11 @@ from home.src.helper import DurationConverter, clean_string
class YoutubeChannel:
""" represents a single youtube channel """
"""represents a single youtube channel"""
CONFIG = AppConfig().config
ES_URL = CONFIG['application']['es_url']
CACHE_DIR = CONFIG['application']['cache_dir']
ES_URL = CONFIG["application"]["es_url"]
CACHE_DIR = CONFIG["application"]["cache_dir"]
def __init__(self, channel_id):
self.channel_id = channel_id
@@ -32,193 +32,187 @@ class YoutubeChannel:
self.channel_dict = self.build_channel_dict()
def build_channel_dict(self, scrape=False):
""" combine the dicts build from extracted json payload """
"""combine the dicts build from extracted json payload"""
if scrape:
channel_dict = False
else:
channel_dict = self.get_es_channel()
if not channel_dict:
print('scrape data from youtube')
print("scrape data from youtube")
self.scrape_channel()
channel_dict = self.parse_channel_main()
channel_dict.update(self.parse_channel_meta())
self.source = 'scraped'
self.source = "scraped"
return channel_dict
def get_es_channel(self):
""" get from elastic search first if possible """
"""get from elastic search first if possible"""
channel_id = self.channel_id
url = f'{self.ES_URL}/ta_channel/_doc/{channel_id}'
url = f"{self.ES_URL}/ta_channel/_doc/{channel_id}"
response = requests.get(url)
if response.ok:
channel_source = response.json()['_source']
self.source = 'elastic'
channel_source = response.json()["_source"]
self.source = "elastic"
return channel_source
return False
def scrape_channel(self):
""" scrape channel page for additional infos """
"""scrape channel page for additional infos"""
channel_id = self.channel_id
url = f'https://www.youtube.com/channel/{channel_id}/about?hl=en'
cookies = {
'CONSENT': 'YES+xxxxxxxxxxxxxxxxxxxxxxxxxxx'
}
url = f"https://www.youtube.com/channel/{channel_id}/about?hl=en"
cookies = {"CONSENT": "YES+xxxxxxxxxxxxxxxxxxxxxxxxxxx"}