mirror of
https://github.com/tubearchivist/tubearchivist.git
synced 2024-12-30 13:30:12 +00:00
rewrite url_str extractor to convert channel names into channel ids, #40
This commit is contained in:
parent
c92800701a
commit
1ba3090db1
@ -10,9 +10,9 @@ Accessible at `/channel/` of your Tube Archivist, the **Overview Page** shows a
|
||||
The **Subscribe to Channels** button <img src="assets/icon-add.png?raw=true" alt="add icon" width="20px" style="margin:0 5px;"> opens a text field to subscribe to a channel. You have a few options:
|
||||
- Enter the YouTube channel ID, a 24 character string. For example *UCBa659QWEk1AI4Tg--mrJ2A*
|
||||
- Enter the URL to the channel page on YouTube. For example *https://www.youtube.com/channel/UCBa659QWEk1AI4Tg--mrJ2A*
|
||||
- Enter the URL to the channel name page on YouTube. For example *https://www.youtube.com/c/TomScottGo*
|
||||
- Enter the video URL for any video and let Tube Archivist extract the channel ID for you. For example *https://www.youtube.com/watch?v=2tdiKTSdE9Y*
|
||||
- Add one per line.
|
||||
- **Note**: Adding a link to a YouTube channel name is not yet supported, for example: *https://www.youtube.com/c/TomScottGo* will fail.
|
||||
|
||||
The search icon <img src="assets/icon-search.png?raw=true" alt="search icon" width="20px" style="margin:0 5px;"> opens a text box to search for indexed channel names. Possible matches will show as you type.
|
||||
|
||||
|
@ -18,9 +18,9 @@ The **Add to Download Queue** icon <img src="assets/icon-add.png?raw=true" alt="
|
||||
- Add a YouTube video ID. For example *2tdiKTSdE9Y*.
|
||||
- Add a link to a YouTube video by providing the shortened URL, for example *https://youtu.be/2tdiKTSdE9Y*.
|
||||
- Add a Channel ID or Channel URL to add every available video to the download queue. This will ignore the channel page size as described before and is meant for an initial download of the whole channel. You can still ignore selected videos before starting the download.
|
||||
- Add a link to a channel name, for example *https://www.youtube.com/c/TomScottGo*.
|
||||
- Add a playlist ID or URL to add every available video in the list to the download queue, for example *https://www.youtube.com/playlist?list=PL96C35uN7xGLLeET0dOWaKHkAlPsrkcha* or *PL96C35uN7xGLLeET0dOWaKHkAlPsrkcha*. Note that when you add a link to a video in a playlist, Tube Archivist assumes you want to download only the specific video and not the whole playlist, for example *https://www.youtube.com/watch?v=CINVwWHlzTY&list=PL96C35uN7xGLLeET0dOWaKHkAlPsrkcha* will only add one video *CINVwWHlzTY* to the queue.
|
||||
- Add one link per line.
|
||||
- **Note**: Adding a link to a YouTube channel name is not yet supported, for example: *https://www.youtube.com/c/TomScottGo* will fail.
|
||||
|
||||
## The Download Queue
|
||||
Below the three buttons you find the download queue. New items are added at the bottom of the queue. Once you click on **Start Download**, the next video to download will be the first in the list.
|
||||
|
@ -9,9 +9,11 @@ import re
|
||||
import string
|
||||
import subprocess
|
||||
import unicodedata
|
||||
from urllib.parse import parse_qs, urlparse
|
||||
|
||||
import redis
|
||||
import requests
|
||||
import yt_dlp as youtube_dl
|
||||
|
||||
|
||||
def get_total_hits(index, es_url, es_auth, match_field):
|
||||
@ -51,34 +53,91 @@ def ignore_filelist(filelist):
|
||||
return cleaned
|
||||
|
||||
|
||||
def process_url_list(url_str):
|
||||
"""parse url_list to find valid youtube video or channel ids"""
|
||||
to_replace = ["watch?v=", "playlist?list="]
|
||||
url_list = re.split("\n+", url_str[0])
|
||||
class UrlListParser:
|
||||
"""take a multi line string and detect valid youtube ids"""
|
||||
|
||||
def __init__(self, url_str):
|
||||
self.url_list = [i.strip() for i in url_str.split()]
|
||||
|
||||
def process_list(self):
|
||||
"""loop through the list"""
|
||||
youtube_ids = []
|
||||
for url in url_list:
|
||||
if "/c/" in url or "/user/" in url:
|
||||
raise ValueError("user name is not unique, use channel ID")
|
||||
|
||||
url_clean = url.strip().strip("/").split("/")[-1]
|
||||
for i in to_replace:
|
||||
url_clean = url_clean.replace(i, "")
|
||||
url_no_param = url_clean.split("&")[0]
|
||||
str_len = len(url_no_param)
|
||||
if str_len == 11:
|
||||
link_type = "video"
|
||||
elif str_len == 24:
|
||||
link_type = "channel"
|
||||
elif str_len == 34:
|
||||
link_type = "playlist"
|
||||
for url in self.url_list:
|
||||
parsed = urlparse(url)
|
||||
print(f"processing: {url}")
|
||||
print(parsed)
|
||||
if not parsed.netloc:
|
||||
# is not a url
|
||||
id_type = self.find_valid_id(url)
|
||||
youtube_id = url
|
||||
elif parsed.path:
|
||||
# is a url
|
||||
youtube_id, id_type = self.detect_from_url(parsed)
|
||||
else:
|
||||
# unable to parse
|
||||
raise ValueError("not a valid url: " + url)
|
||||
# not detected
|
||||
raise ValueError(f"failed to detect {url}")
|
||||
|
||||
youtube_ids.append({"url": url_no_param, "type": link_type})
|
||||
youtube_ids.append({"url": youtube_id, "type": id_type})
|
||||
|
||||
return youtube_ids
|
||||
|
||||
def detect_from_url(self, parsed):
    """map a parsed url to a tuple of (youtube_id, id_type)

    parsed: result of urlparse for a single link
    id_type is one of "video", "playlist" or "channel"
    """
    if parsed.netloc == "youtu.be":
        # shortened video link, the id is the path
        return parsed.path.strip("/"), "video"

    if parsed.query:
        params = parse_qs(parsed.query)
        # "v" is checked before "list": a link to a video inside
        # a playlist resolves to the single video only
        for key, id_type in (("v", "video"), ("list", "playlist")):
            if key in params:
                return params[key][0], id_type

    if parsed.path.startswith("/channel/"):
        # channel id is the path segment after /channel/
        return parsed.path.split("/")[2], "channel"

    # everything else is handed to yt_dlp to detect the channel
    return self.extract_channel_name(parsed.geturl()), "channel"
|
||||
|
||||
@staticmethod
|
||||
def find_valid_id(id_str):
|
||||
"""dedect valid id from length of string"""
|
||||
str_len = len(id_str)
|
||||
if str_len == 11:
|
||||
id_type = "video"
|
||||
elif str_len == 24:
|
||||
id_type = "channel"
|
||||
elif str_len == 34:
|
||||
id_type = "playlist"
|
||||
else:
|
||||
# unable to parse
|
||||
raise ValueError("not a valid id_str: " + id_str)
|
||||
|
||||
return id_type
|
||||
|
||||
@staticmethod
def extract_channel_name(url):
    """find channel id from channel name with yt-dlp help

    url: link that does not carry a channel id itself
    returns the "channel_id" field of the metadata yt-dlp extracts
    raises ValueError if the metadata holds no channel id, the same
    error type the parser raises for any other undetectable input
    """
    obs = {
        "default_search": "ytsearch",
        "quiet": True,
        "skip_download": True,
        "extract_flat": True,
        # presumably keeps yt-dlp from listing the channel videos,
        # only the top level metadata is read below - TODO confirm
        "playlistend": 0,
    }
    url_info = youtube_dl.YoutubeDL(obs).extract_info(url, download=False)
    try:
        channel_id = url_info["channel_id"]
    except (KeyError, TypeError) as error:
        # metadata without a channel id, or no metadata at all
        raise ValueError(
            f"failed to extract channel id from {url}"
        ) from error

    return channel_id
|
||||
|
||||
|
||||
class RedisArchivist:
|
||||
"""collection of methods to interact with redis"""
|
||||
|
@ -15,7 +15,7 @@ import requests
|
||||
import yt_dlp as youtube_dl
|
||||
from bs4 import BeautifulSoup
|
||||
from home.src.config import AppConfig
|
||||
from home.src.helper import DurationConverter, clean_string, process_url_list
|
||||
from home.src.helper import DurationConverter, UrlListParser, clean_string
|
||||
from home.src.thumbnails import ThumbManager
|
||||
|
||||
|
||||
@ -446,9 +446,8 @@ class WatchState:
|
||||
|
||||
def dedect_type(self):
|
||||
"""find youtube id type"""
|
||||
url_process = process_url_list([self.youtube_id])
|
||||
url_process = UrlListParser(self.youtube_id).process_list()
|
||||
url_type = url_process[0]["type"]
|
||||
|
||||
return url_type
|
||||
|
||||
def mark_vid_watched(self, revert=False):
|
||||
|
@ -26,7 +26,7 @@ from home.forms import (
|
||||
)
|
||||
from home.src.config import AppConfig
|
||||
from home.src.download import ChannelSubscription, PendingList
|
||||
from home.src.helper import RedisArchivist, RedisQueue, process_url_list
|
||||
from home.src.helper import RedisArchivist, RedisQueue, UrlListParser
|
||||
from home.src.index import WatchState, YoutubeChannel, YoutubeVideo
|
||||
from home.src.searching import Pagination, SearchForm, SearchHandler
|
||||
from home.tasks import (
|
||||
@ -303,13 +303,13 @@ class DownloadView(View):
|
||||
"""handle post requests"""
|
||||
to_queue = AddToQueueForm(data=request.POST)
|
||||
if to_queue.is_valid():
|
||||
vid_url_list = [request.POST.get("vid_url")]
|
||||
print(vid_url_list)
|
||||
url_str = request.POST.get("vid_url")
|
||||
print(url_str)
|
||||
try:
|
||||
youtube_ids = process_url_list(vid_url_list)
|
||||
youtube_ids = UrlListParser(url_str).process_list()
|
||||
except ValueError:
|
||||
# failed to process
|
||||
print(f"failed to parse: {vid_url_list}")
|
||||
print(f"failed to parse: {url_str}")
|
||||
mess_dict = {
|
||||
"status": "downloading",
|
||||
"level": "error",
|
||||
@ -521,8 +521,8 @@ class ChannelView(View):
|
||||
"""handle http post requests"""
|
||||
subscribe_form = SubscribeToChannelForm(data=request.POST)
|
||||
if subscribe_form.is_valid():
|
||||
vid_url_list = [request.POST.get("subscribe")]
|
||||
youtube_ids = process_url_list(vid_url_list)
|
||||
url_str = request.POST.get("subscribe")
|
||||
youtube_ids = UrlListParser(url_str).process_list()
|
||||
print(youtube_ids)
|
||||
subscribe_to.delay(youtube_ids)
|
||||
|
||||
@ -819,7 +819,7 @@ class PostData:
|
||||
youtube_id = self.exec_val
|
||||
print("add vid to dl queue: " + youtube_id)
|
||||
PendingList().delete_from_pending(youtube_id)
|
||||
youtube_ids = process_url_list([youtube_id])
|
||||
youtube_ids = UrlListParser(youtube_id).process_list()
|
||||
extrac_dl.delay(youtube_ids)
|
||||
return {"success": True}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user