mirror of
https://github.com/tubearchivist/tubearchivist-frontend.git
synced 2024-11-25 13:10:15 +00:00
add random headers for requests outside of yt-dlp
This commit is contained in:
parent
6d874f4b7a
commit
fcadb5ead8
@ -17,7 +17,7 @@ from home.src.download.thumbnails import ThumbManager
|
||||
from home.src.es.connect import ElasticWrap, IndexPaginate
|
||||
from home.src.index.generic import YouTubeItem
|
||||
from home.src.index.playlist import YoutubePlaylist
|
||||
from home.src.ta.helper import clean_string
|
||||
from home.src.ta.helper import clean_string, requests_headers
|
||||
from home.src.ta.ta_redis import RedisArchivist
|
||||
|
||||
|
||||
@ -46,7 +46,9 @@ class ChannelScraper:
|
||||
print(f"{self.channel_id}: scrape channel data from youtube")
|
||||
url = f"https://www.youtube.com/channel/{self.channel_id}/about?hl=en"
|
||||
cookies = {"CONSENT": "YES+xxxxxxxxxxxxxxxxxxxxxxxxxxx"}
|
||||
response = requests.get(url, cookies=cookies)
|
||||
response = requests.get(
|
||||
url, cookies=cookies, headers=requests_headers()
|
||||
)
|
||||
if response.ok:
|
||||
channel_page = response.text
|
||||
else:
|
||||
|
@ -12,7 +12,11 @@ import requests
|
||||
from home.src.es.connect import ElasticWrap
|
||||
from home.src.index import channel as ta_channel
|
||||
from home.src.index.generic import YouTubeItem
|
||||
from home.src.ta.helper import DurationConverter, clean_string
|
||||
from home.src.ta.helper import (
|
||||
DurationConverter,
|
||||
clean_string,
|
||||
requests_headers,
|
||||
)
|
||||
from ryd_client import ryd_client
|
||||
|
||||
|
||||
@ -115,9 +119,12 @@ class YoutubeSubtitle:
|
||||
dest_path = os.path.join(videos_base, subtitle["media_url"])
|
||||
source = subtitle["source"]
|
||||
lang = subtitle.get("lang")
|
||||
response = requests.get(subtitle["url"])
|
||||
response = requests.get(
|
||||
subtitle["url"], headers=requests_headers()
|
||||
)
|
||||
if not response.ok:
|
||||
print(f"{self.video.youtube_id}: failed to download subtitle")
|
||||
print(response.text)
|
||||
continue
|
||||
|
||||
parser = SubtitleParser(response.text, lang, source)
|
||||
|
@ -3,6 +3,7 @@ Loose collection of helper functions
|
||||
- don't import AppConfig class here to avoid circular imports
|
||||
"""
|
||||
|
||||
import random
|
||||
import re
|
||||
import string
|
||||
import subprocess
|
||||
@ -35,6 +36,58 @@ def ignore_filelist(filelist):
|
||||
return cleaned
|
||||
|
||||
|
||||
def requests_headers():
    """build header with random user agent for requests outside of yt-dlp

    Picks one Chrome version string at random on every call and embeds it
    in a Windows 10 / x64 Chrome user agent string.

    Returns:
        dict with the single key "User-Agent", suitable for passing as the
        ``headers`` argument of ``requests.get``.
    """

    # pool of Chrome release versions (major versions 90 to 97) to pick from
    chrome_versions = (
        "90.0.4430.212",
        "90.0.4430.24",
        "90.0.4430.70",
        "90.0.4430.72",
        "90.0.4430.85",
        "90.0.4430.93",
        "91.0.4472.101",
        "91.0.4472.106",
        "91.0.4472.114",
        "91.0.4472.124",
        "91.0.4472.164",
        "91.0.4472.19",
        "91.0.4472.77",
        "92.0.4515.107",
        "92.0.4515.115",
        "92.0.4515.131",
        "92.0.4515.159",
        "92.0.4515.43",
        "93.0.4556.0",
        "93.0.4577.15",
        "93.0.4577.63",
        "93.0.4577.82",
        "94.0.4606.41",
        "94.0.4606.54",
        "94.0.4606.61",
        "94.0.4606.71",
        "94.0.4606.81",
        "94.0.4606.85",
        "95.0.4638.17",
        "95.0.4638.50",
        "95.0.4638.54",
        "95.0.4638.69",
        "95.0.4638.74",
        "96.0.4664.18",
        "96.0.4664.45",
        "96.0.4664.55",
        "96.0.4664.93",
        "97.0.4692.20",
    )
    # adjacent literals are joined at compile time, only the version varies
    template = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        f"Chrome/{random.choice(chrome_versions)} Safari/537.36"
    )

    return {"User-Agent": template}
|
||||
|
||||
|
||||
class UrlListParser:
|
||||
"""take a multi line string and detect valid youtube ids"""
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user