mirror of
https://github.com/tubearchivist/tubearchivist-frontend.git
synced 2024-11-25 13:10:15 +00:00
add random headers for requests outside of yt-dlp
This commit is contained in:
parent
6d874f4b7a
commit
fcadb5ead8
@ -17,7 +17,7 @@ from home.src.download.thumbnails import ThumbManager
|
|||||||
from home.src.es.connect import ElasticWrap, IndexPaginate
|
from home.src.es.connect import ElasticWrap, IndexPaginate
|
||||||
from home.src.index.generic import YouTubeItem
|
from home.src.index.generic import YouTubeItem
|
||||||
from home.src.index.playlist import YoutubePlaylist
|
from home.src.index.playlist import YoutubePlaylist
|
||||||
from home.src.ta.helper import clean_string
|
from home.src.ta.helper import clean_string, requests_headers
|
||||||
from home.src.ta.ta_redis import RedisArchivist
|
from home.src.ta.ta_redis import RedisArchivist
|
||||||
|
|
||||||
|
|
||||||
@ -46,7 +46,9 @@ class ChannelScraper:
|
|||||||
print(f"{self.channel_id}: scrape channel data from youtube")
|
print(f"{self.channel_id}: scrape channel data from youtube")
|
||||||
url = f"https://www.youtube.com/channel/{self.channel_id}/about?hl=en"
|
url = f"https://www.youtube.com/channel/{self.channel_id}/about?hl=en"
|
||||||
cookies = {"CONSENT": "YES+xxxxxxxxxxxxxxxxxxxxxxxxxxx"}
|
cookies = {"CONSENT": "YES+xxxxxxxxxxxxxxxxxxxxxxxxxxx"}
|
||||||
response = requests.get(url, cookies=cookies)
|
response = requests.get(
|
||||||
|
url, cookies=cookies, headers=requests_headers()
|
||||||
|
)
|
||||||
if response.ok:
|
if response.ok:
|
||||||
channel_page = response.text
|
channel_page = response.text
|
||||||
else:
|
else:
|
||||||
|
@ -12,7 +12,11 @@ import requests
|
|||||||
from home.src.es.connect import ElasticWrap
|
from home.src.es.connect import ElasticWrap
|
||||||
from home.src.index import channel as ta_channel
|
from home.src.index import channel as ta_channel
|
||||||
from home.src.index.generic import YouTubeItem
|
from home.src.index.generic import YouTubeItem
|
||||||
from home.src.ta.helper import DurationConverter, clean_string
|
from home.src.ta.helper import (
|
||||||
|
DurationConverter,
|
||||||
|
clean_string,
|
||||||
|
requests_headers,
|
||||||
|
)
|
||||||
from ryd_client import ryd_client
|
from ryd_client import ryd_client
|
||||||
|
|
||||||
|
|
||||||
@ -115,9 +119,12 @@ class YoutubeSubtitle:
|
|||||||
dest_path = os.path.join(videos_base, subtitle["media_url"])
|
dest_path = os.path.join(videos_base, subtitle["media_url"])
|
||||||
source = subtitle["source"]
|
source = subtitle["source"]
|
||||||
lang = subtitle.get("lang")
|
lang = subtitle.get("lang")
|
||||||
response = requests.get(subtitle["url"])
|
response = requests.get(
|
||||||
|
subtitle["url"], headers=requests_headers()
|
||||||
|
)
|
||||||
if not response.ok:
|
if not response.ok:
|
||||||
print(f"{self.video.youtube_id}: failed to download subtitle")
|
print(f"{self.video.youtube_id}: failed to download subtitle")
|
||||||
|
print(response.text)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
parser = SubtitleParser(response.text, lang, source)
|
parser = SubtitleParser(response.text, lang, source)
|
||||||
|
@ -3,6 +3,7 @@ Loose collection of helper functions
|
|||||||
- don't import AppConfig class here to avoid circular imports
|
- don't import AppConfig class here to avoid circular imports
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import random
|
||||||
import re
|
import re
|
||||||
import string
|
import string
|
||||||
import subprocess
|
import subprocess
|
||||||
@ -35,6 +36,58 @@ def ignore_filelist(filelist):
|
|||||||
return cleaned
|
return cleaned
|
||||||
|
|
||||||
|
|
||||||
|
def requests_headers():
    """build header with random user agent for requests outside of yt-dlp

    Returns a dict holding a single "User-Agent" key, suitable for the
    headers argument of requests.get(). The value imitates desktop Chrome
    on 64-bit Windows 10; only the Chrome version differs between calls,
    it is drawn at random from a fixed list of releases every time.
    """

    chrome_versions = (
        "90.0.4430.212",
        "90.0.4430.24",
        "90.0.4430.70",
        "90.0.4430.72",
        "90.0.4430.85",
        "90.0.4430.93",
        "91.0.4472.101",
        "91.0.4472.106",
        "91.0.4472.114",
        "91.0.4472.124",
        "91.0.4472.164",
        "91.0.4472.19",
        "91.0.4472.77",
        "92.0.4515.107",
        "92.0.4515.115",
        "92.0.4515.131",
        "92.0.4515.159",
        "92.0.4515.43",
        "93.0.4556.0",
        "93.0.4577.15",
        "93.0.4577.63",
        "93.0.4577.82",
        "94.0.4606.41",
        "94.0.4606.54",
        "94.0.4606.61",
        "94.0.4606.71",
        "94.0.4606.81",
        "94.0.4606.85",
        "95.0.4638.17",
        "95.0.4638.50",
        "95.0.4638.54",
        "95.0.4638.69",
        "95.0.4638.74",
        "96.0.4664.18",
        "96.0.4664.45",
        "96.0.4664.55",
        "96.0.4664.93",
        "97.0.4692.20",
    )
    # everything except the Chrome version is constant, the adjacent
    # literals are joined into one string
    template = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        f"Chrome/{random.choice(chrome_versions)} Safari/537.36"
    )

    return {"User-Agent": template}
|
||||||
|
|
||||||
|
|
||||||
class UrlListParser:
|
class UrlListParser:
|
||||||
"""take a multi line string and detect valid youtube ids"""
|
"""take a multi line string and detect valid youtube ids"""
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user