add random headers for requests outside of yt-dlp

simon 2022-03-26 11:49:53 +07:00
parent 6d874f4b7a
commit fcadb5ead8
No known key found for this signature in database
GPG Key ID: 2C15AA5E89985DD4
3 changed files with 66 additions and 4 deletions


@@ -17,7 +17,7 @@ from home.src.download.thumbnails import ThumbManager
 from home.src.es.connect import ElasticWrap, IndexPaginate
 from home.src.index.generic import YouTubeItem
 from home.src.index.playlist import YoutubePlaylist
-from home.src.ta.helper import clean_string
+from home.src.ta.helper import clean_string, requests_headers
 from home.src.ta.ta_redis import RedisArchivist
@@ -46,7 +46,9 @@ class ChannelScraper:
         print(f"{self.channel_id}: scrape channel data from youtube")
         url = f"https://www.youtube.com/channel/{self.channel_id}/about?hl=en"
         cookies = {"CONSENT": "YES+xxxxxxxxxxxxxxxxxxxxxxxxxxx"}
-        response = requests.get(url, cookies=cookies)
+        response = requests.get(
+            url, cookies=cookies, headers=requests_headers()
+        )
         if response.ok:
             channel_page = response.text
         else:


@@ -12,7 +12,11 @@ import requests
 from home.src.es.connect import ElasticWrap
 from home.src.index import channel as ta_channel
 from home.src.index.generic import YouTubeItem
-from home.src.ta.helper import DurationConverter, clean_string
+from home.src.ta.helper import (
+    DurationConverter,
+    clean_string,
+    requests_headers,
+)
 from ryd_client import ryd_client
@@ -115,9 +119,12 @@ class YoutubeSubtitle:
             dest_path = os.path.join(videos_base, subtitle["media_url"])
             source = subtitle["source"]
             lang = subtitle.get("lang")
-            response = requests.get(subtitle["url"])
+            response = requests.get(
+                subtitle["url"], headers=requests_headers()
+            )
             if not response.ok:
                 print(f"{self.video.youtube_id}: failed to download subtitle")
+                print(response.text)
                 continue
             parser = SubtitleParser(response.text, lang, source)


@@ -3,6 +3,7 @@ Loose collection of helper functions
 - don't import AppConfig class here to avoid circular imports
 """

+import random
 import re
 import string
 import subprocess
@@ -35,6 +36,58 @@ def ignore_filelist(filelist):
     return cleaned


+def requests_headers():
+    """build header with random user agent for requests outside of yt-dlp"""
+    chrome_versions = (
+        "90.0.4430.212",
+        "90.0.4430.24",
+        "90.0.4430.70",
+        "90.0.4430.72",
+        "90.0.4430.85",
+        "90.0.4430.93",
+        "91.0.4472.101",
+        "91.0.4472.106",
+        "91.0.4472.114",
+        "91.0.4472.124",
+        "91.0.4472.164",
+        "91.0.4472.19",
+        "91.0.4472.77",
+        "92.0.4515.107",
+        "92.0.4515.115",
+        "92.0.4515.131",
+        "92.0.4515.159",
+        "92.0.4515.43",
+        "93.0.4556.0",
+        "93.0.4577.15",
+        "93.0.4577.63",
+        "93.0.4577.82",
+        "94.0.4606.41",
+        "94.0.4606.54",
+        "94.0.4606.61",
+        "94.0.4606.71",
+        "94.0.4606.81",
+        "94.0.4606.85",
+        "95.0.4638.17",
+        "95.0.4638.50",
+        "95.0.4638.54",
+        "95.0.4638.69",
+        "95.0.4638.74",
+        "96.0.4664.18",
+        "96.0.4664.45",
+        "96.0.4664.55",
+        "96.0.4664.93",
+        "97.0.4692.20",
+    )
+    template = (
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
+        + "AppleWebKit/537.36 (KHTML, like Gecko) "
+        + f"Chrome/{random.choice(chrome_versions)} Safari/537.36"
+    )
+    return {"User-Agent": template}
+
+
 class UrlListParser:
     """take a multi line string and detect valid youtube ids"""