西瓜视频解析分析
复制
# -*- coding: UTF-8 -*-
"""Resolve and print direct video/audio stream URLs for ixigua.com pages."""
import base64
import json
import re
import sys

import requests

# Headers sent with every request.
# NOTE(review): the cookie looks like a captured browser session and may
# expire -- confirm it is still accepted by the site before relying on it.
_HEADERS = {
    "cookie": "ixigua-a-s=0; __ac_nonce=062711eef0074c53fd301; "
              "__ac_signature=_02B4Z6wo00f01ixJMjwAAIDDB8Np-ahvLqYsaTaAAOlxwFWsX1Khk-KvFlM1N2JBoEUa-eVrLxEyF"
              ".bLdz61qjeQ-CTdvApqa7NKn7HTvbu1Nktww-QaXyO3gNQSkIdrfp3XdUkGrdCwrQKpea; "
              "MONITOR_WEB_ID=6364cae8-2aa7-4531-9a30-1b98b0ee7596; "
              "ttwid=1%7CTOw3V-1qPgQfcCT4S0Hhey26Q4PJq9J6nGaLq6GMe14%7C1651580656"
              "%7C48653bc0c5afd31f1a919f9bd17a80173a1ea69d96b23359b103c4f078ef969b; support_webp=true; "
              "support_avif=true; _tea_utm_cache_1300=undefined",
    "Referer": "https://www.ixigua.com",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
                  "Chrome/86.0.4240.75 Safari/537.36",
}

# Captures the JSON blob assigned to window._SSR_HYDRATED_DATA in the page HTML.
_SSR_DATA_RE = re.compile('window._SSR_HYDRATED_DATA=(.*?)</script>')

# Characters replaced by "_" in titles: / \ : * ? " < > |
# (The original pattern had an unescaped double quote that terminated the
# string literal early; a single-quoted raw string avoids the problem.)
_UNSAFE_TITLE_CHARS_RE = re.compile(r'[/\\:*?"<>|]')


def _clean_title(title):
    """Replace the unsafe characters in *title* with "_" and drop all spaces."""
    return _UNSAFE_TITLE_CHARS_RE.sub("_", title).replace(" ", "")


def _decode_url(encoded):
    """Base64-decode *encoded* and return the result as text."""
    return base64.b64decode(encoded).decode()


def _emit(video_url, audio_url, title):
    """Print one result, one field per line, and return it as a tuple."""
    print(video_url)
    print(audio_url)
    print(title)
    return video_url, audio_url, title


def xigua(url, timeout=10):
    """Fetch *url* and print the stream URLs and title of each video found.

    Two response formats are handled, selected by the URL itself:

    * URLs that do not contain ``pseries_more_v2`` are treated as HTML pages
      embedding a ``window._SSR_HYDRATED_DATA`` JSON blob describing a single
      video; its ``main_url`` entries are used.
    * URLs that contain ``pseries_more_v2`` are treated as a JSON response
      whose ``data`` list holds one entry per video; their ``backup_url_1``
      entries are used.

    In both cases the last entry of ``dynamic_video_list`` and
    ``dynamic_audio_list`` is taken (presumably the highest quality --
    TODO confirm), the URLs are base64-decoded, and the title is sanitised.

    Args:
        url: Address to fetch.
        timeout: Seconds to wait for the server before ``requests`` gives up;
            without it a stalled connection would block forever.

    Returns:
        A list of ``(video_url, audio_url, title)`` tuples in the order they
        were printed. Empty when the response status is not 200.

    Raises:
        IndexError: The HTML page carries no ``window._SSR_HYDRATED_DATA``.
        KeyError: The JSON does not have the expected structure.
        ValueError: The payload is not valid JSON.
        requests.RequestException: The HTTP request itself failed.
    """
    resp = requests.get(url=url, headers=_HEADERS, timeout=timeout)
    resp.encoding = 'utf-8'
    if resp.status_code != 200:
        # Same as the original: a non-200 answer is ignored silently.
        return []
    body = resp.text

    results = []
    if 'pseries_more_v2' not in url:
        raw_json = _SSR_DATA_RE.findall(body)[0]
        # The blob is a JavaScript literal; `undefined` is not valid JSON.
        # Every occurrence of the substring is replaced, as in the original.
        page_data = json.loads(raw_json.replace('undefined', 'null'))
        video = page_data['anyVideo']['gidInformation']['packerData']['video']
        streams = video['videoResource']['dash']['dynamic_video']
        results.append(_emit(
            _decode_url(streams['dynamic_video_list'][-1]['main_url']),
            _decode_url(streams['dynamic_audio_list'][-1]['main_url']),
            _clean_title(video['title']),
        ))
    else:
        for item in json.loads(body)['data']:
            streams = item['preloadVideoResource']['dynamic_video']
            results.append(_emit(
                _decode_url(streams['dynamic_video_list'][-1]['backup_url_1']),
                _decode_url(streams['dynamic_audio_list'][-1]['backup_url_1']),
                _clean_title(item['title']),
            ))
    return results


if __name__ == "__main__":
    # The original called xigua(url) with `url` never defined; take it from
    # the command line instead so importing this module has no side effects.
    if len(sys.argv) != 2:
        sys.exit("usage: %s <ixigua-url>" % sys.argv[0])
    xigua(sys.argv[1])