结合猫抓批量解析提取影视剧集源链

前言

本期分享一个由 ChatGPT 编写的 Python 脚本。功能：通过自动化操作浏览器和模拟鼠标点击，从指定的影视剧集 URL 中批量提取视频源链接，方便后续批量下载。输入剧集 URL 和总集数后，程序会配合猫抓插件依次访问每个剧集页面，模拟鼠标点击并复制猫抓解析出的链接，最后将提取到的链接保存到文件中。

注意事项

浏览器类型：
- 脚本默认使用 Google Chrome。确保该浏览器已安装并能够正常运行。
浏览器安装位置：
- 脚本中使用了 Chrome 浏览器的路径 C:\Program Files\Google\Chrome\Application\chrome.exe。如果 Chrome 安装在其他路径，请相应修改脚本中 chrome_path 变量为正确的安装路径。
浏览器插件：
- 在 Chrome 浏览器中确保已安装猫抓插件。下载猫抓；蓝奏下载，提取码：5ajr
- 确保插件在脚本运行时已启用，并能够正常工作。
- 将插件固定到浏览器工具栏。
脚本运行时，浏览器窗口需保持在最前面。

准备环境：

确保已经安装了 Python 3.x。
安装脚本依赖的第三方库。在命令行中运行：
pip install pynput pyautogui pyperclip

脚本代码

from pynput import mouse, keyboard
import subprocess
import time
import pyautogui
import pyperclip
import re
import tkinter as tk
from tkinter import simpledialog

# Screen coordinates of the recorded left-button presses, in click order.
mouse_positions = []


def on_click(x, y, button, pressed):
    """Record left-button presses until two positions have been captured.

    Used as the ``on_click`` callback of the mouse listener. Returning
    ``False`` stops the listener; returning ``True`` keeps it running.
    """
    # Button releases and presses of other buttons are ignored.
    if not pressed or button != mouse.Button.left:
        return True

    mouse_positions.append((x, y))
    print(f"第 {len(mouse_positions)} 次点击的位置: ({x}, {y})")

    # Keep listening only while fewer than two clicks are stored.
    return len(mouse_positions) < 2

# Step 1: capture two left clicks from the user.
listener = mouse.Listener(on_click=on_click)
listener.start()
print("请单击两次鼠标左键以记录位置...")

# Block here until on_click stops the listener.
listener.join()

if len(mouse_positions) >= 2:
    # The two recorded positions are replayed, in this order, on every page.
    first_click_position, second_click_position = mouse_positions[:2]
    print("记录的点击位置已应用到程序。")
else:
    # The listener ended without two recorded clicks; nothing to replay.
    print("未记录到足够的点击位置，程序退出。")
    exit()

# Step 2: helpers for the parsing run that replays the recorded clicks.
def move_and_click(position):
    """Move the pointer to *position*, pause half a second, then click."""
    pyautogui.moveTo(position)
    time.sleep(0.5)  # pause between moving the pointer and clicking
    pyautogui.click()

# Custom dialog that asks for the series URL and the total episode count.
class CustomDialog(simpledialog.Dialog):
    """Modal dialog collecting the first-episode URL and the episode count.

    After the dialog closes, ``url`` holds the entered text (surrounding
    whitespace removed) and ``total_episodes`` the parsed integer, or
    ``None`` when the entry is not a valid integer.
    """

    # Result defaults. ``apply`` only runs when the user confirms the dialog,
    # so without these a cancelled dialog would leave the attributes
    # undefined and any later read would raise AttributeError.
    url = ""
    total_episodes = None

    def body(self, master):
        """Build the two labelled entry fields; return the widget to focus."""
        tk.Label(master, text="请输入剧集URL：").grid(row=0, column=0)
        self.url_entry = tk.Entry(master, width=50)
        self.url_entry.grid(row=0, column=1)

        tk.Label(master, text="请输入总集数：").grid(row=1, column=0)
        self.total_entry = tk.Entry(master, width=50)
        self.total_entry.grid(row=1, column=1)

        return self.url_entry  # initial focus

    def apply(self):
        """Store the entered values on the dialog instance."""
        # Stray spaces around a pasted URL would defeat the later
        # ``\.html$`` pattern match, so trim them here.
        self.url = self.url_entry.get().strip()
        try:
            self.total_episodes = int(self.total_entry.get())
        except ValueError:
            self.total_episodes = None

def get_user_input():
    """Prompt for the series URL and the total episode count.

    Returns:
        A ``(url, total_episodes)`` tuple.

    Raises:
        SystemExit: if the dialog is cancelled, the URL is empty, or the
            episode count is not an integer.
    """
    root = tk.Tk()
    root.withdraw()  # hide the empty main window; only the dialog is shown
    try:
        dialog = CustomDialog(root, title="批量解析视频")
    finally:
        # The hidden root only exists to parent the dialog; release it.
        root.destroy()

    # Read with defaults: the result attributes may be missing when the
    # dialog was cancelled instead of confirmed.
    url = getattr(dialog, "url", "")
    total = getattr(dialog, "total_episodes", None)
    if not url or total is None:
        print("输入无效，脚本退出。")
        # ``exit()`` is injected by the ``site`` module for interactive use;
        # raising SystemExit directly has the same effect and always works.
        raise SystemExit
    return url, total

# Set to True by the keyboard callback to stop the batch early.
exit_flag = False
# Number of links extracted successfully so far.
links_count = 0
# Wall-clock start, used for the timing summary at the end of the run.
start_time = time.time()
# Location of the Chrome executable; adjust if Chrome is installed elsewhere.
chrome_path = r"C:\Program Files\Google\Chrome\Application\chrome.exe"

# Keyboard callback: pressing "q" requests an early stop.
def on_press(key):
    """Set the global exit flag and stop the listener when 'q' is pressed.

    Keys that have no ``char`` attribute, or a different character, are
    ignored and the listener keeps running.
    """
    global exit_flag
    if getattr(key, "char", None) != 'q':
        return None
    exit_flag = True
    return False  # returning False stops the listener

# Ask for the input first and only then start the global keyboard listener:
# if the listener were already running, a "q" typed into the URL field would
# set exit_flag and abort the batch before it begins.
input_url, total_episodes = get_user_input()

# Restart the clock so the timing summary does not include the time the
# user spent filling in the dialog.
start_time = time.time()

# Listen for "q" in the background so the batch can be stopped early.
listener = keyboard.Listener(on_press=on_press)
listener.start()

# Split the URL into <prefix><episode number><.html>.
pattern = re.compile(r"(.*?)(\d+)(\.html)$")
match = pattern.match(input_url)
# Seed value for the "did the clipboard change?" comparison in the main loop.
last_copied_link = input_url

if match:
    base_url_part = match.group(1)
    episode_digits = match.group(2)
    first_episode = int(episode_digits)
    suffix = match.group(3)
    # Literal braces in the URL must be doubled, otherwise the later
    # base_url.format(i) call would treat them as replacement fields.
    escaped_prefix = base_url_part.replace("{", "{{").replace("}", "}}")
    # Keep the zero padding of the entered episode number (e.g. "01" ->
    # "{:02d}") so that generated URLs match the site's numbering scheme.
    base_url = f"{escaped_prefix}{{:0{len(episode_digits)}d}}{suffix}"
    episode_range = range(first_episode, 1 + total_episodes)
else:
    print("输入的 URL 格式无效")
    raise SystemExit

def _append_record(line):
    """Append one line to copied_links.txt.

    The file is always written as UTF-8: the records contain Chinese text,
    which the platform default encoding cannot represent on every system.
    """
    with open("copied_links.txt", "a", encoding="utf-8") as file:
        file.write(line + "\n")


def _click_and_read_clipboard():
    """Replay both recorded clicks, wait briefly, and return the clipboard text."""
    move_and_click(first_click_position)
    move_and_click(second_click_position)
    time.sleep(0.5)
    return pyperclip.paste()


# Main loop: open each episode page, replay the clicks, and record the link
# that ends up on the clipboard.
for i in episode_range:
    if exit_flag:
        break
    url = base_url.format(i)
    subprocess.Popen([chrome_path, url])
    time.sleep(3)  # give the page time to load
    move_and_click(first_click_position)
    time.sleep(0.5)
    move_and_click(second_click_position)
    time.sleep(0.5)
    copied_link = pyperclip.paste()

    # An unchanged clipboard means nothing new was copied: retry the clicks,
    # and after more than 3 failed retries refresh the page (at most 3 times).
    retry_count = 0
    refresh_count = 0
    while copied_link == last_copied_link and not exit_flag:
        time.sleep(1)
        copied_link = _click_and_read_clipboard()
        if copied_link != last_copied_link:
            # The retry worked. Stop before the escalation logic below, which
            # would otherwise refresh needlessly or log a bogus failure
            # record next to the successfully extracted link.
            break

        retry_count += 1
        if retry_count <= 3:
            continue

        if refresh_count >= 3:
            print("刷新超过 3 次，剪贴板内容仍未更新，跳过当前页面。")
            _append_record(f"第 {i} 集：未提取到链接，请手动获取。")
            break

        print("剪贴板内容未更新，刷新页面并重试...")
        pyautogui.hotkey('ctrl', 'r')  # refresh the page
        time.sleep(3)  # wait for the reload
        copied_link = _click_and_read_clipboard()
        retry_count = 0
        refresh_count += 1

    if copied_link != last_copied_link:
        last_copied_link = copied_link
        print(f"复制的链接: {copied_link}")
        _append_record(f"第 {i} 集：{copied_link}")
        links_count += 1
    else:
        print("剪贴板内容未更新，跳过写入。")

    time.sleep(0.5)
    pyautogui.hotkey('ctrl', 'w')  # close the current tab
    time.sleep(0.5)

# Final report: link count, total elapsed time, and average time per link.
end_time = time.time()
total_time = end_time - start_time

# Avoid dividing by zero when no link was extracted.
if links_count > 0:
    average_time = total_time / links_count
else:
    average_time = 0

print(f"总共提取了 {links_count} 条链接")
print(f"总耗时: {total_time:.2f} 秒")
print(f"平均每条耗时: {average_time:.2f} 秒")
# Keep the console window open until the user confirms.
input("按回车键退出程序...")