little_C's nest

化兴趣为生活

捕获b站弹幕,溯源用户


通过网络检索并结合 Copilot,我找到了一种能够查出弹幕发送者用户 ID(UID)的方法,现给出代码如下:

import re
import requests
from bs4 import BeautifulSoup
import xml.etree.ElementTree as ET
import binascii
from tqdm import tqdm
def crc32asii(v):
    """
    Return the CRC32 checksum of ``v`` as a lowercase hexadecimal string.

    :param v: bytes-like object to checksum
    :return: hex digits with no ``0x`` prefix and no zero padding
    """
    checksum = binascii.crc32(v) & 0xffffffff
    return format(checksum, 'x')
def findUid2(toBeFind, begin=500000001, end=900000000):
    """
    Brute-force the UIDs whose CRC32 hex digest equals ``toBeFind``.

    Every integer from ``begin`` through ``end`` is rendered as a decimal
    string, hashed with ``crc32asii`` and compared with the requested hash.
    After each match the user is asked on stdin whether to keep searching;
    any answer other than ``y`` stops the scan.

    :param toBeFind: hash to look up, as a hexadecimal string
    :param begin: first UID to try
    :param end: last UID to try (inclusive)
    :return: list of matching UIDs in ascending order (possibly empty)
    """
    # crc32asii emits lowercase hex without zero padding, so bring the
    # caller's value into the same form; otherwise a pasted hash with
    # upper-case digits, stray whitespace or leading zeros never matches.
    target = toBeFind.strip().lower().lstrip('0') or '0'
    ans = []
    print("逆向中……")
    # end + 1 so that the documented "ending UID" itself is tested.
    for i in tqdm(range(begin, end + 1)):
        if crc32asii(str(i).encode('utf-8')) != target:
            continue
        ans.append(i)
        print("找到:" + str(i))
        # Distinct UIDs can share a CRC32, so let the user decide whether
        # to keep scanning for further candidates.
        if input("极小可能哈希重值,是否继续? y/n :") != 'y':
            break
    return ans
def findUid1(toBeFind, begin=1, end=500000000):
    """
    Brute-force the UIDs whose CRC32 hex digest equals ``toBeFind``.

    Every integer from ``begin`` through ``end`` is rendered as a decimal
    string, hashed with ``crc32asii`` and compared with the requested hash.
    After each match the user is asked on stdin whether to keep searching;
    any answer other than ``y`` stops the scan.

    :param toBeFind: hash to look up, as a hexadecimal string
    :param begin: first UID to try
    :param end: last UID to try (inclusive)
    :return: list of matching UIDs in ascending order (possibly empty)
    """
    # crc32asii emits lowercase hex without zero padding, so bring the
    # caller's value into the same form; otherwise a pasted hash with
    # upper-case digits, stray whitespace or leading zeros never matches.
    target = toBeFind.strip().lower().lstrip('0') or '0'
    ans = []
    print("逆向中……")
    # end + 1 so that the documented "ending UID" itself is tested; with an
    # exclusive bound the default end (500000000) would never be checked.
    for i in tqdm(range(begin, end + 1)):
        if crc32asii(str(i).encode('utf-8')) != target:
            continue
        ans.append(i)
        print("找到:" + str(i))
        # Distinct UIDs can share a CRC32, so let the user decide whether
        # to keep scanning for further candidates.
        if input("极小可能哈希重值,是否继续? y/n :") != 'y':
            break
    return ans

# Script body: resolve a video's cid from its BV id, download the danmaku
# XML for that cid, print every danmaku entry, then optionally reverse a
# user hash into candidate UIDs.
name = input("请给出bv号").strip()
url = "https://www.bilibili.com/video/" + name
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 Edg/122.0.0.0"
}
# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
html_content = response.text

# The cid is taken from the first `"cid":<digits>` occurrence in the page.
cid_match = re.search(r'"cid":(\d+)', html_content)
if cid_match is None:
    # Without this guard a wrong BV id surfaces as an AttributeError on None.
    raise SystemExit("未能在视频页面中找到 cid,请检查 bv 号是否正确")
final = cid_match.group(1)

url = r"https://comment.bilibili.com/" + str(final) + r".xml"
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
# Parse the raw bytes so the XML parser honours the document's own encoding
# declaration instead of relying on a guessed charset.
root = ET.fromstring(response.content)
d_elements_with_p = root.findall(".//d[@p]")

# Print the comma-separated 'p' attribute fields and the text of each entry.
for elem in d_elements_with_p:
    p_values = elem.attrib['p'].split(',')
    if len(p_values) < 8:
        # Not enough fields to decode this entry; skip it rather than crash.
        continue
    time_post = float(p_values[0])
    mode = int(p_values[1])
    font_size = int(p_values[2])
    color = int(p_values[3])
    # NOTE(review): printed under the label "id"; public descriptions of this
    # format suggest field 4 may be the send timestamp - confirm.
    danmaku_id = int(p_values[4])
    pool = int(p_values[5])
    user_hash = p_values[6]
    database_id = int(p_values[7])
    # The ninth field is not always present; show None when it is missing.
    type_id = int(p_values[8]) if len(p_values) > 8 else None
    print(f"时间:{time_post}s 模式: {mode}  字体大小:{font_size}  颜色: {color} id:{danmaku_id}")
    print(f"用户哈希:{user_hash} 数据库id:{database_id} 类型id:{type_id}")
    print(f"内容:{elem.text}\n")

print("是否查找用户,根据用户哈希值,y/n")
choice = input()
if choice == "y":
    yourHash = input("给出哈希值")  # the user hash to reverse
    ans1 = findUid1(yourHash)
    # The second range's result is only reported when the first range found
    # nothing, so skip that very long scan whenever ans1 already has a hit.
    ans2 = [] if ans1 else findUid2(yourHash)
    print("uid = " + str(ans1 if ans1 else ans2))
else:
    print("结束")

如有不清楚的地方或建议,欢迎在评论区留言。

《“捕获b站弹幕,溯源用户”》 有 1 条评论

  1. 头像
    764742096

    看不懂,谢谢

    1

发表回复

您的邮箱地址不会被公开。 必填项已用 * 标注


little_C's nest