0%

py爬虫_python考试网站

基于 requests 的 HTTP 协议逆向方案,直接调用后台接口。

1. 数据加载模块 (data_loader.py)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import json
from typing import List, Dict, Any

class DataLoader:
    """Load question records from a JSON file and filter out unusable ones."""

    def __init__(self, file_path: str):
        """Remember which file to read.

        Args:
            file_path: Path passed to ``open`` when ``load_data`` is called.
        """
        self.file_path = file_path

    def load_data(self) -> List[Dict[str, Any]]:
        """Parse the JSON file and return its top-level list.

        Failures are reported on stdout and converted into an empty list so
        the caller never has to handle an exception.

        Returns:
            The parsed list, or ``[]`` when the file is missing, unreadable,
            not valid JSON, or its top-level value is not a list.
        """
        try:
            with open(self.file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except FileNotFoundError:
            print(f"❌ 错误:文件未找到,请确保 '{self.file_path}' 存在。")
            return []
        except json.JSONDecodeError:
            print(f"❌ 错误:'{self.file_path}' 文件不是有效的 JSON 格式。")
            return []
        except Exception as e:
            # Deliberate best-effort catch-all: any other read problem
            # (permissions, bad encoding, ...) is reported, not raised.
            print(f"❌ 错误:加载文件时发生未知错误: {e}")
            return []

        # A JSON object or scalar at the top level would otherwise be
        # iterated key-by-key / char-by-char by filter_valid_questions.
        if not isinstance(data, list):
            print(f"❌ 错误:'{self.file_path}' 的顶层数据不是列表。")
            return []
        return data

    def filter_valid_questions(self, data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Keep only records whose ``status`` is 200 and whose ``detail`` is truthy.

        Elements that are not dicts (e.g. JSON null or numbers) are skipped
        instead of raising ``AttributeError``.

        Args:
            data: Raw records, typically the result of ``load_data``.

        Returns:
            A new list containing the accepted records in their original order.
        """
        return [
            item for item in data
            if isinstance(item, dict)
            and item.get('status') == 200
            and item.get('detail')
        ]

2. 业务逻辑模块 (question_service.py)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
from typing import List, Dict, Any

class QuestionService:
    """Query and pretty-print question records held in memory.

    Every method reads ``record['detail']['question']``; the formatter also
    reads the optional ``knowledgePoint`` and ``questionTestList`` entries of
    ``record['detail']``.
    """

    def __init__(self, questions: List[Dict[str, Any]]):
        """Store the records that all lookups operate on.

        Args:
            questions: Records to search; the list is kept by reference.
        """
        self.questions = questions

    def format_question_output(self, q_data: Dict[str, Any], show_answer: bool = False) -> str:
        """Render one record as a multi-line, human-readable string.

        Args:
            q_data: A record containing ``detail.question`` with the keys
                ``id``, ``title``, ``type``, ``difficulty`` and ``content``.
            show_answer: When true, append the question's ``answer``.

        Returns:
            The formatted text, framed by lines of ``=`` characters.

        Raises:
            KeyError: If one of the required keys listed above is missing.
        """
        detail = q_data['detail']
        question = detail['question']
        # ``or`` (rather than a .get default) also covers keys that are
        # present but hold JSON null.
        knowledge = detail.get('knowledgePoint') or {}
        test_cases = detail.get('questionTestList') or []

        output = [
            "=" * 60,
            f"ID: {question['id']}",
            f"标题: {question['title']}",
            f"类型: {question['type']} | 难度: {question['difficulty']}",
            f"知识点: {knowledge.get('name', 'N/A')}",
            "-" * 60,
            "📝 题目内容:",
            (question['content'] or '').strip(),
            f"\n测试用例 ({len(test_cases)}个):"
        ]

        # Only the first three test cases are previewed.
        output.extend(
            f" - 输入: {case.get('input', '无')} | 预期输出: {case.get('output', '无')}"
            for case in test_cases[:3]
        )

        if show_answer:
            output.append("\n💡 标准答案:")
            output.append((question.get('answer') or '').strip())

        output.append("=" * 60)
        return "\n".join(output)

    def find_by_id(self, question_id: str) -> Dict[str, Any]:
        """Return the first record whose id matches ``question_id``.

        The record's top-level ``id`` is used when present; otherwise the
        nested ``detail.question.id`` is used. Ids are compared as strings so
        that a typed ``"42"`` matches a numeric ``42`` loaded from JSON.

        Args:
            question_id: The id to look for.

        Returns:
            The matching record, or ``None`` when nothing matches.
        """
        wanted = str(question_id)
        for item in self.questions:
            if 'id' in item:
                candidate = item['id']
            else:
                nested = (item.get('detail') or {}).get('question') or {}
                candidate = nested.get('id')
            if candidate is not None and str(candidate) == wanted:
                return item
        return None

    def _find_by_field(self, field: str, wanted: Any) -> List[Dict[str, Any]]:
        """Return records whose ``detail.question[field]`` equals ``wanted`` as text."""
        wanted_text = str(wanted)
        return [
            item for item in self.questions
            if str(item['detail']['question'][field]) == wanted_text
        ]

    def find_by_type(self, q_type: str) -> List[Dict[str, Any]]:
        """Return every record whose question ``type`` equals ``q_type``.

        Values are compared as strings, so numeric types match typed input.
        """
        return self._find_by_field('type', q_type)

    def find_by_difficulty(self, difficulty: str) -> List[Dict[str, Any]]:
        """Return every record whose question ``difficulty`` equals ``difficulty``.

        Values are compared as strings, so numeric levels match typed input.
        """
        return self._find_by_field('difficulty', difficulty)

    def search_by_keyword(self, keyword: str) -> List[Dict[str, Any]]:
        """Return records whose title or content contains ``keyword``.

        Matching is case-insensitive. A missing or null title/content is
        treated as empty text. An empty keyword matches every record.
        """
        needle = keyword.lower()
        results = []
        for item in self.questions:
            question = item['detail']['question']
            title = (question.get('title') or '').lower()
            content = (question.get('content') or '').lower()
            if needle in title or needle in content:
                results.append(item)
        return results

    def get_statistics(self) -> tuple:
        """Return ``(sorted types, sorted difficulties)`` across all records.

        Each element of the tuple is a sorted list of the distinct values
        found under ``detail.question``.
        """
        types = set()
        difficulties = set()
        for item in self.questions:
            question = item['detail']['question']
            types.add(question['type'])
            difficulties.add(question['difficulty'])
        return sorted(types), sorted(difficulties)

3. 主程序入口 (main.py)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
from data_loader import DataLoader
from question_service import QuestionService

class QuestionAnalyzerApp:
    """Interactive console front-end for browsing a question data set."""

    def __init__(self, file_path: str):
        """Load the data file and build the query service.

        Args:
            file_path: Path of the JSON file handed to ``DataLoader``.
        """
        # 1. Load the raw records and keep only the valid ones.
        loader = DataLoader(file_path)
        raw_data = loader.load_data()
        valid_questions = loader.filter_valid_questions(raw_data)

        print(f"✅ 数据加载成功。找到 {len(raw_data)} 条记录,其中 {len(valid_questions)} 条有效。")

        # 2. Create the service that all menu actions delegate to.
        self.service = QuestionService(valid_questions)

    def run(self) -> None:
        """Show the menu in a loop until the user picks the exit option.

        Returns immediately when the service holds no questions.
        """
        if not self.service.questions:
            print("🛑 数据集为空,程序退出。")
            return

        # Menu number -> handler; '6' (exit) is handled separately below.
        handlers = {
            '1': self._handle_find_by_id,
            '2': self._handle_statistics,
            '3': self._handle_find_by_type,
            '4': self._handle_find_by_difficulty,
            '5': self._handle_search,
        }

        while True:
            self._print_menu()
            choice = input("请输入操作编号 (1-6): ").strip()

            if choice == '6':
                print("👋 感谢使用,程序已退出。")
                break

            handler = handlers.get(choice)
            if handler is None:
                print("⚠️ 无效的输入,请重新输入编号 (1-6)。")
            else:
                handler()

            # Pause so the output stays visible before the menu is redrawn.
            input("\n按 Enter 键继续...")

    def _print_menu(self) -> None:
        """Print the list of available operations."""
        print("\n" + "=" * 50)
        print("📚 题目数据分析器 v3.0 (Modular) - 请选择操作:")
        print("=" * 50)
        print("1. 🔍 按 ID 查找题目和答案")
        print("2. 📊 统计与列出所有类型/难度")
        print("3. 🔢 按题目类型查找 (预览)")
        print("4. ⭐ 按题目难度查找 (预览)")
        print("5. 🔎 关键字搜索 (标题/内容)")
        print("6. ❌ 退出程序")
        print("-" * 50)

    def _handle_find_by_id(self) -> None:
        """Ask for an id and print the matching question, optionally with its answer."""
        q_id = input("请输入要查找的题目 ID: ").strip()
        # Strip so that answers such as " y" or "Y " are still accepted.
        show_ans = input("是否显示标准答案? (y/n): ").strip().lower() == 'y'
        item = self.service.find_by_id(q_id)
        if item is not None:
            print(f"\n🔍 找到题目 (ID: {q_id}):")
            print(self.service.format_question_output(item, show_ans))
        else:
            print(f"❌ 未找到 ID 为 '{q_id}' 的题目。")

    def _handle_statistics(self) -> None:
        """Print every distinct question type and difficulty."""
        types, difficulties = self.service.get_statistics()
        print("\n📊 数据集统计:")
        # str() each value: str.join raises TypeError on non-string items
        # such as numeric difficulty levels.
        print(f" - 所有题目类型: {', '.join(map(str, types))}")
        print(f" - 所有题目难度: {', '.join(map(str, difficulties))}")

    def _handle_find_by_type(self) -> None:
        """Ask for a question type and preview the matching questions."""
        q_type = input("请输入要查找的题目类型: ").strip()
        self._search_and_print(self.service.find_by_type, q_type, f"类型为 '{q_type}'")

    def _handle_find_by_difficulty(self) -> None:
        """Ask for a difficulty and preview the matching questions."""
        difficulty = input("请输入要查找的题目难度: ").strip()
        self._search_and_print(self.service.find_by_difficulty, difficulty, f"难度为 '{difficulty}'")

    def _handle_search(self) -> None:
        """Ask for a keyword and preview the questions containing it."""
        keyword = input("请输入要搜索的关键字: ").strip()
        self._search_and_print(self.service.search_by_keyword, keyword, f"包含关键字 '{keyword}'")

    def _search_and_print(self, func, arg, desc) -> None:
        """Run a lookup and print at most a user-chosen number of results.

        Args:
            func: Callable taking ``arg`` and returning a list of records.
            arg: Value forwarded to ``func``.
            desc: Text describing the search, embedded in the summary line.
        """
        raw_limit = input("请输入限制显示的数量 (默认5): ").strip()
        # isdecimal(), unlike isdigit(), is only true for characters that
        # int() can parse; anything else falls back to the default of 5.
        limit = int(raw_limit) if raw_limit.isdecimal() else 5

        results = func(arg)
        print(f"\n🔍 找到 {len(results)} 个{desc}的题目。显示前 {min(len(results), limit)} 个。")

        for item in results[:limit]:
            print(self.service.format_question_output(item, show_answer=False))

if __name__ == "__main__":
    import sys

    # An optional first command-line argument selects the data file; with no
    # argument the script still reads 'ans.json' from the working directory.
    data_path = sys.argv[1] if len(sys.argv) > 1 else 'ans.json'
    app = QuestionAnalyzerApp(data_path)
    app.run()