今天我们将学习Python网络编程,包括HTTP请求、API调用和简单的网络爬虫。这是Python非常重要的应用领域。
第一部分:HTTP协议基础
1.1 HTTP协议简介
- HTTP(超文本传输协议)是Web通信的基础
- 常见的HTTP方法:GET、POST、PUT、DELETE
- HTTP状态码:200(成功)、404(未找到)、500(服务器错误)等
1.2 安装必要的库
# 在命令行中安装requests库
pip install requests beautifulsoup4
第二部分:使用requests库进行HTTP请求
import requests
import json
from bs4 import BeautifulSoup
import time
import os
from urllib.parse import urljoin, urlparse
import csv
class HTTPDemo:
    """Demonstrate common HTTP operations with a shared requests.Session.

    Covers GET/POST requests against httpbin.org, streaming file download
    with a progress indicator, and simple website health checks.
    """

    # Timeout (seconds) applied to every request so a hung server cannot
    # block the demo forever. The original only set a timeout in
    # check_website_status; now all methods agree.
    DEFAULT_TIMEOUT = 10

    def __init__(self):
        # One Session reuses TCP connections across all requests.
        self.session = requests.Session()
        # Browser-like User-Agent: some servers reject or throttle the
        # default "python-requests" UA.
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }

    def get_request_demo(self):
        """Send a GET request with query parameters and print the response."""
        print("=== GET请求演示 ===")
        # Public echo endpoint: reflects the request back as JSON.
        url = "https://httpbin.org/get"
        try:
            response = self.session.get(
                url,
                headers=self.headers,
                params={"name": "Python", "day": 25},
                timeout=self.DEFAULT_TIMEOUT,
            )
            print(f"状态码: {response.status_code}")
            print(f"响应头: {dict(response.headers)}")
            print(f"响应内容: {response.text}")
            # Only attempt JSON parsing on success.
            if response.status_code == 200:
                data = response.json()
                print(f"解析后的JSON: {json.dumps(data, indent=2, ensure_ascii=False)}")
        except requests.exceptions.RequestException as e:
            print(f"请求失败: {e}")

    def post_request_demo(self):
        """Send a POST request with a JSON body and print the echoed data."""
        print("\n=== POST请求演示 ===")
        url = "https://httpbin.org/post"
        data = {
            "name": "Python学习者",
            "course": "Python网络编程",
            "day": 25
        }
        try:
            # json= serializes the dict and sets Content-Type for us.
            response = self.session.post(
                url,
                headers=self.headers,
                json=data,
                timeout=self.DEFAULT_TIMEOUT,
            )
            print(f"状态码: {response.status_code}")
            if response.status_code == 200:
                result = response.json()
                print(f"服务器返回的数据: {json.dumps(result, indent=2, ensure_ascii=False)}")
        except requests.exceptions.RequestException as e:
            print(f"POST请求失败: {e}")

    def download_file(self, url, filename=None):
        """Stream-download ``url`` to ``filename``, printing progress.

        If ``filename`` is None it is derived from the URL path, falling
        back to "download.file" for URLs with no path component.
        Returns True on success, False on any failure.
        """
        print("\n=== 文件下载演示 ===")
        if filename is None:
            filename = os.path.basename(urlparse(url).path) or "download.file"
        try:
            # stream=True avoids loading the whole body into memory.
            response = self.session.get(
                url,
                headers=self.headers,
                stream=True,
                timeout=self.DEFAULT_TIMEOUT,
            )
            if response.status_code == 200:
                # Content-Length may be absent; 0 disables the progress bar.
                total_size = int(response.headers.get('content-length', 0))
                with open(filename, 'wb') as f:
                    downloaded_size = 0
                    for chunk in response.iter_content(chunk_size=8192):
                        if chunk:  # skip keep-alive chunks
                            f.write(chunk)
                            downloaded_size += len(chunk)
                            if total_size > 0:
                                percent = (downloaded_size / total_size) * 100
                                print(f"\r下载进度: {percent:.1f}%", end='')
                # BUG FIX: the original printed a literal placeholder
                # instead of interpolating the actual file name.
                print(f"\n文件已下载: {filename}")
                return True
            else:
                print(f"下载失败,状态码: {response.status_code}")
                return False
        except Exception as e:
            print(f"下载过程中出错: {e}")
            return False

    def check_website_status(self, url):
        """Probe ``url`` and return a dict describing the response.

        On success the dict contains url, status_code, response_time
        (formatted ms string), content_length and server header; on any
        request failure it contains only url and error — never raises.
        """
        try:
            start_time = time.time()
            response = self.session.get(url, headers=self.headers, timeout=10)
            end_time = time.time()
            response_time = (end_time - start_time) * 1000  # seconds -> ms
            return {
                'url': url,
                'status_code': response.status_code,
                'response_time': f"{response_time:.2f}ms",
                'content_length': len(response.content),
                'server': response.headers.get('Server', 'Unknown')
            }
        except requests.exceptions.RequestException as e:
            return {
                'url': url,
                'error': str(e)
            }
# Entry point: exercises every HTTPDemo feature in sequence.
def run_http_demo():
    """Drive all HTTPDemo examples end to end and print the results."""
    demo = HTTPDemo()
    demo.get_request_demo()
    demo.post_request_demo()
    # Download a small public test image from httpbin.
    demo.download_file("https://httpbin.org/image/jpeg", "sample_image.jpg")
    # Probe a mix of sites: reachable ones, a deliberate 404, and a
    # hostname that fails DNS — covering both result-dict shapes.
    websites = (
        "https://www.baidu.com",
        "https://www.google.com",
        "https://httpbin.org/status/404",
        "https://invalid-website-that-does-not-exist.com",
    )
    print("\n=== 网站状态检查 ===")
    for site in websites:
        info = demo.check_website_status(site)
        if 'error' in info:
            print(f"{info['url']} - 错误: {info['error']}")
        else:
            print(f"{info['url']} - 状态: {info['status_code']}, "
                  f"响应时间: {info['response_time']}, "
                  f"服务器: {info['server']}")


if __name__ == "__main__":
    run_http_demo()