如果我想爬取商品图片，应该怎么修改代码？_淘宝下载图片的方法爬代码-CSDN博客

本文链接：https://blog.csdn.net/one6688/article/details/147740686

要爬取商品图片，你需要从 HTML 中提取图片的 URL，然后下载图片。以下是修改后的代码示例，展示如何爬取商品图片并保存到本地。

修改后的代码示例

（一）解析 HTML 内容并提取图片 URL

在 parse_product_details 函数中，添加代码来提取商品图片的 URL。

Python

from bs4 import BeautifulSoup
import requests
import os

def parse_product_details(html):
    """Print every product found in a search-result page and download its image.

    Each element matching ``.m-itemlist .items .item`` is treated as one
    product. Items that lack a title, price or shop node are skipped instead
    of aborting the whole page. When an item has no usable ``data-src`` image
    attribute, its text fields are still printed but no download is attempted.

    Args:
        html: HTML text of the search-result page.
    """
    soup = BeautifulSoup(html, 'html.parser')
    for product in soup.select(".m-itemlist .items .item"):
        title_node = product.select_one(".title")
        price_node = product.select_one(".price")
        shop_node = product.select_one(".shop")
        # select_one returns None when nothing matches; skip incomplete items
        # rather than raising AttributeError on .get_text().
        if title_node is None or price_node is None or shop_node is None:
            continue

        title = title_node.get_text(strip=True)
        price = price_node.get_text(strip=True)
        shop = shop_node.get_text(strip=True)

        # Tag.get returns None for a missing attribute instead of raising KeyError.
        img_node = product.select_one(".pic .img")
        img_url = img_node.get('data-src') if img_node is not None else None
        if img_url and img_url.startswith("//"):
            # Protocol-relative URLs carry no scheme, which requests rejects.
            img_url = "https:" + img_url

        print(f"商品名称: {title}")
        print(f"商品价格: {price}")
        print(f"店铺名称: {shop}")
        if img_url:
            print(f"商品图片: {img_url}")
        print("------------------------")
        if img_url:
            download_image(img_url, title)

（二）下载图片

添加一个函数 download_image，用于下载图片并保存到本地。

Python

def download_image(url, title):
    """Download the image at *url* and save it as ``images/<cleaned title>.jpg``.

    The file name is built from *title* by keeping only alphanumeric
    characters, spaces and dots. Network and file-system errors raised while
    downloading or writing are printed, not raised; a non-200 response is
    reported and nothing is written.

    Args:
        url: Absolute URL of the image.
        title: Product title used to derive the file name.
    """
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs('images', exist_ok=True)

    # Drop characters that are not safe in file names.
    filename = ''.join(c for c in title if c.isalnum() or c in (' ', '.')).rstrip()
    if not filename:
        # A title with no usable characters would otherwise produce ".jpg".
        filename = 'image'

    try:
        # A timeout keeps one stalled server from hanging the whole crawl.
        response = requests.get(url, timeout=10)
        if response.status_code != 200:
            print(f"下载图片失败，状态码: {response.status_code}")
            return
        with open(f'images/{filename}.jpg', 'wb') as f:
            f.write(response.content)
        print(f"图片已保存: {filename}.jpg")
    except (requests.RequestException, OSError) as e:
        print(f"下载图片时出错: {e}")

（三）整合代码

将上述功能整合到主程序中，实现完整的爬虫程序。

Python

import requests
from bs4 import BeautifulSoup
import os

def get_page(url, timeout=10):
    """Fetch *url* with a browser-like User-Agent and return the response text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout the request could block indefinitely.

    Returns:
        The response body decoded as text. The HTTP status code is not checked.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    return response.text

def parse_product_details(html):
    """Print every product found in a search-result page and download its image.

    Each element matching ``.m-itemlist .items .item`` is treated as one
    product. Items that lack a title, price or shop node are skipped instead
    of aborting the whole page. When an item has no usable ``data-src`` image
    attribute, its text fields are still printed but no download is attempted.

    Args:
        html: HTML text of the search-result page.
    """
    soup = BeautifulSoup(html, 'html.parser')
    for product in soup.select(".m-itemlist .items .item"):
        title_node = product.select_one(".title")
        price_node = product.select_one(".price")
        shop_node = product.select_one(".shop")
        # select_one returns None when nothing matches; skip incomplete items
        # rather than raising AttributeError on .get_text().
        if title_node is None or price_node is None or shop_node is None:
            continue

        title = title_node.get_text(strip=True)
        price = price_node.get_text(strip=True)
        shop = shop_node.get_text(strip=True)

        # Tag.get returns None for a missing attribute instead of raising KeyError.
        img_node = product.select_one(".pic .img")
        img_url = img_node.get('data-src') if img_node is not None else None
        if img_url and img_url.startswith("//"):
            # Protocol-relative URLs carry no scheme, which requests rejects.
            img_url = "https:" + img_url

        print(f"商品名称: {title}")
        print(f"商品价格: {price}")
        print(f"店铺名称: {shop}")
        if img_url:
            print(f"商品图片: {img_url}")
        print("------------------------")
        if img_url:
            download_image(img_url, title)

def download_image(url, title):
    """Download the image at *url* and save it as ``images/<cleaned title>.jpg``.

    The file name is built from *title* by keeping only alphanumeric
    characters, spaces and dots. Network and file-system errors raised while
    downloading or writing are printed, not raised; a non-200 response is
    reported and nothing is written.

    Args:
        url: Absolute URL of the image.
        title: Product title used to derive the file name.
    """
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs('images', exist_ok=True)

    # Drop characters that are not safe in file names.
    filename = ''.join(c for c in title if c.isalnum() or c in (' ', '.')).rstrip()
    if not filename:
        # A title with no usable characters would otherwise produce ".jpg".
        filename = 'image'

    try:
        # A timeout keeps one stalled server from hanging the whole crawl.
        response = requests.get(url, timeout=10)
        if response.status_code != 200:
            print(f"下载图片失败，状态码: {response.status_code}")
            return
        with open(f'images/{filename}.jpg', 'wb') as f:
            f.write(response.content)
        print(f"图片已保存: {filename}.jpg")
    except (requests.RequestException, OSError) as e:
        print(f"下载图片时出错: {e}")

def search_products(keyword):
    """Fetch the Taobao search-result page for *keyword* and process its products.

    The page is fetched with ``get_page`` and handed to
    ``parse_product_details``.

    Args:
        keyword: Search phrase; it is percent-encoded before being placed in
            the query string, so spaces, ``&``, ``#`` and non-ASCII text are safe.
    """
    # Imported locally so the function does not rely on a file-level import.
    from urllib.parse import quote

    # Unencoded keywords containing "&" or "#" would truncate or split the query.
    url = f"https://s.taobao.com/search?q={quote(keyword)}"
    html = get_page(url)
    parse_product_details(html)

if __name__ == "__main__":
    # Script entry point: run one sample search when executed directly.
    keyword = "iPhone 13"
    search_products(keyword=keyword)