当前位置：首页 > news >正文

山西做杂粮的网站百度信息流

news 2026/4/8 11:15:43

山西做杂粮的网站,百度信息流,网络运维工程师简历怎么写,网站建设登录注册怎么做基于Python爬虫的房价可视化项目提示项目说明功能特点环境要求参数说明代码项目提示本项目仅供学习交流使用，请勿用于非法用途。使用本项目所产生的一切法律后果由使用者自行承担。项目说明这是一个用于爬取多个租房网站信息并进行数据分析可视化的Python项…

基于Python爬虫的房价可视化

项目提示
项目说明
功能特点
环境要求
参数说明
代码

项目提示

本项目仅供学习交流使用，请勿用于非法用途。使用本项目所产生的一切法律后果由使用者自行承担。

项目说明

这是一个用于爬取多个租房网站信息并进行数据分析可视化的Python项目。目前支持以下网站：

链家网
贝壳找房
58同城

功能特点

支持多个租房网站数据爬取
自动保存为CSV格式
可合并多个来源的数据
支持自定义爬取页数
支持自定义城市
提供数据可视化分析功能

环境要求

Python 3.7+
依赖包：requests, beautifulsoup4, pandas, lxml, matplotlib, seaborn, numpy, jieba, wordcloud

参数说明

--pages: 每个网站爬取的页数，默认为3
--city: 城市代码，如sz表示深圳，默认为sz

代码

贝壳：

#!/usr/bin/env python
# -*- coding: utf-8 -*-import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import random
from datetime import datetimeclass BeikeRentalScraper:def __init__(self):self.headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36','Accept-Language': 'zh-CN,zh;q=0.9',}self.base_url = 'https://sz.zu.ke.com/zufang/'  # 贝壳深圳租房self.data = []def get_page(self, url):"""获取页面内容"""try:response = requests.get(url, headers=self.headers)response.raise_for_status()return response.textexcept Exception as e:print(f"获取页面失败: {e}")return Nonedef parse_list_page(self, html):"""解析列表页面"""if not html:return []soup = BeautifulSoup(html, 'lxml')house_items = soup.select('.content__list--item')houses = []for item in house_items:try:# 跳过广告if item.get('class') and 'content__list--item--ad' in item.get('class'):continuetitle_elem = item.select_one('.content__list--item--title a')title = title_elem.text.strip()link = title_elem['href']if not link.startswith('http'):link = 'https://sz.zu.ke.com' + linkdesc = item.select_one('.content__list--item--des').text.strip().replace('\n', ' ')price_elem = item.select_one('.content__list--item-price')price = price_elem.select_one('em').text.strip() + '元/月' if price_elem else '价格未知'# 提取更多信息location = ''area = ''layout = ''desc_parts = desc.split()if len(desc_parts) >= 3:location = desc_parts[0]layout = desc_parts[1] if len(desc_parts) > 1 else ''area = desc_parts[2] if len(desc_parts) > 2 else ''houses.append({'标题': title,'链接': link,'位置': location,'户型': layout,'面积': area,'价格': price,'描述': desc})except Exception as e:print(f"解析房源信息失败: {e}")continuereturn housesdef scrape(self, pages=3):"""爬取指定页数的租房信息"""for page in range(1, pages + 1):print(f"正在爬取第 {page} 页...")url = f"{self.base_url}pg{page}/"html = self.get_page(url)houses = self.parse_list_page(html)self.data.extend(houses)# 添加随机延时，避免被封IPtime.sleep(random.uniform(2, 5))return self.datadef save_to_csv(self, filename=None):"""保存数据到CSV文件"""if not self.data:print("没有数据可保存")returnif filename is None:now = datetime.now().strftime("%Y%m%d_%H%M%S")filename = f"beike_rental_data_{now}.csv"df = pd.DataFrame(self.data)df.to_csv(filename, index=False, encoding='utf-8-sig')print(f"数据已保存到 {filename}，共 {len(self.data)} 条记录")def main():print("开始爬取贝壳找房租房信息...")scraper = BeikeRentalScraper()scraper.scrape(pages=5)  # 默认爬取20页scraper.save_to_csv("beike_rental_data.csv")print("爬取完成！")if __name__ == "__main__":main()

58同城：

#!/usr/bin/env python
# -*- coding: utf-8 -*-import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import random
import os
from datetime import datetimeclass RentalScraper:def __init__(self):self.headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36','Accept-Language': 'zh-CN,zh;q=0.9',}self.base_url = 'https://sz.lianjia.com/zufang/'  # 以链家深圳租房为例self.data = []def get_page(self, url):"""获取页面内容"""try:response = requests.get(url, headers=self.headers)response.raise_for_status()return response.textexcept Exception as e:print(f"获取页面失败: {e}")return Nonedef parse_list_page(self, html):"""解析列表页面"""if not html:return []soup = BeautifulSoup(html, 'lxml')house_items = soup.select('.content__list .content__list--item')houses = []for item in house_items:try:title = item.select_one('.content__list--item--title a').text.strip()link = item.select_one('.content__list--item--title a')['href']if not link.startswith('http'):link = 'https://sz.lianjia.com' + linkdesc = item.select_one('.content__list--item--des').text.strip().replace('\n', ' ').replace(' ', '')price = item.select_one('.content__list--item-price').text.strip()houses.append({'标题': title,'链接': link,'描述': desc,'价格': price})except Exception as e:print(f"解析房源信息失败: {e}")continuereturn housesdef scrape(self, pages=3):"""爬取指定页数的租房信息"""for page in range(1, pages + 1):print(f"正在爬取第 {page} 页...")url = f"{self.base_url}pg{page}/"html = self.get_page(url)houses = self.parse_list_page(html)self.data.extend(houses)# 添加随机延时，避免被封IPtime.sleep(random.uniform(2, 5))return self.datadef save_to_csv(self, filename=None):"""保存数据到CSV文件"""if not self.data:print("没有数据可保存")returnif filename is None:now = datetime.now().strftime("%Y%m%d_%H%M%S")filename = f"rental_data_{now}.csv"df = pd.DataFrame(self.data)df.to_csv(filename, index=False, encoding='utf-8-sig')print(f"数据已保存到 {filename}，共 {len(self.data)} 条记录")def main():print("开始爬取租房信息...")scraper = RentalScraper()scraper.scrape(pages=20)  # 默认爬取20页scraper.save_to_csv("rental_data.csv")print("爬取完成！")if __name__ == "__main__":main()