当前位置：首页 > news > 正文

公司网站制作企业,火车头 wordpress发布

news 2026/4/17 12:21:00

公司网站制作企业,火车头 wordpress发布,使用WordPress快速建站视频,如何请人做网站

基于Python爬虫的房价可视化 项目提示 项目说明 功能特点 环境要求 参数说明 代码 项目提示 本项目仅供学习交流使用，请勿用于非法用途。使用本项目所产生的一切法律后果由使用者自行承担。项目说明 这是一个用于爬取多个租房网站信息并进行数据分析可视化的Python项…

基于Python爬虫的房价可视化

项目提示、项目说明、功能特点、环境要求、参数说明、代码

项目提示
本项目仅供学习交流使用，请勿用于非法用途。使用本项目所产生的一切法律后果由使用者自行承担。

项目说明
这是一个用于爬取多个租房网站信息并进行数据分析可视化的Python项目。目前支持以下网站：链家网、贝壳找房、58同城。

功能特点
支持多个租房网站数据爬取；自动保存为CSV格式；可合并多个来源的数据；支持自定义爬取页数；支持自定义城市；提供数据可视化分析功能。

环境要求
Python 3.7+
依赖包：requests, beautifulsoup4, pandas, lxml, matplotlib, seaborn, numpy, jieba, wordcloud

参数说明
--pages: 每个网站爬取的页数，默认为3
--city: 城市代码，如sz表示深圳，默认为sz

代码

贝壳
#!/usr/bin/env python # -*- coding: utf-8 -*-import requests from bs4 import BeautifulSoup import pandas as pd import time import random from datetime import datetimeclass BeikeRentalScraper:def __init__(self):self.headers {User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36,Accept-Language: zh-CN,zh;q0.9,}self.base_url https://sz.zu.ke.com/zufang/ # 贝壳深圳租房self.data []def get_page(self, url):获取页面内容try:response requests.get(url, headersself.headers)response.raise_for_status()return response.textexcept Exception as e:print(f获取页面失败: {e})return Nonedef parse_list_page(self, html):解析列表页面if not html:return []soup BeautifulSoup(html, lxml)house_items soup.select(.content__list--item)houses []for item in house_items:try:# 跳过广告if item.get(class) and content__list--item--ad in item.get(class):continuetitle_elem item.select_one(.content__list--item--title a)title title_elem.text.strip()link title_elem[href]if not link.startswith(http):link https://sz.zu.ke.com linkdesc item.select_one(.content__list--item--des).text.strip().replace(\n, )price_elem item.select_one(.content__list--item-price)price price_elem.select_one(em).text.strip() 元/月 if price_elem else 价格未知# 提取更多信息location area layout desc_parts desc.split()if len(desc_parts) 3:location desc_parts[0]layout desc_parts[1] if len(desc_parts) 1 else area desc_parts[2] if len(desc_parts) 2 else 
houses.append({标题: title,链接: link,位置: location,户型: layout,面积: area,价格: price,描述: desc})except Exception as e:print(f解析房源信息失败: {e})continuereturn housesdef scrape(self, pages3):爬取指定页数的租房信息for page in range(1, pages 1):print(f正在爬取第 {page} 页...)url f{self.base_url}pg{page}/html self.get_page(url)houses self.parse_list_page(html)self.data.extend(houses)# 添加随机延时避免被封IPtime.sleep(random.uniform(2, 5))return self.datadef save_to_csv(self, filenameNone):保存数据到CSV文件if not self.data:print(没有数据可保存)returnif filename is None:now datetime.now().strftime(%Y%m%d_%H%M%S)filename fbeike_rental_data_{now}.csvdf pd.DataFrame(self.data)df.to_csv(filename, indexFalse, encodingutf-8-sig)print(f数据已保存到 {filename}共 {len(self.data)} 条记录)def main():print(开始爬取贝壳找房租房信息...)scraper BeikeRentalScraper()scraper.scrape(pages5) # 默认爬取20页scraper.save_to_csv(beike_rental_data.csv)print(爬取完成)if __name__ __main__:main()

链家
#!/usr/bin/env python # -*- coding: utf-8 -*-import requests from bs4 import BeautifulSoup import pandas as pd import time import random import os from datetime import datetimeclass RentalScraper:def __init__(self):self.headers {User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36,Accept-Language: zh-CN,zh;q0.9,}self.base_url https://sz.lianjia.com/zufang/ # 以链家深圳租房为例self.data []def get_page(self, url):获取页面内容try:response requests.get(url, headersself.headers)response.raise_for_status()return response.textexcept Exception as e:print(f获取页面失败: {e})return Nonedef parse_list_page(self, html):解析列表页面if not html:return []soup BeautifulSoup(html, lxml)house_items soup.select(.content__list .content__list--item)houses []for item in house_items:try:title item.select_one(.content__list--item--title a).text.strip()link item.select_one(.content__list--item--title a)[href]if not link.startswith(http):link https://sz.lianjia.com linkdesc item.select_one(.content__list--item--des).text.strip().replace(\n, ).replace( , )price 
item.select_one(.content__list--item-price).text.strip()houses.append({标题: title,链接: link,描述: desc,价格: price})except Exception as e:print(f解析房源信息失败: {e})continuereturn housesdef scrape(self, pages3):爬取指定页数的租房信息for page in range(1, pages 1):print(f正在爬取第 {page} 页...)url f{self.base_url}pg{page}/html self.get_page(url)houses self.parse_list_page(html)self.data.extend(houses)# 添加随机延时避免被封IPtime.sleep(random.uniform(2, 5))return self.datadef save_to_csv(self, filenameNone):保存数据到CSV文件if not self.data:print(没有数据可保存)returnif filename is None:now datetime.now().strftime(%Y%m%d_%H%M%S)filename frental_data_{now}.csvdf pd.DataFrame(self.data)df.to_csv(filename, indexFalse, encodingutf-8-sig)print(f数据已保存到 {filename}共 {len(self.data)} 条记录)def main():print(开始爬取租房信息...)scraper RentalScraper()scraper.scrape(pages20) # 默认爬取20页scraper.save_to_csv(rental_data.csv)print(爬取完成)if __name__ __main__:main()

58同城
略

点击主页获取可视化分析。

可视化分析效果

查看全文

http://www.hkea.cn/news/14301075/