详解Python之Scrapy爬虫教程NBA球员数据存放到Mysql数据库

时间:2021-05-23

获取要爬取的URL



爬虫前期工作

用Pycharm打开项目开始写爬虫文件

字段文件items

# Define here the models for your scraped items
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/items.html
import scrapy


class NbaprojectItem(scrapy.Item):
    """One NBA player record scraped from the league roster feed.

    Every attribute is a ``scrapy.Field()`` — the fixed way to declare
    item fields in Scrapy.
    """

    # Player name in English
    engName = scrapy.Field()
    # Player name in Chinese
    chName = scrapy.Field()
    # Height
    height = scrapy.Field()
    # Weight
    weight = scrapy.Field()
    # Country name in English
    contryEn = scrapy.Field()
    # Country name in Chinese
    contryCh = scrapy.Field()
    # Years of NBA experience
    experience = scrapy.Field()
    # Jersey number
    jerseyNo = scrapy.Field()
    # Draft year
    draftYear = scrapy.Field()
    # Team name in English
    engTeam = scrapy.Field()
    # Team name in Chinese
    chTeam = scrapy.Field()
    # Playing position
    position = scrapy.Field()
    # Conference (e.g. Southeast)
    displayConference = scrapy.Field()
    # Division
    division = scrapy.Field()

爬虫文件

import json

import scrapy

from nbaProject.items import NbaprojectItem


class NbaspiderSpider(scrapy.Spider):
    """Spider for the NBA China player-list JSON endpoint.

    Downloads the roster feed and yields one :class:`NbaprojectItem`
    per player.
    """

    name = 'nbaSpider'
    allowed_domains = ['nba.com']
    # First URL(s) to crawl; this endpoint returns JSON, not HTML,
    # so parsing is done with the json module rather than selectors.
    # start_urls = ['http://nba.com/']
    start_urls = ['https://china.nba.com/static/data/league/playerlist.json']

    def parse(self, response):
        """Parse the roster JSON response and yield one item per player.

        NOTE: the original version also accumulated every field into 14
        parallel lists that were filled but never read; that dead code
        (wasted memory and per-player work) has been removed.

        :param response: Scrapy response whose body is the roster JSON.
        :yields: NbaprojectItem with all player/team fields as strings.
        """
        data = json.loads(response.text)['payload']['players']
        # enumerate replaces the hand-rolled `count` counter.
        for count, player in enumerate(data, start=1):
            # Hoist the two nested dicts instead of re-indexing them
            # for every field.
            profile = player['playerProfile']
            team = player['teamProfile']

            item = NbaprojectItem()
            item['engName'] = str(profile['firstNameEn'] + profile['lastNameEn'])
            item['chName'] = str(profile['firstName'] + profile['lastName'])
            item['contryEn'] = str(profile['countryEn'])
            item['contryCh'] = str(profile['country'])
            item['height'] = str(profile['height'])
            item['weight'] = str(profile['weight'])
            item['experience'] = str(profile['experience'])
            item['jerseyNo'] = str(profile['jerseyNo'])
            item['draftYear'] = str(profile['draftYear'])
            item['engTeam'] = str(team['code'])
            item['chTeam'] = str(team['displayAbbr'])
            item['position'] = str(profile['position'])
            item['displayConference'] = str(team['displayConference'])
            item['division'] = str(team['division'])

            # Progress log, then hand the item back to the engine -> pipeline.
            print("传输了", count, "条字段")
            yield item

配置文件->开启管道文件

# Scrapy settings for nbaProject project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
# https://docs.scrapy.org/en/latest/topics/settings.html
# https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html

# ---------- unmodified boilerplate ----------
BOT_NAME = 'nbaProject'

SPIDER_MODULES = ['nbaProject.spiders']
NEWSPIDER_MODULE = 'nbaProject.spiders'
# ---------- unmodified boilerplate ----------

# NOTE(review): the original article text was truncated here — it ran
# straight into a stray fragment of the pipeline file
# (`print("数据提交成功!")`).  The section's stated purpose is "enable the
# item pipeline", so the standard ITEM_PIPELINES setting is restored
# below; confirm the pipeline class name against pipelines.py.
ITEM_PIPELINES = {
    'nbaProject.pipelines.NbaprojectPipeline': 300,
}

启动爬虫

屏幕上滚动的数据


去数据库查看数据


简简单单就把球员数据爬回来啦~

到此这篇关于详解Python之Scrapy爬虫教程NBA球员数据存放到Mysql数据库的文章就介绍到这了,更多相关Scrapy爬虫NBA球员数据存放到Mysql的内容,请搜索以前的文章或继续浏览下面的相关文章,希望大家以后多多支持!

声明:本页内容来源网络,仅供用户参考;我单位不保证亦不表示资料全面及准确无误,也不保证亦不表示这些资料为最新信息,如因任何原因,本网内容或者用户因倚赖本网内容造成任何损失或损害,我单位将不会负任何法律责任。如涉及版权问题,请提交至online#300.cn邮箱联系删除。

相关文章