1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98
|
import pymysql
import requests
class TxWork:
    """Scrape Python job postings from the Tencent careers API into MySQL.

    Pages of postings are requested from ``aim_url`` and every posting is
    stored as one row of the ``tx_work`` table.
    """

    # Query endpoint; ``{}`` is filled with the page index (starting at 1).
    aim_url = (
        "https://careers.tencent.com/tencentcareer/api/post/Query?timestamp=1736323379245&countryId=&cityId=&bgIds=&productId=&categoryId=&parentCategoryId=&attrId=&keyword=python"
        "&pageIndex={}&pageSize=10&language=zh-cn&area=cn"
    )
    # Browser-like user agent sent with every API request.
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36'
    }
    # Seconds to wait for the API before giving up, so a stalled connection
    # cannot hang the scraper forever.
    request_timeout = 10

    def __init__(self):
        """Open the MySQL connection and create a cursor on it."""
        # NOTE(review): connection settings are hard-coded; consider moving
        # them to configuration.
        self.db = pymysql.connect(host="localhost", user="root", password="root", db="py_spider")
        self.cursor = self.db.cursor()

    @classmethod
    def get_information(cls, max_pages=99):
        """Yield the job postings one API page at a time.

        Args:
            max_pages: Maximum number of pages to request. Defaults to 99,
                the limit that used to be hard-coded.

        Yields:
            The ``Posts`` value of each non-empty page.

        Raises:
            requests.RequestException: If a request fails, times out, or
                the server answers with an HTTP error status.
        """
        for page in range(1, max_pages + 1):
            response = requests.get(
                cls.aim_url.format(page),
                headers=cls.headers,
                timeout=cls.request_timeout,
            )
            response.raise_for_status()
            data = response.json()['Data']
            posts = data.get('Posts')
            # Stop on the first page without postings. Checking ``posts`` as
            # well as ``Count`` avoids yielding None/[] to the caller.
            if data['Count'] == 0 or not posts:
                # ``page`` is the first empty page, so page - 1 pages had data.
                print(f"全部数据已经插入, 共有{page - 1}页数据")
                break
            print(f"正在抓取第{page}页的数据")
            yield posts

    def creat_table(self):
        """Create the ``tx_work`` table if it does not exist yet.

        Failures are reported on stdout and not re-raised.
        """
        # NOTE(review): MySQL treats LONG as a synonym for MEDIUMTEXT, so
        # post_id is stored as text -- confirm whether BIGINT was intended.
        sql = """
            create table if not exists tx_work(
                id int auto_increment not null primary key,
                post_id long ,
                recruit_postName varchar(100) ,
                responsibility text,
                post_url varchar(100)
            ) charset =utf8mb4 engine=innodb;
        """
        try:
            self.cursor.execute(sql)
            print("tx_work建表成功")
        except Exception as e:
            print("==>tx_work建表失败", e)

    def insert_data(self, *params):
        """Insert one posting into ``tx_work`` and commit it.

        Args:
            *params: Four positional values in column order: post_id,
                recruit_postName, responsibility, post_url.

        Returns:
            None. A failed insert is rolled back and reported on stdout;
            the exception is not propagated, so one bad row does not stop
            the run.
        """
        print(params)
        sql = "insert into tx_work(post_id, recruit_postName,responsibility,post_url) values(%s,%s,%s,%s);"
        try:
            self.cursor.execute(sql, params)
            self.db.commit()
            print("成功插入")
        except Exception as e:
            self.db.rollback()
            print("==>插入数据失败", params, e)

    def main(self):
        """Create the table, store every scraped posting, then disconnect.

        The cursor and connection are closed even when scraping fails, so
        an error does not leak the database connection.
        """
        try:
            self.creat_table()

            work_list = self.get_information()
            for works in work_list:
                for work in works:
                    self.insert_data(work['PostId'], work['RecruitPostName'], work['Responsibility'], work['PostURL'])
                    print("数据插入中: PostId:{}, RecruitPostName:{}, Responsibility:{}, PostURL:{}".format(work['PostId'], work['RecruitPostName'], work['Responsibility'], work['PostURL']))
        finally:
            self.cursor.close()
            self.db.close()
# Script entry point: build the scraper and run the full scrape-and-store job.
if __name__ == '__main__':
    tx_work = TxWork()
    tx_work.main()
|