1 程序结构介绍
代码结构图:
代码目录结构:
结果图:
2 数据源网站
https://www.dayfund.cn/incrank.html
3 GitHub地址
https://github.com/crazyjums/crawl_fund_data
4 文件代码:
main.py
from get_fund_code import *
from get_fund_data import *
from MysqlDB import MysqlFundCode
import time
def main():
    """Crawl the history of every fund stored in the "指数型" table and time the run.

    The fund list is read through MysqlFundCode and handed to save_to_mysql,
    which fetches and stores the per-fund data. Progress and the elapsed
    wall-clock time are printed to stdout.
    """
    started = time.time()
    print("程序正在运行....")
    index_funds = MysqlFundCode().get_code_and_name_and_type("指数型")
    save_to_mysql(fund_code_lists=index_funds)
    finished = time.time()
    print("一共运行了{}秒".format(finished - started))
# Start the crawl only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
get_fund_data.py
import logging
import sys
from bs4 import BeautifulSoup as bs
from MysqlDB import MysqlFundDetailData
from toolkit import LOG_FORMAT,DATE_FORMAT,get_year_mon_day,get_class_name,get_HTML_content
def get_refer_fund_detail_data(fund_code,start_date="2019-09-28",end_date="2019-12-28"):
    """Scrape the daily net-value rows of one fund from dayfund.cn.

    :param fund_code: fund code placed into the page URL.
    :param start_date: first day of the requested range (``sdate`` query parameter).
    :param end_date: last day of the requested range (``edate`` query parameter).
    :return: list of dicts with the keys ``date``, ``latest_nvalue_pu``,
        ``latest_sum_nvalue``, ``last_nvalue_pu``, ``last_sum_nvalue``,
        ``daily_growth`` and ``daily_growth_rate``. The first collected row is
        dropped (presumably the table header - confirm against the page).
        An empty list is returned when the page cannot be fetched or parsed,
        so callers can always iterate the result.
    """
    # (result key, index of the cell among the <tr> children). The indexes are
    # odd because iterating a <tr> also yields the whitespace text nodes that
    # sit between the cells.
    columns = (
        ("date", 1),
        ("latest_nvalue_pu", 7),
        ("latest_sum_nvalue", 9),
        ("last_nvalue_pu", 11),
        ("last_sum_nvalue", 13),
        ("daily_growth", 15),
        ("daily_growth_rate", 17),
    )
    try:
        url = "https://www.dayfund.cn/fundvalue/{}.html?sdate={}&edate={}".format(fund_code, start_date, end_date)
        resp = get_HTML_content(url)
        soup = bs(resp, "lxml")
        table = soup.find_all("table", attrs={"class": "mt1 clear"})[0]
        fund_lists = []
        for row in table.find_all("tr"):
            cells = list(row)
            # The highest index read is 17, so a usable row needs 18 children;
            # shorter rows are skipped instead of aborting the whole scrape.
            if len(cells) < 18:
                continue
            fund_lists.append({key: cells[index].string for key, index in columns})
        return fund_lists[1:]
    except Exception as e:
        logging.error("{} | {}".format(e,sys._getframe().f_code.co_name))
        return []
def save_to_mysql(start_time=None,end_time=None, fund_code_lists=None):
    """Fetch the history of every listed fund and store it in its own table.

    :param start_time: first day to fetch; defaults to ``get_year_mon_day(y=1)``.
    :param end_time: last day to fetch; defaults to ``get_year_mon_day()``.
    :param fund_code_lists: sequence of entries whose first item is the fund
        code and whose last item is the target table name. ``None`` or an
        empty sequence only logs a message.
    :return: None
    """
    func_name = sys._getframe().f_code.co_name
    if start_time is None:
        start_time = get_year_mon_day(y=1)
    if end_time is None:
        end_time = get_year_mon_day()
    if not fund_code_lists:
        logging.info("列表为空,没有爬取到数据。| {}".format(func_name))
        return
    mysql = MysqlFundDetailData()
    for info in fund_code_lists:
        fund_code = info[0]
        table_name = info[-1]
        # A failed scrape may yield None; treat it as "no rows" so one bad
        # fund does not stop the remaining ones.
        fund_lists = get_refer_fund_detail_data(fund_code, start_time, end_time) or []
        if not mysql.check_table_if_exist(table_name):
            logging.info("{}表没有创建,正在创建... | {}".format(table_name, func_name))
            mysql.create_table(table_name)
            logging.info("创建成功!正在将数据写入{}中... | {}".format(table_name, func_name))
        else:
            logging.info("表已存在,正在将数据写入{}中... | {}".format(table_name, func_name))
        for row in fund_lists:
            mysql.insert_into_table(table_name, row)
        logging.info("{}。写入成功。 | {}".format(table_name, func_name))
def get_name_data():
    """Print ``temp_dict["name"] = value`` lines generated from a pasted snippet.

    Developer scratch helper: every snippet line that contains exactly one
    ``=`` is rewritten as a dictionary assignment and printed to stdout.
    """
    snippet = '''
    fund_type = i[2]
    date = i[3]
    nvalue_pu = i[4]
    day_growth_rate = i[5]
    a_week_rate = i[6]
    a_month_rate = i[7]
    _3_month_rate = i[8]
    _6_month_rate = i[9]
    a_year_rate = i[10]
    _2_year_rate = i[11]
    _3_year_rate = i[12]
    from_this_year = i[13]
    from_found_year = i[14]
    poundage = i[-2]
    purchase_money = i[-5]
    '''
    for raw_line in snippet.split("\n"):
        line = raw_line.strip()
        # Only "name = value" lines qualify; blank lines have no "=" at all.
        if line.count("=") != 1:
            continue
        left, _, right = line.partition("=")
        print("temp_dict[\"{}\"] = {}".format(left.strip(), right.strip()))
get_fund_code.py
import demjson,re
import logging,sys
import os,time
from MysqlDB import MysqlFundCode
from toolkit import LOG_FORMAT,DATE_FORMAT,get_year_mon_day,get_class_name,get_HTML_content
def get_fund_code_lists_by_page(page):
    """Download one result page of the eastmoney fund ranking and parse it.

    :param page: page number placed into the ``pi`` query parameter.
    :return: list of dicts, one per fund record, keyed by ``fund_code``,
        ``fund_name``, ``fund_type``, ``date``, ``nvalue_pu``, the growth-rate
        fields, ``poundage`` and ``purchase_money``. An empty list is returned
        when the page cannot be fetched or decoded.
    """
    # Example request:
    # https://fundapi.eastmoney.com/fundtradenew.aspx?ft=zs&sc=1n&st=desc&pi=1&pn=100&cp=&ct=&cd=&ms=&fr=&plevel=&fst=&ftype=&fr1=&fl=0&isab=1
    # (result key, position of the field inside one "|"-separated record)
    field_positions = (
        ("fund_code", 0),
        ("fund_name", 1),
        ("fund_type", 2),
        ("date", 3),
        ("nvalue_pu", 4),
        ("day_growth_rate", 5),
        ("a_week_rate", 6),
        ("a_month_rate", 7),
        ("_3_month_rate", 8),
        ("_6_month_rate", 9),
        ("a_year_rate", 10),
        ("_2_year_rate", 11),
        ("_3_year_rate", 12),
        ("from_this_year", 13),
        ("from_found_year", 14),
        ("poundage", -2),
        ("purchase_money", -5),
    )
    try:
        url = "https://fundapi.eastmoney.com/fundtradenew.aspx?ft=zs&sc=1n&st=desc&pi={}&pn=100&cp=&ct=&cd=&ms=&fr=&plevel=&fst=&ftype=&fr1=&fl=0&isab=1".format(page)
        content = get_HTML_content(url)
        # The first 15 characters and the last character are cut off before
        # decoding (presumably a JavaScript "var ... =" prefix and a trailing
        # ";" - confirm against a live response).
        d = demjson.decode(content[15:-1])
        fund_info_lists = []
        for record in d["datas"]:
            # Split on the real separator. Turning "|" into spaces first would
            # shift every later field whenever a value itself contains a space.
            fields = record.split("|")
            fund_info_lists.append({key: fields[pos] for key, pos in field_positions})
        logging.info("{} | {}".format("第 {} 页数据抓取完成。".format(page),sys._getframe().f_code.co_name))
        return fund_info_lists
    except Exception as e:
        logging.error("{} | {}".format(e, sys._getframe().f_code.co_name))
        return []
def get_total_page_num():
    """Return how many result pages the eastmoney fund ranking reports.

    :return: the ``allPages`` value of the first result page as an int, or
        ``0`` when the page cannot be fetched or decoded, so that callers
        building ``range(1, total + 1)`` simply get no pages.
    """
    try:
        url = "https://fundapi.eastmoney.com/fundtradenew.aspx?ft=zs&sc=1n&st=desc&pi=1&pn=100&cp=&ct=&cd=&ms=&fr=&plevel=&fst=&ftype=&fr1=&fl=0&isab=1"
        content = get_HTML_content(url)
        # Plain string replacement: the separator is a literal "|", no regex needed.
        payload = content[15:-1].replace("|", " ")
        d = demjson.decode(payload)
        total_page = int(d["allPages"])
        logging.info("{} pages | {}".format(total_page, sys._getframe().f_code.co_name))
        return total_page
    except Exception as e:
        logging.error("{} | {}".format(e, sys._getframe().f_code.co_name))
        return 0
def get_all_fund_lists():
    """Collect the parsed fund records of every result page.

    :return: list with one entry per successfully fetched page; each entry is
        the list returned by ``get_fund_code_lists_by_page``.
    """
    # The page-count helper can report a failure as None; treat that as zero
    # pages instead of failing on ``None + 1``.
    total_pages = get_total_page_num() or 0
    all_fund_lists = []
    for page in range(1, total_pages + 1):
        page_funds = get_fund_code_lists_by_page(page)
        # A page that failed to download has nothing to iterate later on.
        if page_funds is None:
            continue
        all_fund_lists.append(page_funds)
    return all_fund_lists
def write_all_fund_lists_into_file(filename="all_fund_lists.txt"):
    """Dump every page of fund records to a text file, one page per line.

    An existing file is appended to, with a timestamped banner before the new
    data and a dashed line after it; otherwise the file is created.

    :param filename: path of the text file to write.
    """
    func_name = sys._getframe().f_code.co_name
    appending = os.path.exists(filename)
    mode = "a+" if appending else "w"
    with open(filename, mode, encoding="utf-8") as file:
        if appending:
            logging.info("{} 文件存在,正在追加... | {}".format(filename, func_name))
            file.write("\n\n")
            file.write("-"*20 + "这是新加的数据,时间:{}".format(time.ctime()) + "\n\n")
        else:
            logging.info("{} 文件不存在,正在创建并写数据... | {}".format(filename, func_name))
        for page_funds in get_all_fund_lists():
            file.write(str(page_funds))
            file.write("\n")
        if appending:
            file.write("\n" + "-"*20)
def get_name_data():
    """Print SQL/format-string fragments generated from pasted dict assignments.

    Developer scratch helper. From the ``fund_dict["name"] = name`` lines it
    prints, in this order: an ``insert into`` template, the quoted value
    placeholders, the ``name=each_data["name"]`` keyword list and the
    comma-separated column names.
    """
    import re
    s = '''fund_dict["date"] = date
    fund_dict["latest_nvalue_pu"] = latest_nvalue_pu
    fund_dict["latest_sum_nvalue"] = latest_sum_nvalue
    fund_dict["last_nvalue_pu"] = last_nvalue_pu
    fund_dict["last_sum_nvalue"] = last_sum_nvalue
    fund_dict["daily_growth"] = daily_growth
    fund_dict["daily_growth_rate"] = daily_growth_rate'''
    names = []
    for line in s.split("\n"):
        target = line.split("=")[0].strip()
        # Strip the surrounding fund_dict["..."] to keep only the key name.
        target = re.sub(r'fund_dict\["', "", target)
        target = re.sub(r'"\]', "", target)
        names.append(target)
    tt = "".join(name + "," for name in names)
    data = "".join("{}=each_data[\"{}\"],".format(name, name) for name in names)
    values = "".join(r"\'{" + name + r"}\'" + "," for name in names)
    sql = r"insert into {table_name} " + "({}) values({})".format(tt, values)
    for piece in (sql, values, data, tt):
        print(piece)
def get_sql():
    """Print one VARCHAR(50) column definition line per detail-table column.

    Developer scratch helper that writes the generated lines to stdout.
    """
    columns = "date,fund_name,latest_nvalue_pu,latest_sum_nvalue,last_nvalue_pu,last_sum_nvalue,daily_growth,daily_growth_rate".split(",")
    # Shape being imitated: `fund_code` VARCHAR(50) NULL,
    definitions = "".join("`" + column + "`" + "VARCHAR(50) NULL," + "\n" for column in columns)
    print(definitions)
def save_to_mysql():
    """Store every fund record of every result page in one MySQL table.

    The table is named after the ``fund_type`` of the second record on page 1
    and is created first when it does not exist yet.
    """
    func_name = sys._getframe().f_code.co_name
    mysql = MysqlFundCode()
    table_name = get_fund_code_lists_by_page(1)[1]["fund_type"]
    all_fund_lists = get_all_fund_lists()
    if mysql.check_table_if_exist(table_name=table_name):
        logging.info("表已存在,正在将数据写入{}中... | {}".format(table_name, func_name))
    else:
        logging.info("{}表没有创建,正在创建... | {}".format(table_name, func_name))
        mysql.create_table(table_name)
        logging.info("创建成功!正在将数据写入{}中... | {}".format(table_name, func_name))
    # Both cases insert the same way; only the messages above differ.
    for page_funds in all_fund_lists:
        for record in page_funds:
            mysql.insert_into_table(table_name, record)
    logging.info("{}。写入成功。 | {}".format(table_name, func_name))
MysqlDB.py
import pymysql,logging,sys
from toolkit import LOG_FORMAT,DATE_FORMAT,get_year_mon_day,get_class_name
class MysqlFundCode():
    """MySQL access for the fund-list tables of the ``fund_data`` database.

    Every method opens its own connection through :meth:`DB` and closes it
    before returning. ``count`` holds the number of rows inserted by this
    instance.

    NOTE(review): host and credentials are hard-coded in ``__init__``.
    """

    # Columns written by insert_into_table, in statement order.
    _COLUMNS = (
        "fund_code", "fund_name", "fund_type", "date", "nvalue_pu",
        "day_growth_rate", "a_week_rate", "a_month_rate", "_3_month_rate",
        "_6_month_rate", "a_year_rate", "_2_year_rate", "_3_year_rate",
        "from_this_year", "from_found_year", "poundage", "purchase_money",
    )

    def __init__(self):
        self.host = "127.0.0.1"
        self.user = "root"
        self.password = "root"
        self.database = "fund_data"
        self.charset = "utf8mb4"
        self.port = 3306
        self.count = 0

    @staticmethod
    def _quote_identifier(name):
        """Return ``name`` as a backtick-quoted MySQL identifier."""
        return "`" + str(name).replace("`", "``") + "`"

    def DB(self):
        """Open and return a new connection to the configured database."""
        # Keyword arguments: PyMySQL 1.0 and later reject positional ones.
        return pymysql.connect(
            host=self.host,
            user=self.user,
            password=self.password,
            database=self.database,
            port=self.port,
            charset=self.charset,
        )

    def insert_into_table(self, table_name,each_data):
        """Insert one fund record into ``table_name``.

        :param table_name: target table.
        :param each_data: mapping that holds a value for every name in
            ``_COLUMNS``; a missing key raises ``KeyError``.
        :return: None. Database errors are logged, not raised.
        """
        mysqlDB = self.DB()
        # The values are scraped text, so they travel as query parameters and
        # are escaped by the driver instead of being pasted into the SQL.
        params = [each_data[column] for column in self._COLUMNS]
        sql = "insert into {table}({columns}) values({placeholders})".format(
            # "%" is doubled because the driver applies %-formatting to the statement.
            table=self._quote_identifier(table_name).replace("%", "%%"),
            columns=",".join("`" + column + "`" for column in self._COLUMNS),
            placeholders=",".join(["%s"] * len(self._COLUMNS)),
        )
        try:
            with mysqlDB.cursor() as cursor:
                cursor.execute(sql, params)
                mysqlDB.commit()
                if cursor.rowcount >= 1:
                    self.count += 1
        except Exception as e:
            logging.error("{} | {} | {}".format(e, get_class_name(self), sys._getframe().f_code.co_name))
        finally:
            mysqlDB.close()

    def create_table(self,table_name):
        """Create the fund-list table ``table_name`` if it does not exist.

        :return: True when the statement ran, False when it failed (the error is logged).
        """
        mysqlDB = self.DB()
        sql = '''
        CREATE TABLE IF NOT EXISTS {table_name}(
        `id` bigint NOT NULL AUTO_INCREMENT ,
        `fund_code` VARCHAR(40) NULL,
        `fund_name` VARCHAR(100) NULL,
        `fund_type` VARCHAR(40) NULL,
        `date` VARCHAR(40) NULL,
        `nvalue_pu` VARCHAR(40) NULL,
        `day_growth_rate` VARCHAR(40) NULL,
        `a_week_rate` VARCHAR(40) NULL,
        `a_month_rate` VARCHAR(40) NULL,
        `_3_month_rate` VARCHAR(40) NULL,
        `_6_month_rate` VARCHAR(40) NULL,
        `a_year_rate` VARCHAR(40) NULL,
        `_2_year_rate` VARCHAR(40) NULL,
        `_3_year_rate` VARCHAR(40) NULL,
        `from_this_year` VARCHAR(40) NULL,
        `from_found_year` VARCHAR(40) NULL,
        `poundage` VARCHAR(40) NULL,
        `purchase_money` VARCHAR(40) NULL,
        PRIMARY KEY ( `id` )
        )ENGINE=InnoDB DEFAULT CHARSET=utf8;
        '''.format(table_name=self._quote_identifier(table_name))
        try:
            with mysqlDB.cursor() as cursor:
                cursor.execute(sql)
            return True
        except Exception as e:
            logging.error("{} | {} | {}".format(e, get_class_name(self), sys._getframe().f_code.co_name))
            return False
        finally:
            mysqlDB.close()

    def check_table_if_exist(self,table_name):
        """Return True when a table named exactly ``table_name`` exists.

        :return: True or False; False as well when the lookup fails (the error is logged).
        """
        mysqlDB = self.DB()
        try:
            with mysqlDB.cursor() as cursor:
                cursor.execute("show tables")
                # Compare whole names: a substring test would report "指数型"
                # as existing as soon as any "<code>_<name>_指数型" table exists.
                return any(row[0] == table_name for row in cursor.fetchall())
        except Exception as e:
            logging.error("{} | {} | {}".format(e, get_class_name(self), sys._getframe().f_code.co_name))
            return False
        finally:
            mysqlDB.close()

    def get_code_and_name_and_type(self,table_name):
        """Read every fund of ``table_name`` and derive its detail-table name.

        :return: list of ``[fund_code, "<fund_code>_<fund_name>_<fund_type>"]``
            entries; an empty list when the query fails (the error is logged).
        """
        mysqlDB = self.DB()
        sql = "SELECT fund_code,fund_name,fund_type FROM {}".format(self._quote_identifier(table_name))
        try:
            with mysqlDB.cursor() as cursor:
                cursor.execute(sql)
                # fetchall() yields one (fund_code, fund_name, fund_type) tuple per row.
                return [
                    [row[0], "{}_{}_{}".format(row[0], row[1], row[2])]
                    for row in cursor.fetchall()
                ]
        except Exception as e:
            logging.error("{} | {} | {}".format(e, get_class_name(self), sys._getframe().f_code.co_name))
            return []
        finally:
            mysqlDB.close()

    def show_data_rows(self):
        """Print the total number of rows over all tables of the database."""
        mysqlDB = self.DB()
        total_count = 0
        try:
            with mysqlDB.cursor() as cursor:
                cursor.execute("show tables")
                table_lists = [row[0] for row in cursor.fetchall()]
                for table in table_lists:
                    cursor.execute("select count(*) from {}".format(self._quote_identifier(table)))
                    total_count += cursor.fetchall()[0][0]
            print("_"*20)
            print("from now on,there are {} lines data in database.".format(self.good_to_show(total_count)))
            print("_" * 20)
        except Exception as e:
            logging.error("{} | {} | {}".format(e, get_class_name(self), sys._getframe().f_code.co_name))
        finally:
            mysqlDB.close()

    def show_insert_rows(self):
        """Print how many rows this instance has inserted so far."""
        print("_" * 20)
        print("there total insert {} lines data in database.".format(self.good_to_show(self.count)))
        print("_" * 20)

    @staticmethod
    def good_to_show(num):
        """Abbreviate a count with a Chinese magnitude unit and one decimal digit.

        Numbers with fewer than five digits are returned unchanged, e.g.
        ``12345 -> "1.2万"``, ``1234567 -> "1.2百万"``, ``12345678901 -> "123.4亿"``.
        The decimal digit is truncated, not rounded.
        """
        digits = str(num)
        length = len(digits)
        if length < 5:
            return digits
        if length <= 6:
            split, unit = length - 4, "万"
        elif length == 7:
            split, unit = 1, "百万"
        elif length == 8:
            split, unit = 1, "千万"
        else:
            split, unit = length - 8, "亿"
        # The decimal is the digit right after the integer part, for every length.
        return digits[:split] + "." + digits[split] + unit
class MysqlFundDetailData():
    """MySQL access for the per-fund history tables of the ``fund_data`` database.

    Every method opens its own connection through :meth:`DB` and closes it
    before returning. ``count`` holds the number of rows inserted by this
    instance.

    NOTE(review): host and credentials are hard-coded in ``__init__``.
    """

    # Columns written by insert_into_table, in statement order.
    _COLUMNS = (
        "date", "latest_nvalue_pu", "latest_sum_nvalue", "last_nvalue_pu",
        "last_sum_nvalue", "daily_growth", "daily_growth_rate",
    )

    def __init__(self):
        self.host = "127.0.0.1"
        self.user = "root"
        self.password = "root"
        self.database = "fund_data"
        self.charset = "utf8mb4"
        self.port = 3306
        self.count = 0

    @staticmethod
    def _quote_identifier(name):
        """Return ``name`` as a backtick-quoted MySQL identifier."""
        return "`" + str(name).replace("`", "``") + "`"

    def DB(self):
        """Open and return a new connection to the configured database."""
        # Keyword arguments: PyMySQL 1.0 and later reject positional ones.
        return pymysql.connect(
            host=self.host,
            user=self.user,
            password=self.password,
            database=self.database,
            port=self.port,
            charset=self.charset,
        )

    def insert_into_table(self, table_name,each_data):
        """Insert one daily record into ``table_name``.

        :param table_name: target table.
        :param each_data: mapping that holds a value for every name in
            ``_COLUMNS``; a missing key raises ``KeyError``. A ``None`` value
            is stored as SQL NULL.
        :return: None. Database errors are logged, not raised.
        """
        mysqlDB = self.DB()
        # The values are scraped text, so they travel as query parameters and
        # are escaped by the driver. They are converted to plain str because
        # the scraper may hand over string subclasses from the HTML parser.
        params = [
            None if each_data[column] is None else str(each_data[column])
            for column in self._COLUMNS
        ]
        sql = "insert into {table}({columns}) values({placeholders})".format(
            # The table name is quoted because it is derived from fund names;
            # "%" is doubled because the driver applies %-formatting to the statement.
            table=self._quote_identifier(table_name).replace("%", "%%"),
            columns=",".join("`" + column + "`" for column in self._COLUMNS),
            placeholders=",".join(["%s"] * len(self._COLUMNS)),
        )
        try:
            with mysqlDB.cursor() as cursor:
                cursor.execute(sql, params)
                mysqlDB.commit()
                if cursor.rowcount >= 1:
                    self.count += 1
        except Exception as e:
            logging.error("{} | {} | {}".format(e, get_class_name(self), sys._getframe().f_code.co_name))
        finally:
            mysqlDB.close()

    def create_table(self,table_name):
        """Create the history table ``table_name`` if it does not exist.

        :return: True when the statement ran, False when it failed (the error is logged).
        """
        mysqlDB = self.DB()
        sql = '''
        CREATE TABLE IF NOT EXISTS {table_name}(
        `id` bigint NOT NULL AUTO_INCREMENT ,
        `date`VARCHAR(50) NULL,
        `latest_nvalue_pu`VARCHAR(50) NULL,
        `latest_sum_nvalue`VARCHAR(50) NULL,
        `last_nvalue_pu`VARCHAR(50) NULL,
        `last_sum_nvalue`VARCHAR(50) NULL,
        `daily_growth`VARCHAR(50) NULL,
        `daily_growth_rate`VARCHAR(50) NULL,
        PRIMARY KEY ( `id` )
        )ENGINE=InnoDB DEFAULT CHARSET=utf8;
        '''.format(table_name=self._quote_identifier(table_name))
        try:
            with mysqlDB.cursor() as cursor:
                cursor.execute(sql)
            return True
        except Exception as e:
            logging.error("{} | {} | {}".format(e, get_class_name(self), sys._getframe().f_code.co_name))
            return False
        finally:
            mysqlDB.close()

    def check_table_if_exist(self,table_name):
        """Return True when a table named exactly ``table_name`` exists.

        :return: True or False; False as well when the lookup fails (the error is logged).
        """
        mysqlDB = self.DB()
        try:
            with mysqlDB.cursor() as cursor:
                cursor.execute("show tables")
                # Compare whole names: a substring test would treat a table
                # as existing whenever its name is part of a longer table name.
                return any(row[0] == table_name for row in cursor.fetchall())
        except Exception as e:
            logging.error("{} | {} | {}".format(e, get_class_name(self), sys._getframe().f_code.co_name))
            return False
        finally:
            mysqlDB.close()
toolkit.py
import calendar
import datetime
import logging
import os
import sys
import time
from random import randint

import requests
# Record layout handed to logging.basicConfig: timestamp - level name - message.
LOG_FORMAT = "%(asctime)s - %(levelname)s - %(message)s"
# Timestamp layout for %(asctime)s: month/day/year, 24-hour time, then an AM/PM marker.
DATE_FORMAT = "%m/%d/%Y %H:%M:%S %p"
def get_year_mon_day(y = 0,m = 0,d = 0):
    """Return the local date shifted back by ``y`` years, ``m`` months and ``d`` days.

    Years and months are applied first, keeping the day of month where the
    target month has it and using that month's last day otherwise; the days
    are subtracted afterwards.

    :param y: number of years to go back.
    :param m: number of months to go back.
    :param d: number of days to go back.
    :return: ``"year-month-day"`` without zero padding, e.g. ``"2020-1-5"``.
    """
    today = datetime.date.today()
    # Work on a running month count so that going below January borrows
    # from the year instead of producing month 0 or a negative month.
    months = today.year * 12 + (today.month - 1) - (y * 12 + m)
    year, month_index = divmod(months, 12)
    month = month_index + 1
    # Clamp the day, e.g. 31 March minus one month becomes the last day of February.
    day = min(today.day, calendar.monthrange(year, month)[1])
    shifted = datetime.date(year, month, day) - datetime.timedelta(days=d)
    return "{}-{}-{}".format(shifted.year, shifted.month, shifted.day)
# One log file per day below ./log. The directory is created first because
# logging.basicConfig opens the file right away and fails when it is missing.
filename = "log/mylog_{}.log".format(get_year_mon_day())
os.makedirs(os.path.dirname(filename), exist_ok=True)
logging.basicConfig(filename=filename, level=logging.DEBUG, format=LOG_FORMAT, datefmt=DATE_FORMAT)
def get_class_name(self):
    """Return the name of the class of the object passed as ``self``."""
    cls = self.__class__
    return cls.__name__
def getUser_Agent():
    """Return request headers carrying one randomly chosen User-Agent string.

    :return: dict with the single key ``"User-Agent"``.
    """
    user_agents = [
        # Desktop user agents
        'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',  # Safari 5.1 - Mac
        'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',  # Safari 5.1 - Windows
        'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)',  # IE 9.0 (closing parenthesis restored)
        'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)',  # IE 8.0
        'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)',  # IE 7.0
        'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',  # IE 6.0
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.70 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',  # Firefox 4.0.1 - Mac
        'Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',  # Firefox 4.0.1 - Windows
        'Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11',  # Opera 11.11 - Mac
        'Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11',  # Opera 11.11 - Windows
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',  # Chrome 17.0 - Mac
        'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Maxthon 2.0)',  # Maxthon
        'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; TencentTraveler 4.0)',  # Tencent Traveler
        'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)',  # The World 2.x
        'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; The World)',  # The World 3.x (leading "M" restored)
        'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)',  # Sogou Browser 1.x
        'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)',  # 360 Browser
        'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Avant Browser)',  # Avant
        'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)',  # Green Browser
        # Mobile user agents
        'Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5',  # Safari iOS 4.33 - iPhone
        'Mozilla/5.0 (iPod; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5',  # Safari iOS 4.33 - iPod Touch
        'Mozilla/5.0 (iPad; U; CPU OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5',  # Safari iOS 4.33 - iPad
        'Mozilla/5.0 (Linux; U; Android 2.3.7; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1',  # Android N1
        'MQQBrowser/26 Mozilla/5.0 (Linux; U; Android 2.3.7; zh-cn; MB200 Build/GRJ22; CyanogenMod-7) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1',  # QQ Browser for Android
        'Opera/9.80 (Android 2.3.4; Linux; Opera Mobi/build-1107180945; U; en-GB) Presto/2.8.149 Version/11.10',  # Android Opera Mobile
        'Mozilla/5.0 (BlackBerry; U; BlackBerry 9800; en) AppleWebKit/534.1+ (KHTML, like Gecko) Version/6.0.0.337 Mobile Safari/534.1+',  # BlackBerry 9800
        'Mozilla/5.0 (hp-tablet; Linux; hpwOS/3.0.0; U; en-US) AppleWebKit/534.6 (KHTML, like Gecko) wOSBrowser/233.70 Safari/534.6 TouchPad/1.0',  # WebOS HP Touchpad
        'Mozilla/5.0 (SymbianOS/9.4; Series60/5.0 NokiaN97-1/20.0.019; Profile/MIDP-2.1 Configuration/CLDC-1.1) AppleWebKit/525 (KHTML, like Gecko) BrowserNG/7.1.18124',  # Nokia N97
        'Mozilla/5.0 (compatible; MSIE 9.0; Windows Phone OS 7.5; Trident/5.0; IEMobile/9.0; HTC; Titan)',  # Windows Phone Mango
        'UCWEB7.0.2.37/28/999',  # UC (plain)
        'Openwave/ UCWEB7.0.2.37/28/999',  # UC Openwave
        'Mozilla/4.0 (compatible; MSIE 6.0; ) Opera/UCWEB7.0.2.37/28/999',  # UC Opera
        "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)",
        "Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
        "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)",
        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
        "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)",
        "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)",
        "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)",
        "Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6",
        "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1",
        "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0",
        "Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5",
        "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20",
        "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER",
        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; LBBROWSER)",
        "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E; LBBROWSER)",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 LBBROWSER",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)",
        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400)",
        "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; 360SE)",
        "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)",
        "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1",
        "Mozilla/5.0 (iPad; U; CPU OS 4_2_1 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148 Safari/6533.18.5",
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:2.0b13pre) Gecko/20110307 Firefox/4.0b13pre",
        "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11",
        "Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
    ]
    # randint is inclusive on both ends, hence the "- 1".
    return {'User-Agent': user_agents[randint(0, len(user_agents) - 1)]}
# One random User-Agent is picked when the module is imported;
# get_HTML_content sends this same header dict with every request.
headers = getUser_Agent()
def get_HTML_content(url, timeout=10):
    """Download ``url`` and return the response body decoded as UTF-8.

    :param url: address to fetch with the module-level ``headers``.
    :param timeout: seconds to wait for the server before giving up; without
        a timeout a stalled connection would block the crawl indefinitely.
    :return: the response body as ``str``.
    :raises requests.exceptions.RequestException: when the request fails or times out.
    :raises UnicodeDecodeError: when the body is not valid UTF-8.
    """
    response = requests.get(url=url, headers=headers, timeout=timeout)
    return response.content.decode("utf-8")