from getPageInformation import *
from getMainPageInformation import *
from bs4 import BeautifulSoup
import requests
import time
# Landing page plus the paginated search pages (page numbers 2 through 19).
url1 = 'http://bj.xiaozhu.com/'
urls = [
    'http://bj.xiaozhu.com/search-duanzufang-p{}-0/'.format(page)
    for page in range(2, 20)
]

# Collect the links returned by MainPageInformation for the landing page,
# then for every search page, pausing one second after each search page.
hreflist = MainPageInformation(url1)
for listing_url in urls:
    hreflist.extend(MainPageInformation(listing_url))
    time.sleep(1)

# Pass each collected link to getPageInformation, one second apart, and keep
# whatever it returns.
sourceData = []
for link in hreflist:
    sourceData.append(getPageInformation(link))
    time.sleep(1)

# Print every gathered record, each followed by extra blank lines.
for record in sourceData:
    print(record)
    print('\n')
# ---- module: getMainPageInformation ----
from bs4 import BeautifulSoup
import requests
def MainPageInformation(url, timeout=10):
    """Download a listing page and return the hrefs of its result links.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before ``requests`` gives
            up. Without a timeout ``requests.get`` may block indefinitely
            on an unresponsive server.

    Returns:
        A list holding the ``href`` value of every anchor matched by the
        CSS selector ``ul > li > a[class="resule_img_a"]``, in document
        order. Anchors whose ``href`` is missing or empty are skipped.

    Raises:
        requests.exceptions.RequestException: Propagated from
            ``requests.get`` (connection failure, timeout, invalid URL).
    """
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.text, 'lxml')
    # NOTE(review): "resule_img_a" is kept verbatim; presumably it mirrors
    # the class name used by the site's own markup -- confirm before "fixing".
    anchors = soup.select(' ul > li > a[class="resule_img_a"]')
    # Tag.get returns None when the attribute is absent; such entries are
    # not usable as links, so they are left out of the result.
    hrefs = (anchor.get('href') for anchor in anchors)
    return [href for href in hrefs if href]