This post builds a web crawler with Python 3. The site to crawl this time is the dating website http://www.lovewzly.com/jiaoyou.html. Without further ado, let's go straight to the code:
First, import the required modules:
import requests
import os
Then write a helper function for each search filter:
# Set the age
def set_age():
    """Prompt for the partner's expected age and return its age bracket.

    Returns:
        A ``(startage, endage)`` tuple: ``(21, 30)`` for ages 21-30 and
        ``(31, 40)`` for ages 31-40 (both ends inclusive).

    Raises:
        ValueError: If the input is not an integer, or if the age falls
            outside the supported 21-40 range.
    """
    # input() returns a string, so convert it to an integer first.
    age = int(input(" Please enter the expected age of the other party :"))
    for startage, endage in ((21, 30), (31, 40)):
        if startage <= age <= endage:
            return startage, endage
    # Fail with a clear message instead of reaching ``return`` with
    # unassigned locals when no bracket matches.
    raise ValueError("age must be between 21 and 40, got {}".format(age))
# Set gender
# Accepted answers mapped to the gender code sent to the search API.
_GENDER_CODES = {
    ' male ': 1,
    ' Woman ': 2,
}


def set_sex():
    """Prompt for the partner's expected gender and return its numeric code.

    The answer is matched against the known labels ignoring surrounding
    whitespace and letter case, so the exact padded label and a plainly
    typed one are both accepted.

    Returns:
        ``1`` for male, ``2`` for woman.

    Raises:
        ValueError: If the answer is not one of the known labels.
    """
    sex = input(" Please enter the expected gender of the other party :")
    by_label = {
        label.strip().casefold(): code
        for label, code in _GENDER_CODES.items()
    }
    try:
        return by_label[sex.strip().casefold()]
    except KeyError:
        # Fail with a clear message instead of reaching ``return`` with an
        # unassigned local when nothing matches.
        options = ", ".join(label.strip() for label in _GENDER_CODES)
        raise ValueError(
            "unknown gender {!r}; expected one of: {}".format(sex, options)
        ) from None
# Set city
# Supported city names mapped to the city id sent to the search API.
_CITY_IDS = {
    ' Beijing ': 52,
    ' Guangzhou ': 76,
    ' fuzhou ': 53,
    ' Xiamen ': 60,
    ' Hangzhou ': 383,
    ' Qingdao ': 284,
    ' Changsha ': 197,
    ' jinan ': 283,
    ' nanjing ': 220,
    ' Hong Kong ': 395,
    ' Shanghai ': 321,
    ' Chengdu ': 322,
    ' wuhan ': 180,
    ' Suzhou ': 221,
    ' Shenzhen ': 77,
}


def set_city():
    """Prompt for the partner's expected city and return its numeric id.

    The answer is matched against the supported city names ignoring
    surrounding whitespace and letter case.

    Returns:
        The integer city id associated with the chosen city.

    Raises:
        ValueError: If the city is not in the supported list.
    """
    city = input(" Please enter the other party's expected city :")
    by_name = {
        name.strip().casefold(): cityid
        for name, cityid in _CITY_IDS.items()
    }
    try:
        return by_name[city.strip().casefold()]
    except KeyError:
        # Fail with a clear message instead of reaching ``return`` with an
        # unassigned local when nothing matches.
        options = ", ".join(name.strip() for name in _CITY_IDS)
        raise ValueError(
            "unsupported city {!r}; expected one of: {}".format(city, options)
        ) from None
# Set the height
def set_height():
    """Prompt for the partner's expected height and return its bracket.

    Returns:
        A ``(startheight, endheight)`` tuple, one of ``(0, 150)``,
        ``(151, 160)``, ``(161, 170)`` or ``(171, 180)`` (both ends
        inclusive).

    Raises:
        ValueError: If the input is not an integer, or if the height falls
            outside the supported 0-180 range.
    """
    height = int(input(" Please enter the other person's expected height :"))
    brackets = ((0, 150), (151, 160), (161, 170), (171, 180))
    for startheight, endheight in brackets:
        if startheight <= height <= endheight:
            return startheight, endheight
    # Fail with a clear message instead of reaching ``return`` with
    # unassigned locals when no bracket matches.
    raise ValueError(
        "height must be between 0 and 180, got {}".format(height)
    )
# Set academic qualifications
# Education levels mapped to the education code sent to the search API.
_EDUCATION_CODES = {
    ' Junior high school ': 10,
    ' high school ': 20,
    ' secondary specialized school ': 25,
    ' junior college ': 30,
    ' Undergraduate ': 40,
    ' master ': 50,
    ' Doctor ': 60,
}


def set_education1():
    """Prompt for the partner's expected education and return its code.

    The answer is matched against the known education levels ignoring
    surrounding whitespace and letter case.

    Returns:
        The integer education code associated with the chosen level.

    Raises:
        ValueError: If the answer is not one of the known levels.
    """
    education1 = input(" Please enter the expected education of the other party :")
    by_level = {
        level.strip().casefold(): code
        for level, code in _EDUCATION_CODES.items()
    }
    try:
        return by_level[education1.strip().casefold()]
    except KeyError:
        # Fail with a clear message instead of reaching ``return`` with an
        # unassigned local when nothing matches.
        options = ", ".join(level.strip() for level in _EDUCATION_CODES)
        raise ValueError(
            "unknown education {!r}; expected one of: {}".format(
                education1, options
            )
        ) from None
# Set monthly salary
def set_yue():
    """Prompt for the partner's expected monthly salary and return its code.

    Returns:
        ``2`` for 2000-4999, ``3`` for 5000-9999, ``4`` for 10000-19999 and
        ``5`` for 20000 and above.

    Raises:
        ValueError: If the input is not an integer, or if the salary is
            below 2000 (no code is defined for that range).
    """
    yue = int(input(" Please enter the expected monthly salary of the other party :"))
    # (lower bound, code) pairs, highest first: the first bound the salary
    # reaches determines its code.
    floors = ((20000, 5), (10000, 4), (5000, 3), (2000, 2))
    for floor, salary in floors:
        if yue >= floor:
            return salary
    # Fail with a clear message instead of reaching ``return`` with an
    # unassigned local when the salary is below every bound.
    raise ValueError("salary must be at least 2000, got {}".format(yue))
# Parse web pages
def get_info(page, startage, endage, gender, cityid, startheight, endheight,
             education, salary, timeout=10):
    """Request one page of results from the site's user-search API.

    Example request:
    http://www.lovewzly.com/api/user/pc/list/search?startage=21&endage=30&gender=2&marry=1&page=1

    Args:
        page: Page number of the result list to request.
        startage: Lower bound of the age filter.
        endage: Upper bound of the age filter.
        gender: Gender code, as returned by ``set_sex``.
        cityid: City id, as returned by ``set_city``.
        startheight: Lower bound of the height filter.
        endheight: Upper bound of the height filter.
        education: Education code, as returned by ``set_education1``.
        salary: Salary code, as returned by ``set_yue``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or a timeout.
    """
    url = 'http://www.lovewzly.com/api/user/pc/list/search'
    # Let requests build and escape the query string. The key order matches
    # the original hand-formatted URL.
    params = {
        'startage': startage,
        'endage': endage,
        'gender': gender,
        'cityid': cityid,
        'startheight': startheight,
        'endheight': endheight,
        # Always sent as 1; presumably a marital-status filter -- TODO confirm.
        'marry': 1,
        'education': education,
        'salary': salary,
        'page': page,
    }
    response = requests.get(url, params=params, timeout=timeout)
    # Surface a failed request as an HTTP error rather than falling through
    # to ``return`` with nothing to return.
    response.raise_for_status()
    return response.json()
# Download the file to a custom folder
def save_images(item, i, timeout=10):
    """Download one user's avatar into the local ``images`` directory.

    The picture is stored as ``images/<i>.jpg``. If the avatar request does
    not return status 200, nothing is written and no error is raised.

    Args:
        item: Mapping describing one user; its ``'avatar'`` entry is used as
            the picture URL and its ``'username'`` entry is printed.
        i: Number used as the file name.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: On connection problems or a timeout.
    """
    # exist_ok avoids the gap between checking for the directory and
    # creating it.
    os.makedirs('images', exist_ok=True)
    # Request the picture address.
    response = requests.get(item['avatar'], timeout=timeout)
    if response.status_code != 200:
        # Best effort: skip avatars that cannot be fetched.
        return
    print(" Getting :%s Information about " % item['username'])
    # Files are numbered rather than named after the user, because user
    # names can contain characters such as '*' that break file naming.
    file_path = os.path.join('images', '{}.jpg'.format(i))
    with open(file_path, 'wb') as f:  # binary mode: image bytes
        f.write(response.content)
Finally, write the main function:
# The main program
def get_data():
    """Ask for the search filters, then download the avatars of all matches.

    Prompts for age, gender, city, height, education and salary, requests
    result pages 1 through 9 with those filters, and saves each listed
    user's avatar under a running number.
    """
    print(" Please enter your filter criteria , Start this marriage :")
    # Collect the filters with the prompt helpers defined above.
    startage, endage = set_age()
    gender = set_sex()
    cityid = set_city()
    startheight, endheight = set_height()
    education = set_education1()
    salary = set_yue()
    # To avoid file-naming problems, pictures are numbered instead of being
    # named after users. The counter is kept separate from the page index
    # so numbering continues across pages and later pages do not overwrite
    # pictures saved from earlier ones.
    image_no = 0
    for page in range(1, 10):  # pages 1 through 9
        # Fetch the JSON data for this page.
        result = get_info(page, startage, endage, gender, cityid,
                          startheight, endheight, education, salary)
        # Save the picture of every user listed on the page.
        for item in result['data']['list']:
            image_no += 1
            print(item, image_no)
            save_images(item, image_no)
# Run the crawler only when this file is executed as a script, so that
# importing it does not start prompting and downloading.
if __name__ == '__main__':
    get_data()
That is all the code. This is a basic version of the crawler, which makes it well suited to Python beginners. The blogger also has a more complete version; readers who need it can message the blogger privately or leave a comment, and the blogger will reply as soon as possible. If there are many requests, the more complete version will be published on the blog. If you like these posts, feel free to follow the blogger. I am an active smiling face.