Я пытаюсь проверить, присутствует ли текст «Nationally registered» («Зарегистрирован на национальном уровне») на страницах профилей сайта, с которого я собираю данные. Он идёт сразу после текста «Лицензировано для работы в:». Если страница содержит этот текст, я запишу тип лицензии в CSV-файл как «Зарегистрирован на национальном уровне», а если такого текста нет, запишу в столбец «Лицензия» CSV-файла значение «штат»... Ниже — проблема и логика кода, которую я использую.

Вот ссылка на страницу профиля, на которой я тестирую свой код https://www.zillow.com/lender-profile/zackdisinger/

Код всё время печатает False... Ниже код, который я пробую:

from selenium import webdriver
from bs4 import BeautifulSoup
import time

# Chrome webdriver file path (ChromeDriver version 74).
driver = webdriver.Chrome(r'C:\Users\mfoytlin\Desktop\chromedriver.exe')
# Open the lender profile page.
# NOTE(review): driver.get() presumably returns None, so `page` carries no
# useful value and is never read below — confirm against the Selenium docs.
page = driver.get('https://www.zillow.com/lender-profile/zackdisinger/')
# Fixed 2-second pause before looking up the element.
time.sleep(2)
# Click the element with class 'zsg-wrapper-footer' (presumably the
# "show more" control, going by the variable name). The variable receives the
# result of .click(), not the element itself.
show_more_button = driver.find_element_by_class_name('zsg-wrapper-footer').click()
# Fixed 2-second pause after the click, before the page source is read.
time.sleep(2)
# Parse the page source as it stands after the click.
soup = BeautifulSoup(driver.page_source, 'html.parser')


# NOTE(review): with a plain string, find(text=...) presumably matches only a
# text node equal to the whole string, not one that merely contains it —
# verify against the BeautifulSoup docs; this is the check the question is about.
if soup.find(text='Nationally registered'):
    print('Success')
else:
    print('False')
1
mcfoyt 8 Июл 2019 в 18:39

4 ответа

Лучший ответ

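Используйте регулярное выражение (модуль re), чтобы проверить, присутствует ли текст: обычная строка в find(text=...) сравнивается с текстовым узлом на точное совпадение, а шаблон re.compile(...) находит и вхождение подстроки. Вот ваш исправленный код.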

from selenium import webdriver
from bs4 import BeautifulSoup
import time
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import re

# Launch Chrome through the local ChromeDriver binary (ChromeDriver version 74).
driver = webdriver.Chrome(r'C:\Users\mfoytlin\Desktop\chromedriver.exe')
try:
    driver.get('https://www.zillow.com/lender-profile/zackdisinger/')
    # Wait up to 10 seconds for the link whose text contains both "Show" and
    # "more" to become clickable, instead of sleeping for a fixed time.
    show_more_button = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable(
            (By.XPATH, "//a[contains(.,'Show')][contains(.,'more')]")
        )
    )
    show_more_button.click()
    # Short pause after the click before the page source is captured.
    time.sleep(2)
    soup = BeautifulSoup(driver.page_source, 'html.parser')
finally:
    # Always close the browser, even when the wait times out or the click
    # fails; otherwise the Chrome/ChromeDriver processes are left running.
    driver.quit()

# A compiled pattern is searched for inside each text node, so the phrase is
# found even when it is only part of a longer string.
if soup.find(text=re.compile('Nationally registered')):
    print('Success')
else:
    print('False')

Этот код выводит в консоль Success.

Success
1
KunduK 8 Июл 2019 в 16:56

Данные загружаются через AJAX с другого URL:

import re
import requests
import json

url = 'https://www.zillow.com/lender-profile/zackdisinger/'
# The screen name is the last non-empty path segment of the profile URL.
screen_name = [i for i in url.split('/') if i][-1]

# Fetch the profile page; a timeout keeps the script from hanging forever and
# raise_for_status() surfaces HTTP errors before the HTML is searched.
page_response = requests.get(url, timeout=10)
page_response.raise_for_status()
r = page_response.text

# The partnerId needed by the API call is embedded in the page HTML.
partner_match = re.search(r'"partnerId":"(.*?)"', r)
if partner_match is None:
    # Fail with a clear message instead of an AttributeError on None.group().
    raise RuntimeError('partnerId not found in the profile page HTML')

url_json = 'https://mortgageapi.zillow.com/getRegisteredLender?partnerId=' + partner_match.group(1)
# Fields requested for the lender identified by its screen name.
payload = {
    "fields": [
        "aboutMe",
        "address",
        "cellPhone",
        "contactLenderFormDisclaimer",
        "companyName",
        "employerMemberFDIC",
        "employerScreenName",
        "equalHousingLogo",
        "faxPhone",
        "hideCellPhone",
        "imageId",
        "individualName",
        "languagesSpoken",
        "memberFDIC",
        "nationallyRegistered",
        "nmlsId",
        "nmlsType",
        "officePhone",
        "rating",
        "screenName",
        "stateLicenses",
        "stateSponsorships",
        "title",
        "totalReviews",
        "website",
    ],
    "lenderRef": {"screenName": screen_name},
}
api_response = requests.post(url_json, json=payload, timeout=10)
api_response.raise_for_status()
data = api_response.json()
print(json.dumps(data, indent=4))
print()
print('Is nationally registered =', data['lender']['nationallyRegistered'])

Вывод:

{
    "lender": {
        "aboutMe": "From day one I provide the utmost relational-based experience to make you feel comfortable with your home financing decisions.\n\nEmpowerment and integrity is key to successfully making a home loan a smooth process from start to finish. Acquiring a mortgage in today's market takes product knowledge and underwriting know how. Every client has their own story, their own future. I am here to match today's mortgages to clients dreams of home-ownership.\n",
        "address": {
            "address": "10412 Allisonville Rd Suite 50",
            "city": "Fishers",
            "stateAbbreviation": "IN",
            "zipCode": "46038"
        },
        "companyName": "Bank of England Mortgage",
        "employerMemberFDIC": true,
        "employerScreenName": "BoEMortgage",
        "equalHousingLogo": "EqualHousingLender",
        "faxPhone": {
            "areaCode": "317",
            "number": "3754",
            "prefix": "536"
        },
        "id": "ZU101hnzx7ntuyx_8z2sb",
        "imageId": "2910837992a9cc44d31c26bd7532d2dd",
        "individualName": {
            "firstName": "Zachary",
            "lastName": "Disinger"
        },
        "languagesSpoken": [],
        "nationallyRegistered": true,
        "nmlsId": 1053091,
        "nmlsType": "Individual",
        "officePhone": {
            "areaCode": "317",
            "number": "0416",
            "prefix": "252"
        },
        "rating": 5.0,
        "screenName": "zackdisinger",
        "stateLicenses": {},
        "stateSponsorships": {},
        "title": "Mortgage Banker",
        "totalReviews": 120,
        "website": "http://boeindy.com"
    }
}

Is nationally registered = True
1
Andrej Kesely 8 Июл 2019 в 15:59

В bs4 4.7.1 можно использовать псевдокласс :contains, чтобы найти тег p, содержащий эту строку. Мой код выводит True / False, но его легко адаптировать под Success / False.

from selenium import webdriver
from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Launch Chrome through the local ChromeDriver binary (ChromeDriver version 74).
driver = webdriver.Chrome(r'C:\Users\mfoytlin\Desktop\chromedriver.exe')
try:
    driver.get('https://www.zillow.com/lender-profile/zackdisinger/')
    # Wait up to 10 seconds for the link inside the '.zsg-wrapper-footer'
    # element to become clickable, then click it.
    WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, ".zsg-wrapper-footer a"))
    ).click()
    soup = BeautifulSoup(driver.page_source, 'html.parser')
finally:
    # Always close the browser, even when the wait times out; otherwise the
    # Chrome/ChromeDriver processes are left running.
    driver.quit()

# select_one returns the first <p> whose text contains the phrase, or None.
data = soup.select_one('p:contains("Nationally registered")')
print(data is not None)
2
QHarr 8 Июл 2019 в 20:05

Попробуйте условный блок такого вида:

# Fragment: assumes `driver` (a Selenium WebDriver) and `By` are already in scope.
# find_elements returns an empty list when nothing matches, rather than
# raising, so a missing paragraph reaches the else branch.
matches = driver.find_elements(By.XPATH, "//p[contains(text(),'Nationally registered')]")
if any(element.is_displayed() for element in matches):
    print('Success')
else:
    print('False')
0
Sureshmani 8 Июл 2019 в 16:49