抓取奇摩字典英文單字的音標
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
import time # For pause
# For MacOS, place getCoordinate.py and chromedriver under users\bfhaha
# Scrape the phonetic transcription of each English word from the Yahoo Taiwan
# dictionary and write one result per line to getKK.txt.

# Start Chrome through the local chromedriver binary.
driver = webdriver.Chrome(r'C:\Users\bfhaha\chromedriver')

# English words to look up, in output order.
vocabulary = [
    "follicle",
    "polio",
    "groove",
]

try:
    # Mode "w" creates the file or empties an existing one, so no explicit
    # truncate is needed; the with-block closes the file even if a page load
    # or a write raises.
    with open("getKK.txt", "w", encoding='UTF-8') as f:
        for word in vocabulary:
            driver.get("https://tw.dictionary.search.yahoo.com/search?p=" + word)
            # Fixed pause after navigation, presumably to let the page finish
            # rendering before the element lookup.
            time.sleep(2)
            try:
                # The class value is matched exactly, including its leading
                # space. find_element("xpath", ...) is used because the
                # find_element_by_* helpers were removed in Selenium 4.3.
                kk = driver.find_element("xpath", "//span[@class = ' fz-14']").text
            except NoSuchElementException:
                # No matching span on the page: record a placeholder so the
                # output lines stay aligned with the vocabulary list.
                kk = "NULL"
            f.write(kk + "\n")
finally:
    # Always shut down the browser and the chromedriver process.
    driver.quit()
有些網頁會有預先載入的頁面,例如 YouTube 的廣告,這會導致雖然在原始碼中看得到某個元素,實際上卻抓不到的情況(預先載入的頁面跟你實際上看到的頁面不同)。這時候可以先執行 kk = driver.execute_script("return document.getElementsByTagName('body')[0].innerHTML;"),看一下這個預先載入的網頁的原始碼,再決定需要的資訊在哪一個元素中。
No comments:
Post a Comment