본문 바로가기

Programming/Web Crawling

카페 지점명, 주소, 전화번호 크롤링

from selenium import webdriver as wb
from selenium.webdriver.common.by import By  # Selenium 4 removed find_element_by_*
from bs4 import BeautifulSoup as bs
import time
import pandas as pd

# Scrape store name, address and phone number for every Starbucks Korea
# location from the official store-map page.
url = 'http://www.istarbucks.co.kr/store/store_map.do'

driver = wb.Chrome()
try:
    driver.get(url)
    time.sleep(2)  # let the map page finish its initial load

    # Click the "search by region" button.
    btn_search = driver.find_element(By.CLASS_NAME, 'loca_search')
    btn_search.click()
    time.sleep(1)

    # Click "all regions" (first entry of the region list).
    all_area = driver.find_element(By.CSS_SELECTOR, 'ul.gugun_arae_box > li')
    all_area.click()
    # The store list is loaded via AJAX after the click; without this wait
    # page_source can be captured before the results exist.
    time.sleep(2)

    soup = bs(driver.page_source, 'lxml')

    # Each result <li> holds the store name in <strong> and a sibling <p>
    # whose text is "<address><phone>", with the phone number occupying
    # the trailing 12 characters (e.g. ' 1522-3232  ').
    names = soup.select('#mCSB_3_container > .quickSearchResultBoxSidoGugun > li > strong')
    details = soup.select('#mCSB_3_container > .quickSearchResultBoxSidoGugun > li > strong + p')

    for tag in names:
        print(tag.text)

    print(len(names), len(details), len(details))

    # Split the combined address/phone text once per store.
    name_list = [tag.text for tag in names]
    addr_list = [p.text[:-12] for p in details]  # everything before the phone
    tel_list = [p.text[-12:] for p in details]   # trailing 12 chars = phone

    dic = {'name': name_list, 'address': addr_list, 'tel': tel_list}
    df = pd.DataFrame(dic)
    # A bare `df` expression only renders inside a notebook; print explicitly
    # so the script also shows output when run from the command line.
    print(df)
finally:
    driver.quit()  # always release the browser process

'Programming > Web Crawling' 카테고리의 다른 글

쇼핑 크롤링  (0) 2020.03.02
카페 모든 메뉴 가져오기  (0) 2020.03.02
실습  (0) 2020.03.02
도시락 크롤링  (0) 2020.03.02
Selenium 모듈  (0) 2020.03.02