'사과' 자동검색하기
# Automated Google search: open Chrome, type '사과' (apple) into the search
# box and submit with ENTER.
# NOTE: '!pip install selenium' is IPython/notebook magic, not valid Python —
# run `pip install selenium` in a shell instead.
import requests as req
from bs4 import BeautifulSoup as bs
import pandas as pd
from selenium import webdriver as wb
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

driver = wb.Chrome()                 # launch a Chrome browser window
url = 'https://www.google.com/'      # target site
driver.get(url)

# find_element_by_class_name() was removed in Selenium 4.3 — use the
# By-locator API. 'gLFyf' is the class of Google's search <input>
# (found via the browser dev tools; may change over time).
input_search = driver.find_element(By.CLASS_NAME, 'gLFyf')
input_search.send_keys('사과')
input_search.send_keys(Keys.ENTER)
실시간 검색어 가져오기
# Scrape the Naver DataLab realtime search-keyword ranking into a DataFrame.
from bs4 import BeautifulSoup as bs
import requests as req
import pandas as pd

url = "https://datalab.naver.com/keyword/realtimeList.naver?where=main"
# A browser-like User-Agent is required; the server rejects the default
# python-requests agent.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36'}
res = req.get(url, headers=headers)
res.raise_for_status()  # fail loudly instead of silently parsing an error page
soup = bs(res.content, 'lxml')

# Each realtime keyword sits in <span class="item_title">; rank is its
# 1-based position in document order.
rank = []
info = []
info_list = soup.find_all('span', class_='item_title')
for position, item in enumerate(info_list, start=1):
    rank.append(str(position))
    info.append(item.text.strip())

search_info = {'순위': rank, '검색어': info}
search = pd.DataFrame(search_info)
search.set_index('순위', inplace=True)
print(search)
쇼핑 Best 상품 가져오기
# Scrape the top-10 "Best" deals on Wemakeprice: click into each product
# page, collect rank / name / price / origin, then navigate back to the list.
from selenium import webdriver as wb
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException
import time
import pandas as pd
from tqdm import tqdm_notebook  # kept from original; unused here

driver = wb.Chrome()
url = 'https://front.wemakeprice.com/best'
driver.get(url)

numbers = []
names = []
prices = []
navs = []
for num in range(10):
    # Re-query the product list every iteration: driver.back() reloads the
    # page, which invalidates previously-found elements (stale references).
    # find_elements_by_css_selector() was removed in Selenium 4.3.
    image_list = driver.find_elements(By.CSS_SELECTOR, 'div.flag_wrap')
    image_list[num].click()
    time.sleep(1)  # crude wait for the detail page to render

    number = str(num + 1)
    # The title tag varies between h3 and h4 depending on the deal layout;
    # catch only the "element not found" error, never a bare except.
    try:
        name = driver.find_element(By.CSS_SELECTOR, 'h3.deal_tit').text.strip()
    except NoSuchElementException:
        name = driver.find_element(By.CSS_SELECTOR, 'h4.deal_tit').text.strip()

    price = driver.find_element(By.CSS_SELECTOR, 'em.num').text.strip()

    # Origin info is optional; slice [6:] drops the "원산지: " label prefix.
    try:
        nav = driver.find_element(By.CSS_SELECTOR, 'dl.origin p').text[6:]
    except NoSuchElementException:
        nav = "원산지가 없습니다."

    numbers.append(number)
    names.append(name)
    prices.append(price)
    navs.append(nav)

    driver.back()
    time.sleep(1)

# Named 'records' rather than 'list' to avoid shadowing the builtin.
records = {'순위': numbers, '상품명': names, '가격': prices, '원산지 정보': navs}
df = pd.DataFrame(records)
df.set_index('순위', inplace=True)
# utf-8-sig BOM keeps Korean text readable when opened in Excel.
df.to_csv('WeMakePrice.csv', encoding='utf-8-sig')
스포츠 승률 가져오기
# Scrape the KBO (Korean pro baseball) team-rank table and save the
# win/loss statistics to CSV. The page is JS-rendered, so Selenium fetches
# the DOM and BeautifulSoup parses driver.page_source.
import requests as req
from bs4 import BeautifulSoup as bs
from selenium import webdriver as wb
from selenium.webdriver.common.keys import Keys
import time
import numpy as np
import pandas as pd

url = 'https://www.koreabaseball.com/TeamRank/TeamRank.aspx'
driver = wb.Chrome()
driver.get(url)
soup = bs(driver.page_source, 'lxml')

# The rank table is the first on the page: 12 <th> header cells, then
# 10 teams x 12 stats = 120 <td> data cells.
team_col = soup.select('th', limit=12)
play_data = soup.select('td', limit=120)

col_list = [header.text for header in team_col]
data_list = [cell.text for cell in play_data]

# Reshape the flat cell list into rows of 12 columns (one row per team).
info_array = np.array(data_list).reshape(-1, 12)
kbo_info = pd.DataFrame(info_array, columns=col_list)
kbo_info.set_index('순위', inplace=True)
# Keep only team name, games, wins, losses, draws, win rate.
kbo_info = kbo_info[['팀명', '경기', '승', '패', '무', '승률']]
kbo_info.to_csv('kbo_info.csv', encoding='utf-8-sig')
'Programming > Web Crawling' 카테고리의 다른 글

| 카페 모든 메뉴 가져오기 (0) | 2020.03.02 |
|---|---|
| 카페 지점명, 주소, 전화번호 크롤링 (0) | 2020.03.02 |
| 도시락 크롤링 (0) | 2020.03.02 |
| Selenium 모듈 (0) | 2020.03.02 |
| iframe부분 크롤링 실습 (0) | 2020.03.02 |