
Hands-on Practice

Auto-searching for '사과' (apple)

!pip install selenium

import requests as req
from bs4 import BeautifulSoup as bs
import pandas as pd

from selenium import webdriver as wb
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

driver = wb.Chrome()  # open a Chrome browser window

url = 'https://www.google.com/'  # target site

driver.get(url)

input_search = driver.find_element(By.CLASS_NAME, 'gLFyf')  # class of Google's search box, found via developer tools

input_search.send_keys('사과')

input_search.send_keys(Keys.ENTER)
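Once the search fires, the results page can be parsed as well. A minimal follow-up sketch, assuming Google renders result titles in <h3> tags (Google's markup changes often, so treat the selector as an assumption):

import time
from bs4 import BeautifulSoup as bs

time.sleep(2)  # crude wait for the results page to render
soup = bs(driver.page_source, 'lxml')
titles = [h3.text for h3 in soup.select('h3')]  # assumed selector for result titles
print(titles[:5])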

 

Fetching Real-Time Trending Search Terms (Naver DataLab)

from bs4 import BeautifulSoup as bs
import requests as req
url = "https://datalab.naver.com/keyword/realtimeList.naver?where=main"
# Send a browser-like User-Agent so the server doesn't reject the request
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36'}
res = req.get(url, headers=headers)
res  # expect <Response [200]>

soup = bs(res.content, 'lxml')
soup

rank = []
info = []
info_list = soup.find_all('span', class_='item_title')  # one <span> per trending keyword
for i in range(len(info_list)):
    rank.append(str(i + 1))
    info.append(info_list[i].text.strip())

search_info = {'순위': rank, '검색어': info}  # 순위 = rank, 검색어 = search term

import pandas as pd
search = pd.DataFrame(search_info)
search.set_index('순위', inplace=True)

search
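The same table can be built more compactly with soup.select and a list comprehension; a sketch that yields an equivalent DataFrame (ranks end up as integers here rather than strings):

keywords = [tag.text.strip() for tag in soup.select('span.item_title')]
search2 = pd.DataFrame({'순위': range(1, len(keywords) + 1), '검색어': keywords})
search2.set_index('순위', inplace=True)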

 

Fetching Best Products from a Shopping Site (WeMakePrice)

from selenium import webdriver as wb
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
import time
import pandas as pd

driver = wb.Chrome()
url = 'https://front.wemakeprice.com/best'
driver.get(url)

numbers = []
names = []
prices = []
navs = []

for num in range(10):
    # Re-find the product tiles each iteration; references go stale after driver.back()
    image_list = driver.find_elements(By.CSS_SELECTOR, 'div.flag_wrap')
    image_list[num].click()
    time.sleep(1)

    number = str(num + 1)
    # The product title is an h3 on most detail pages, an h4 on some
    try:
        name = driver.find_element(By.CSS_SELECTOR, 'h3.deal_tit').text.strip()
    except NoSuchElementException:
        name = driver.find_element(By.CSS_SELECTOR, 'h4.deal_tit').text.strip()
    price = driver.find_element(By.CSS_SELECTOR, 'em.num').text.strip()
    # Drop the first six characters (the leading origin label); fall back when there is no origin info
    try:
        nav = driver.find_element(By.CSS_SELECTOR, 'dl.origin p').text[6:]
    except NoSuchElementException:
        nav = "원산지가 없습니다."

    numbers.append(number)
    names.append(name)
    prices.append(price)
    navs.append(nav)

    driver.back()
    time.sleep(1)
    
result = {'순위': numbers, '상품명': names, '가격': prices, '원산지 정보': navs}  # don't name this 'list': it would shadow the built-in

df = pd.DataFrame(result)
df.set_index('순위', inplace=True)

df

df.to_csv('WeMakePrice.csv', encoding='utf-8-sig')  # utf-8-sig so Excel displays Hangul correctly
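The fixed time.sleep(1) calls work, but they wait the full second even when the page is already loaded and break when it loads slower. A sketch of an explicit wait instead, assuming the same deal_tit selectors as above:

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

wait = WebDriverWait(driver, 10)  # poll until found, give up after 10 seconds
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, 'h3.deal_tit, h4.deal_tit')))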

 

Fetching Sports Win Rates (KBO Team Rankings)

from bs4 import BeautifulSoup as bs
from selenium import webdriver as wb

url = 'https://www.koreabaseball.com/TeamRank/TeamRank.aspx'

driver = wb.Chrome()
driver.get(url)

soup = bs(driver.page_source, 'lxml')  # parse the fully rendered page source

team_col = soup.select('th', limit=12)  # first 12 <th> tags: the table's column headers
team_col


play_data = soup.select('td', limit=120)  # 12 values x 10 KBO teams = 120 <td> cells
play_data


len(team_col), len(play_data)  # expect (12, 120)


col_list = []
for index in team_col:
    col_list.append(index.text)
    
data_list = []

for data in play_data:
    data_list.append(data.text)
    
import numpy as np
import pandas as pd

info_array = np.array(data_list).reshape(-1, 12)  # one row per team: 10 x 12
info_array


kbo_info = pd.DataFrame(info_array, columns=col_list)
kbo_info.set_index('순위', inplace=True)

kbo_info = kbo_info[['팀명', '경기', '승', '패', '무', '승률']]  # team, games, wins, losses, draws, win rate

kbo_info


kbo_info.to_csv('kbo_info.csv', encoding='utf-8-sig')
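Since the standings are a plain HTML table, pandas can also parse them in one call. An alternative sketch, assuming the standings are the first <table> on the page and lxml is installed:

from io import StringIO

tables = pd.read_html(StringIO(driver.page_source))  # one DataFrame per <table> on the page
kbo_alt = tables[0]  # assumption: the standings table comes first
kbo_alt.head()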
