본문 바로가기

Programming/Web Crawling

쇼핑 크롤링

# Crawl the 11st best-seller page: click each of the first 5 product
# thumbnails, scrape the title and sale price from the detail page via
# BeautifulSoup, then navigate back and repeat.
from selenium import webdriver as wb
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup as bs
import time

import pandas as pd

driver = wb.Chrome()

url = 'http://www.11st.co.kr/html/bestSellerMain.html'

driver.get(url)

titles = []  # product titles, one per visited detail page
prices = []  # sale prices, aligned index-for-index with `titles`

for index in range(5):
    # Re-query the thumbnail list on every pass: the previously found
    # elements go stale after driver.back() reloads the listing page.
    # (find_elements_by_css_selector was removed in Selenium 4 — use
    # the By-based API instead.)
    image_list = driver.find_elements(By.CSS_SELECTOR, 'div.pub_photo a')
    time.sleep(2)
    image_list[index].click()
    time.sleep(2)

    soup = bs(driver.page_source, 'lxml')
    try:
        # Standard product-detail layout.
        title = soup.select_one('.heading h2').text
        price = soup.select_one('.sale_price').text
    except AttributeError:
        # select_one() returned None (selectors absent) — this item uses
        # the alternate "ticket"-style detail layout; only catch
        # AttributeError rather than a bare except.
        title = soup.select_one('div.tk_show_detail strong.tk_title').text
        price = soup.select_one('div.tk_cont > span').text

    titles.append(title)
    prices.append(price)

    time.sleep(2)
    driver.back()

# Release the browser session (the original script leaked it).
driver.quit()

ele = pd.DataFrame({'title': titles, 'price': prices})
print(ele)

 

# Crawl the Gmarket best-sellers page: visit the top-10 ranked items,
# scrape name / price / category / origin from each detail page, and
# collect the results into a DataFrame.
from selenium import webdriver as wb
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
import time
import pandas as pd

driver = wb.Chrome()
url = 'http://corners.gmarket.co.kr/Bestsellers'
driver.get(url)

names = []    # product names
prices = []   # real (discounted) prices
divides = []  # category tab currently active ("li.on > a")
navs = []     # country of origin, or a placeholder when absent

for num in range(1, 11):
    # Rank anchors are p#no1 ... p#no10; the '~div' general-sibling
    # selector targets the clickable block that follows each anchor.
    # find_element_by_css_selector was removed in Selenium 4, so use
    # the By-based API.
    menu = driver.find_element(By.CSS_SELECTOR, 'p#no' + str(num) + '~div')

    menu.click()
    time.sleep(1)

    name = driver.find_element(By.CSS_SELECTOR, 'h1.itemtit').text
    price = driver.find_element(By.CSS_SELECTOR, 'strong.price_real').text
    divide = driver.find_element(By.CSS_SELECTOR, 'li.on > a').text

    try:
        nav = driver.find_element(By.CSS_SELECTOR, 'p.nav').text
    except NoSuchElementException:
        # Some items have no origin field — record a placeholder instead
        # of crashing; catch only the "element missing" exception.
        nav = "원산지가 없습니다."

    names.append(name)
    prices.append(price)
    divides.append(divide)
    navs.append(nav)
    driver.back()
    time.sleep(1)

# Release the browser session (the original script leaked it).
driver.quit()

df = pd.DataFrame({'name': names, 'price': prices, 'divide': divides, 'nav': navs})
print(df)

 

# Crawl the 11st best-seller page: click each of the first 20 product
# thumbnails, scrape name / sale price / category tab / origin from each
# detail page, then index the results by category and look up two
# specific products by name.
from selenium import webdriver as wb
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
import time
import pandas as pd

driver = wb.Chrome()
url = 'http://www.11st.co.kr/html/bestSellerMain.html'
driver.get(url)

names = []    # product names
prices = []   # sale prices
divides = []  # label of the 'headSel_3' detail tab
navs = []     # country of origin, or a placeholder when absent

for num in range(20):
    # Re-locate the thumbnails every iteration: elements found before
    # driver.back() are stale afterwards. The *_by_css_selector helpers
    # were removed in Selenium 4, so the By-based API is used.
    image_list = driver.find_elements(By.CSS_SELECTOR, 'div.pub_photo > a')
    image_list[num].click()
    time.sleep(1)

    name = driver.find_element(By.CSS_SELECTOR, 'div.heading > h2').text
    price = driver.find_element(By.CSS_SELECTOR, 'strong.sale_price').text
    divide = driver.find_element(By.CSS_SELECTOR, 'button#headSel_3').text
    try:
        # '+' selects the det_info block immediately after the free-delivery
        # info; [6:] strips the leading 6-character label before the value.
        nav = driver.find_element(
            By.CSS_SELECTOR,
            'div#dlvCstInfoViewFree+div.det_info > p.col.first').text[6:]
    except NoSuchElementException:
        # Origin field missing for this item — record a placeholder and
        # catch only the "element missing" exception, not a bare except.
        nav = "원산지가 없습니다."

    names.append(name)
    prices.append(price)
    divides.append(divide)
    navs.append(nav)

    driver.back()
    time.sleep(1)

# Release the browser session (the original script leaked it).
driver.quit()

df = pd.DataFrame({'name': names, 'price': prices, 'nav': navs, 'divide': divides})
df = df.set_index('divide')

# Look up two specific products by name. The best-seller list changes
# constantly, so only select labels actually present — the original
# hard-coded .loc[[...]] raised KeyError as soon as the list rotated.
targets = [
    '그린핑거 유아로션/워시/크림/샴푸 모음전',
    '[사전판매 카드12%쿠폰] 삼성자급제 갤럭시 S20 / S20+ / S20 Ultra 사전판매 런칭! LTE/5G 유심호환',
]
by_name = df[['name', 'price']].set_index('name')
print(by_name.loc[[t for t in targets if t in by_name.index]])

'Programming > Web Crawling' 카테고리의 다른 글

sns 크롤링  (0) 2020.03.02
이미지 크롤링  (0) 2020.03.02
카페 모든 메뉴 가져오기  (0) 2020.03.02
카페 지점명, 주소, 전화번호 크롤링  (0) 2020.03.02
실습  (0) 2020.03.02