# Scrape the top 5 items from the 11st best-seller page.
# Clicks each product thumbnail, reads the title and price from the detail
# page, navigates back, and collects the results into a pandas DataFrame.
# NOTE(review): find_elements_by_css_selector was removed in Selenium 4;
# this script assumes Selenium 3.x.
from selenium import webdriver as wb
from bs4 import BeautifulSoup as bs
import time
import pandas as pd

driver = wb.Chrome()
url = 'http://www.11st.co.kr/html/bestSellerMain.html'
driver.get(url)

titles = []
prices = []
for index in range(5):
    # Re-query the thumbnails every iteration: driver.back() reloads the
    # page, which invalidates previously found element references.
    image_list = driver.find_elements_by_css_selector('div.pub_photo a')
    time.sleep(2)
    image_list[index].click()
    time.sleep(2)
    soup = bs(driver.page_source, 'lxml')
    try:
        # Regular product detail layout.
        title = soup.select_one('.heading h2').text
        price = soup.select_one('.sale_price').text
    except AttributeError:
        # select_one returned None -> alternate ("ticket") detail layout,
        # which uses different selectors.
        title = soup.select_one('div.tk_show_detail strong.tk_title').text
        price = soup.select_one('div.tk_cont > span').text
    titles.append(title)
    prices.append(price)
    time.sleep(2)
    driver.back()

ele_dic = {'title': titles, 'price': prices}
ele = pd.DataFrame(ele_dic)
ele
# Scrape the top 10 items from the Gmarket best-sellers page.
# Clicks each ranked product link, reads name / price / category / origin
# from the detail page, navigates back, and builds a pandas DataFrame.
# NOTE(review): find_element_by_css_selector was removed in Selenium 4;
# this script assumes Selenium 3.x.
from selenium import webdriver as wb
from selenium.common.exceptions import NoSuchElementException
import time
import pandas as pd

driver = wb.Chrome()
url = 'http://corners.gmarket.co.kr/Bestsellers'
driver.get(url)

names = []
prices = []
divides = []
navs = []
for num in range(1, 11):
    # Ranked items are anchored as p#no1 ... p#no10; the '~div' general
    # sibling combinator selects the div holding the clickable link.
    link = 'p#no' + str(num) + "~div"
    menu = driver.find_element_by_css_selector(link)
    menu.click()
    time.sleep(1)
    name = driver.find_element_by_css_selector('h1.itemtit').text
    price = driver.find_element_by_css_selector('strong.price_real').text
    divide = driver.find_element_by_css_selector("li.on > a").text
    try:
        nav = driver.find_element_by_css_selector('p.nav').text
    except NoSuchElementException:
        # Some items carry no origin information; store a placeholder.
        nav = "원산지가 없습니다."
    names.append(name)
    prices.append(price)
    divides.append(divide)
    navs.append(nav)
    driver.back()
    time.sleep(1)

gmarket_dic = {'name': names, 'price': prices, 'divide': divides, 'nav': navs}
df = pd.DataFrame(gmarket_dic)
df
# Scrape the top 20 items from the 11st best-seller page, including the
# category ("divide") and shipping-origin ("nav") fields, then index the
# resulting DataFrame by category and look up two specific products.
# NOTE(review): find_element(s)_by_css_selector was removed in Selenium 4;
# this script assumes Selenium 3.x.
from selenium import webdriver as wb
from selenium.common.exceptions import NoSuchElementException
import time
import pandas as pd

driver = wb.Chrome()
url = 'http://www.11st.co.kr/html/bestSellerMain.html'
driver.get(url)

names = []
prices = []
divides = []
navs = []
for num in range(20):
    # Re-query the thumbnails each pass: driver.back() reloads the page
    # and invalidates previously found element references.
    image_list = driver.find_elements_by_css_selector('div.pub_photo > a')
    image_list[num].click()
    time.sleep(1)
    name = driver.find_element_by_css_selector('div.heading > h2').text
    price = driver.find_element_by_css_selector('strong.sale_price').text
    divide = driver.find_element_by_css_selector('button#headSel_3').text
    try:
        # [6:] strips the leading label from the origin text — presumably
        # "원산지 : "; TODO confirm against the live page markup.
        nav = driver.find_element_by_css_selector(
            'div#dlvCstInfoViewFree+div.det_info > p.col.first').text[6:]
    except NoSuchElementException:
        # Some items carry no origin information; store a placeholder.
        nav = "원산지가 없습니다."
    names.append(name)
    prices.append(price)
    divides.append(divide)
    navs.append(nav)
    driver.back()
    time.sleep(1)

street11 = {'name': names, 'price': prices, 'nav': navs, 'divide': divides}
df = pd.DataFrame(street11)
df = df.set_index('divide')
df[['name', 'price']].set_index('name').loc[[
    '그린핑거 유아로션/워시/크림/샴푸 모음전',
    '[사전판매 카드12%쿠폰] 삼성자급제 갤럭시 S20 / S20+ / S20 Ultra 사전판매 런칭! LTE/5G 유심호환',
]]
**'Programming > Web Crawling' 카테고리의 다른 글**

| 글 | 날짜 |
|---|---|
| sns 크롤링 (0) | 2020.03.02 |
| 이미지 크롤링 (0) | 2020.03.02 |
| 카페 모든 메뉴 가져오기 (0) | 2020.03.02 |
| 카페 지점명, 주소, 전화번호 크롤링 (0) | 2020.03.02 |
| 실습 (0) | 2020.03.02 |