본문 바로가기

BeautifulSoup

(4)
sns 크롤링 from selenium import webdriver as wb from selenium.webdriver.common.keys import Keys from bs4 import BeautifulSoup as bs import time url = 'https://www.instagram.com/' driver = wb.Chrome() driver.get(url) # 손으로 로그인 input_search = driver.find_element_by_css_selector('input.XTCLo') input_search.send_keys('찾는키워드') soup = bs(driver.page_source, 'lxml') title_list = soup.select('span._28KuJ + div spa..
도시락 크롤링 from selenium import webdriver as wb from selenium.webdriver.common.keys import Keys from bs4 import BeautifulSoup as bs import time import pandas as pd url = 'https://www.hsd.co.kr/menu/menu_list' driver = wb.Chrome() driver.get(url) # 예외처리(try except문) # 더보기 버튼요소를 최대 50번 클릭하기 btn_more = driver.find_element_by_class_name('c_05') try: for index in range(50): btn_more.click() time.sleep(2) #2초동안 멈춤 e..
한달동안의 영화 평점 수집 import requests as req from bs4 import BeautifulSoup as bs import pandas as pd movie_date = [] movie_title = [] movie_rate = [] for day in range(20191201,20191226,1): url = "https://movie.naver.com/movie/sdb/rank/rmovie.nhn?sel=cur&tg=0&date="+str(day) res = req.get(url) soup = bs(res.content, 'lxml') title_list = soup.select('div.tit5 > a') rate_list = soup.find_all('td',class_='point') for ind..
음악 TOP50수집 import requests as req from bs4 import BeautifulSoup as bs url = 'https://music.naver.com/listen/top100.nhn?domain=TOTAL' res = req.get(url) soup = bs(res.text,'lxml') #select(CSS선택자) : 여러 요소를 검색한 후 리스트로 반환 # --> find_all()과 같음 #select_one(CSS선택자) : 하나의 요소만 반환 # --> find()와 같음 rank_list = soup.find_all('td',class_='ranking') name_list = soup.select('a._title > span') artist_list = soup.select('td.artis..