import requests as req
from bs4 import BeautifulSoup as bs
import pandas as pd
# Accumulators for the scraped rows: one entry per (day, movie) pair.
# The three lists are kept the same length so they can become DataFrame columns.
movie_date = []
movie_title = []
movie_rate = []

# Request the ranking page for each day from 20191201 through 20191225.
# The dates are plain YYYYMMDD integers; the range stays inside a single
# month, so stepping by 1 never yields an invalid calendar date.
for day in range(20191201, 20191226, 1):
    url = "https://movie.naver.com/movie/sdb/rank/rmovie.nhn?sel=cur&tg=0&date="+str(day)
    # Bound the wait so a single stalled connection cannot hang the whole run.
    res = req.get(url, timeout=10)
    soup = bs(res.content, 'lxml')
    title_list = soup.select('div.tit5 > a')
    rate_list = soup.find_all('td', class_='point')
    # Pair each title with the rating at the same position. The previous code
    # indexed `title` / `rate`, names this section never assigns, instead of
    # `title_list` / `rate_list`.
    # NOTE(review): assumes the page yields one td.point per title, in the
    # same order -- confirm against the live markup.
    for title_tag, rate_tag in zip(title_list, rate_list):
        movie_date.append(day)
        movie_title.append(title_tag.text)
        movie_rate.append(rate_tag.text)

# Column-oriented dict: each key becomes a DataFrame column.
movie_dic = {"date": movie_date, "title": movie_title, "rate": movie_rate}
# Bare expression: shows the value when it ends a notebook cell; it has no
# effect when the file runs as a plain script.
movie_dic
movie = pd.DataFrame(movie_dic)
# Index the rows by date (in place) so they can be looked up per day.
movie.set_index('date', inplace=True)
movie
# Generate the dates with pandas.
# Original inline note read "periods" -- presumably a reminder about the
# `periods` argument of date_range.
date = pd.date_range(start='2019-09-01', end='2019-11-30')
# Bare expression: shows the value when it ends a notebook cell; it has no
# effect when the file runs as a plain script.
date
# Format every date as a YYYYMMDD string, the form appended to the `date=`
# query parameter below.
days = date.strftime('%Y%m%d')
days

# Fresh accumulators: one entry per (day, movie) pair, kept the same length
# so they can become DataFrame columns.
movie_date = []
movie_title = []
movie_rate = []

for day in days:
    url = "https://movie.naver.com/movie/sdb/rank/rmovie.nhn?sel=cur&date="+day
    # Bound the wait so a single stalled connection cannot hang the whole run.
    res = req.get(url, timeout=10)
    soup = bs(res.content, 'lxml')
    title = soup.select('div.tit5 > a')
    rate = soup.find_all('td', class_='point')
    # Pair each title with the rating at the same position.
    # NOTE(review): assumes the page yields one td.point per title, in the
    # same order -- confirm against the live markup.
    for title_tag, rate_tag in zip(title, rate):
        movie_date.append(day)
        movie_title.append(title_tag.text)
        movie_rate.append(rate_tag.text)

# Column-oriented dict: each key becomes a DataFrame column.
movie_dic = {"date": movie_date, "title": movie_title, "rate": movie_rate}
movie_dic
movie = pd.DataFrame(movie_dic)
# Index the rows by date (in place) so they can be looked up per day.
movie.set_index('date', inplace=True)
movie