본문 바로가기

Programming/Web Crawling

음악 TOP50 수집

import requests
from bs4 import BeautifulSoup as bs

# Download the chart page at `url` and parse the response body with the lxml parser.
url = 'https://music.naver.com/listen/top100.nhn?domain=TOTAL'
# The HTTP library is imported as `requests`; the previous `req.get(...)`
# referenced a name that is never defined and raised NameError.
res = requests.get(url)
soup = bs(res.text,'lxml')

# select(CSS selector): searches for every matching element and returns a list
#                       --> same as find_all()
# select_one(CSS selector): returns only a single element
#                       --> same as find()
# Collect the <td class="ranking"> cells, the <span> children of `a._title`
# links, and the <a> children of `td.artist` cells from the parsed page.
rank_list = soup.find_all('td',class_='ranking')
name_list = soup.select('a._title > span')
artist_list = soup.select('td.artist > a')

# Bare expression: evaluates to the three match counts. In an interactive
# session / notebook cell this shows them for comparison; in a plain script
# the value is discarded.
len(rank_list), len(name_list), len(artist_list)




# Build one column list per field from the scraped elements.
ranks = []
names = []
artists = []

# Walk the three result lists in lockstep. zip() stops at the shortest list,
# so a selector that matched fewer elements can no longer cause an IndexError
# (the old code indexed all three lists by positions taken from rank_list),
# and the three columns always end up with the same length.
for rank_tag, name_tag, artist_tag in zip(rank_list, name_list, artist_list):
    ranks.append(rank_tag.text)
    names.append(name_tag.text)
    # Only the artist text is stripped of surrounding whitespace.
    artists.append(artist_tag.text.strip())

# Column-name -> values mapping used to construct the table.
music_info = {'rank':ranks,'name':names,'artist':artists}

import pandas as pd
# Turn the column mapping into a table and use the 'rank' column as its index.
music = pd.DataFrame(music_info).set_index('rank')
# Bare expression: shows the table when run in a notebook / interactive session.
music