본문 바로가기

Programming/Web Crawling

SNS 크롤링

import time

from bs4 import BeautifulSoup as bs
from selenium import webdriver as wb
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

url = 'https://www.instagram.com/'

driver = wb.Chrome()
driver.get(url)

# Log in by hand in the opened browser window before running the lines
# below (this script was written to be run step by step).

# Type the keyword into the search input. The generic find_element(By, ...)
# form is used because the find_element_by_* helpers no longer exist in
# current Selenium releases.
input_search = driver.find_element(By.CSS_SELECTOR, 'input.XTCLo')
input_search.send_keys('찾는키워드')

# Give the search results time to render before snapshotting the page;
# parsing immediately after typing can capture a page without results.
time.sleep(3)

soup = bs(driver.page_source, 'lxml')

# Result titles, per-result count text, and result links from the snapshot.
title_list = soup.select('span._28KuJ + div span.Ap253')
boardNum = soup.select('div.Fy4o8')
hrefs = soup.select('a.yCE8d')

# Explicit prints replace the notebook-style bare expressions, which show
# nothing when the file is executed as a script.
print([title.text for title in title_list])
print([board.text for board in boardNum])
print(len(hrefs))
if hrefs:  # guard: indexing an empty result list would raise IndexError
    print(hrefs[0]['href'])

# Keep only the links whose href contains 'tags'.
new_hrefs = [link for link in hrefs if 'tags' in link['href']]
print(len(new_hrefs))

url_main = 'https://www.instagram.com'

# zip() stops at the shortest list, so a mismatch between the three
# independently selected lists cannot raise IndexError.
for title, link, board in zip(title_list, new_hrefs, boardNum):
    print("검색된 키워드 : ", title.text)
    print("키워드 주소 : ", url_main + link['href'])
    print(board.text)
    print('-'*50)

if not new_hrefs:
    raise RuntimeError("no 'tags' links found in the search results")

# Crawl the first matching tag page.
# Close the first browser before opening a new one so the old Chrome
# process is not left running once `driver` is rebound.
driver.quit()
driver = wb.Chrome()
driver.get(url_main + new_hrefs[0]['href'])
# Same 3-second wait the later crawl section uses for this page load.
time.sleep(3)


# Click the first post
btn_board = driver.find_element(By.CLASS_NAME, 'eLAPa')
btn_board.click()


# Fetch the comments and their authors
def comment_def():
    """Print the author and text of each comment found on the current page.

    Parses the page source of the module-level ``driver`` and prints, for
    every ``span`` that directly follows an ``h3._6lAjh`` heading, the
    heading text (author) followed by the span text (comment) and a
    separator line.

    Each author is taken from the heading adjacent to its comment span
    rather than from a separately selected list, so a heading without an
    adjacent span cannot shift the author/comment pairing.
    """
    soup = bs(driver.page_source, 'lxml')

    for comment in soup.select('h3._6lAjh + span'):
        # The selector guarantees the preceding element sibling is the
        # matching h3, so this lookup always finds it.
        commenter = comment.find_previous_sibling('h3', class_='_6lAjh')
        print("작성자 : ", commenter.text)
        print("댓글 : ", comment.text)
        print('-'*50)

# Print the comments of the post that was just opened.
comment_def()


def nextMove():
    """Click the right pagination arrow on the current page, then pause.

    Operates on the module-level ``driver``. The element carries two
    classes, so it is located with a CSS selector; a dotted compound value
    is not a single class name and is not a reliable input for the
    class-name locator. The generic ``find_element(By, ...)`` form is used
    because the ``find_element_by_*`` helpers no longer exist in current
    Selenium releases.

    Raises whatever ``find_element`` raises when no such element exists.
    """
    driver.find_element(
        By.CSS_SELECTOR, '._65Bje.coreSpriteRightPaginationArrow'
    ).click()
    # Fixed pause after the click; presumably lets the next post load.
    time.sleep(2)
# Click the right pagination arrow once.
nextMove()


# SNS crawling using the helper functions
# Close the previously opened browser before starting a new one so the old
# Chrome process is not left running once `driver` is rebound.
driver.quit()
driver = wb.Chrome()
driver.get(url_main + new_hrefs[0]['href'])
time.sleep(3)

# Open the first post. The generic find_element(By, ...) form is used
# because the find_element_by_* helpers no longer exist in current Selenium
# releases.
btn_board = driver.find_element(By.CLASS_NAME, 'eLAPa')
btn_board.click()
# Wait after opening the first post, matching the pause nextMove() applies
# after every later click, so the first parse does not run too early.
time.sleep(2)

# Print the comments of ten consecutive posts, advancing after each one.
for _ in range(10):
    comment_def()
    nextMove()
# Snapshot of the page the browser ends on.
soup = bs(driver.page_source,'lxml')

'Programming > Web Crawling' 카테고리의 다른 글

이미지 크롤링  (0) 2020.03.02
쇼핑 크롤링  (0) 2020.03.02
카페 모든 메뉴 가져오기  (0) 2020.03.02
카페 지점명, 주소, 전화번호 크롤링  (0) 2020.03.02
실습  (0) 2020.03.02