import time

from bs4 import BeautifulSoup as bs
from selenium import webdriver as wb
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
# Open Instagram and locate the search box.
url = 'https://www.instagram.com/'
driver = wb.Chrome()
driver.get(url)
# Log in manually in the opened browser window before the next step runs.
# find_element_by_css_selector was removed in Selenium 4 — use the
# find_element(By.*, ...) API instead.
input_search = driver.find_element(By.CSS_SELECTOR, 'input.XTCLo')
input_search.send_keys('찾는키워드')
# Parse the rendered search dropdown: result titles, post counts, and links.
# (Bare expression statements such as `title_list` / `hrefs[0]['href']` were
# notebook-cell leftovers — no-ops in a script — and have been removed.)
soup = bs(driver.page_source, 'lxml')
title_list = soup.select('span._28KuJ + div span.Ap253')
boardNum = soup.select('div.Fy4o8')
hrefs = soup.select('a.yCE8d')
# Keep only anchors that point at hashtag pages (their href contains 'tags').
new_hrefs = [anchor for anchor in hrefs if 'tags' in anchor['href']]
url_main = 'https://www.instagram.com'
# Print each filtered result: title, absolute URL, and post count.
# title_list, new_hrefs and boardNum are assumed to be parallel lists (one
# entry per search result) — zip stops at the shortest, so a length mismatch
# no longer raises IndexError. TODO confirm the lists stay aligned.
for title, anchor, count in zip(title_list, new_hrefs, boardNum):
    print("검색된 키워드 : ", title.text)
    print("키워드 주소 : ", url_main + anchor['href'])
    print(count.text)
    print('-' * 50)
# Crawl the first matching hashtag page.
driver = wb.Chrome()
driver.get(url_main + new_hrefs[0]['href'])
# NOTE(review): 0.3 s is likely too short for the page to finish rendering —
# the later crawl at the bottom of this file waits 3 s; confirm and align.
time.sleep(0.3)
# Click the first post thumbnail to open the post overlay.
# find_element_by_class_name was removed in Selenium 4.
btn_board = driver.find_element(By.CLASS_NAME, 'eLAPa')
btn_board.click()
# 댓글, 작성자 가지고 오기
def comment_def():
    """Print every author/comment pair from the currently open post.

    Reads the global ``driver``'s page source; no return value — output goes
    to stdout.
    """
    soup = bs(driver.page_source, 'lxml')
    # find_all is the modern bs4 name (findAll is a deprecated alias).
    commenters = soup.find_all('h3', class_='_6lAjh')
    comments = soup.select('h3._6lAjh + span')
    # zip pairs each author with their comment and stops at the shorter list,
    # avoiding the IndexError the parallel-index version could raise when the
    # two selectors match different numbers of nodes.
    for commenter, comment in zip(commenters, comments):
        print("작성자 : ", commenter.text)
        print("댓글 : ", comment.text)
        print('-' * 50)


comment_def()
def nextMove():
    """Click the right-pagination arrow to advance to the next post.

    Sleeps 2 s afterwards so the next post can load before it is scraped.
    """
    # '_65Bje.coreSpriteRightPaginationArrow' is a compound class locator,
    # so express it as an explicit CSS selector (Selenium 4 API; the
    # find_element_by_class_name helper was removed).
    driver.find_element(
        By.CSS_SELECTOR, '._65Bje.coreSpriteRightPaginationArrow'
    ).click()
    time.sleep(2)


nextMove()
# Function-driven SNS crawl: open the first tag page, click the first post,
# then print the comments of 10 consecutive posts.
driver = wb.Chrome()
driver.get(url_main + new_hrefs[0]['href'])
time.sleep(3)
# find_element_by_class_name was removed in Selenium 4.
btn_board = driver.find_element(By.CLASS_NAME, 'eLAPa')
btn_board.click()
for num in range(10):
    comment_def()
    nextMove()
# Final snapshot of the page after the crawl (kept for interactive use).
soup = bs(driver.page_source, 'lxml')