수프 크롤링 2탄

카테고리 없음

수프 크롤링 2탄

백준파이썬개발자:프로젝트골드 2024. 3. 6. 23:33

from urllib.request import urlopen

from bs4 import BeautifulSoup

# Scrape a blog post: print its <h1> headings and <p> paragraphs.
url = "https://ai-dev.tistory.com/1?category=836119"

# Use the response as a context manager so the HTTP connection is closed
# as soon as parsing is done (the original never closed it).
with urlopen(url) as html:
    # print(html.read())  # way to view the entire raw HTML
    # BeautifulSoup consumes the response body here, inside the `with`,
    # so closing the response afterwards does not affect `bs_obj`.
    bs_obj = BeautifulSoup(html, "html.parser")

# Title extraction: collect every <h1> tag in the document.
title = bs_obj.find_all("h1")
print(title)          # the full list of <h1> tags
print(title[1])       # the second <h1> tag, markup included
print(title[1].text)  # only the text inside that tag

# Body extraction: collect every <p> tag in the document.
contents = bs_obj.find_all("p")

# Form with the surrounding <p> tags still present.
print(contents)

# Pick just one of them (the second <p> tag).
print(contents[1])

# Final, human-readable form: the text without the markup.
print(contents[1].text)

테이블 크롤링

from urllib.request import urlopen
from bs4 import BeautifulSoup 

# Scrape a blog post: print the text of every cell in its first table.
url = "https://ai-dev.tistory.com/2"

# Use the response as a context manager so the HTTP connection is closed
# once the document has been parsed (the original never closed it).
with urlopen(url) as html:
    bs_obj = BeautifulSoup(html, "html.parser")

# All <table> tags in the document; only the first one is inspected below.
table_tag = bs_obj.find_all("table")

# Every <td> cell nested inside the first table.
table_tag01 = table_tag[0].find_all("td")

# Print each cell's text content, one cell per line.
for cell in table_tag01:
    print(cell.text)