bot deny, add header

This commit is contained in:
2025-05-11 00:51:05 +09:00
parent 084f46edd9
commit 1ad58eaa81
15 changed files with 472 additions and 13 deletions

View File

@@ -4,8 +4,15 @@ from bs4 import BeautifulSoup
from datetime import datetime
def getContents(url):
# ✅ User-Agent 헤더 추가 (403 방지용)
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) ' \
'AppleWebKit/537.36 (KHTML, like Gecko) ' \
'Chrome/113.0.0.0 Safari/537.36'
}
# HTTP GET 요청으로 페이지 가져오기
response = requests.get(url)
response = requests.get(url, headers=headers)
# 응답 상태 확인
if response.status_code == 200:
@@ -14,6 +21,8 @@ def getContents(url):
# HTML 태그를 제거 후 페이지의 모든 텍스트 가져오기 (전체 내용)
page_content = soup.get_text()
print("### url DEBUG ###")
print(page_content)
# 빈 줄을 제거하고 텍스트만 출력 (줄바꿈 문자를 기준으로 필터링)
lines = [line.strip() for line in page_content.splitlines() if line.strip()]
@@ -54,7 +63,7 @@ class WordPress():
# print(f"실패 code:{result.status_code} reason:{result.reason} msg:{result.text}")
if __name__ == "__main__":
# url = 'example_url'
# url = 'https://www.hani.co.kr/arti/science/science_general/1161001.html'
# tmp = getContents(url)
# print(tmp)
pass