import json
from datetime import datetime
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Seconds to wait for a server before giving up. Without a timeout,
# requests can block indefinitely on an unresponsive host.
DEFAULT_TIMEOUT = 10


def getContents(url, timeout=DEFAULT_TIMEOUT):
    """Fetch *url* and return its visible text with blank lines removed.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server, passed to ``requests.get``.

    Returns:
        The page text as a single string, one non-empty stripped line per
        row, or ``None`` when the response status code is not 200.

    Raises:
        requests.RequestException: On connection errors or when the
            timeout expires.
    """
    # A browser-like User-Agent is sent to avoid 403 responses.
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/113.0.0.0 Safari/537.36'
        ),
    }

    response = requests.get(url, headers=headers, timeout=timeout)

    # Guard clause: report the failure and make the None result explicit.
    if response.status_code != 200:
        print(f"Failed to fetch the URL. Status code: {response.status_code}")
        return None

    # Parse the HTML and drop every tag, keeping only the text content.
    soup = BeautifulSoup(response.text, 'html.parser')
    page_content = soup.get_text()

    # Debug output: dumps the raw extracted text to stdout.
    print("### url DEBUG ###")
    print(page_content)

    # Strip each line and discard the ones that end up empty.
    stripped_lines = (line.strip() for line in page_content.splitlines())
    return "\n".join(line for line in stripped_lines if line)


class WordPress:
    """Minimal client for creating posts through the WordPress REST API."""

    def __init__(self, dict):
        """Store the connection settings.

        Args:
            dict: Mapping that provides the keys ``'wp_url'``, ``'wp_user'``
                and ``'wp_api_key'``.

        Raises:
            KeyError: If any of the three keys is missing.
        """
        # NOTE: the parameter name shadows the builtin ``dict``; it is kept
        # unchanged so existing keyword callers keep working.
        self.wp_url = dict['wp_url']
        self.wp_user = dict['wp_user']
        self.wp_api_key = dict['wp_api_key']

    def create_post(self, category_id, content, media_id=None, status="draft",
                    title="파이썬 자동 포스팅", timeout=DEFAULT_TIMEOUT):
        """Create a post and return the HTTP response.

        Args:
            category_id: Value sent as the ``categories`` field.
            content: Body of the post.
            media_id: Optional value for ``featured_media``; the field is
                omitted when this is ``None``.
            status: Post status, e.g. ``"publish"`` or ``"draft"``.
            title: Title of the post.
            timeout: Seconds to wait for the server, passed to
                ``requests.post``.

        Returns:
            The ``requests.Response`` of the POST request. The response is
            not inspected here; callers can check ``response.ok`` and read
            ``status_code``, ``reason`` and ``text`` to report failures.

        Raises:
            requests.RequestException: On connection errors or when the
                timeout expires.
        """
        payload = {
            "status": status,  # publish / draft
            "title": title,
            "content": content,
            # Whole seconds only, i.e. YYYY-MM-DDTHH:MM:SS (no microseconds).
            "date": datetime.now().isoformat(timespec="seconds"),
            "categories": category_id,
        }
        if media_id is not None:
            payload['featured_media'] = media_id

        # urljoin() replaces the last path segment of a base URL that has no
        # trailing slash ("https://host/blog" would lose "/blog"), so the
        # base is normalised to end with exactly one "/".
        base_url = self.wp_url.rstrip('/') + '/'
        endpoint = urljoin(base_url, "wp-json/wp/v2/posts")

        return requests.post(
            endpoint,
            data=json.dumps(payload),
            headers={'Content-type': "application/json"},
            # A (user, password) tuple makes requests use HTTP Basic auth.
            auth=(self.wp_user, self.wp_api_key),
            timeout=timeout,
        )


if __name__ == "__main__":
    # Example usage:
    # url = 'https://www.hani.co.kr/arti/science/science_general/1161001.html'
    # tmp = getContents(url)
    # print(tmp)
    pass