"""Fetch a Naver blog post and convert its body to Markdown.

Running this script prompts for a blog URL, downloads the mobile version of
the page, extracts the post body and rewrites it as Markdown.  There is no
database integration yet, and the WordPress upload step is disabled.
"""
import re

import requests
from bs4 import BeautifulSoup
from markdownify import markdownify as md
from package import GetConfig, MariaDB, ChangeTextToPost, WordPress
import markdown

DESKTOP_HOST = "blog.naver.com"
MOBILE_HOST = "m.blog.naver.com"
REQUEST_TIMEOUT = 10  # seconds; keeps a stalled connection from hanging the script


def get_naver_blog_content_as_markdown(url, timeout=REQUEST_TIMEOUT):
    """Return the body of a Naver blog post as Markdown.

    Args:
        url: Address of the blog post (desktop or mobile host).
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The post body converted to Markdown with blank lines removed, or
        ``None`` when the page cannot be fetched or the body container is
        not found.  The reason is printed in either case.
    """
    # The body is looked up by the class used on the mobile page, so always
    # fetch the mobile host.  A URL that is already mobile is left alone;
    # a blind replace would turn it into "m.m.blog.naver.com".
    if MOBILE_HOST in url:
        mobile_url = url
    else:
        mobile_url = url.replace(DESKTOP_HOST, MOBILE_HOST)

    # A browser-like User-Agent header was tried earlier and turned out to be
    # unnecessary, so the request is sent with the library defaults.
    try:
        response = requests.get(mobile_url, timeout=timeout)
    except requests.RequestException as exc:
        # Keep the "print and return None" contract for network errors too.
        print(f"Failed to fetch the page: {exc}")
        return None

    if response.status_code != 200:
        print(f"Failed to fetch the page: {response.status_code}")
        return None

    # Parse the HTML and pick out the post body container.
    soup = BeautifulSoup(response.text, 'html.parser')
    content = soup.find("div", class_="se-main-container")
    if content is None:
        print("Failed to extract the blog content.")
        return None

    # HTML -> Markdown, then drop blank (whitespace-only) lines.
    markdown_content = md(str(content))
    return "\n".join(
        line for line in markdown_content.splitlines() if line.strip()
    )


# 2024-10-03: load settings from the environment.
print('### Get values From .env')
config = GetConfig()
dict_data = config.get_config_as_dict()

url = input("Enter your blog address : ").strip()
post_article = get_naver_blog_content_as_markdown(url)
if post_article is None:
    # The helper has already printed why it failed; stop instead of crashing
    # on None.replace(...) below.
    raise SystemExit(1)

# NOTE(review): this rewrites every ">" in the text, not only blockquote
# markers at the start of a line -- confirm that is intended.
post_article = post_article.replace(">", "###")
# Replace a line-leading linked-image marker ("[![]...]") with a heading.
post_article = re.sub(r"^\[!\[\].*?\]", "#### 이미지", post_article, flags=re.MULTILINE)

# TODO: the remaining steps are currently disabled:
#   - optionally saving the Markdown to "blog_content.md";
#   - converting to HTML with markdown.markdown(post_article) and creating a
#     draft post via WordPress(dict_data).create_post(2, html), then
#     reporting the response status.