"""Fetch a web page and print its text content with the HTML tags removed."""
import requests
from bs4 import BeautifulSoup
import get_url


# The target URL is read once, at import time, from the 'url' entry of the
# record returned by get_url.fetch_data_from_mariadb().
url = get_url.fetch_data_from_mariadb()['url']


def getContents(timeout=10):
    """Download the page at the module-level ``url`` and return its text.

    The response body is parsed with BeautifulSoup's ``html.parser``; the
    text is extracted with ``get_text()``, each line is stripped of
    surrounding whitespace, and lines that end up empty are dropped.

    Args:
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument. Without a timeout the request can block forever if
            the server stops responding.

    Returns:
        The remaining lines joined with ``"\\n"`` when the response status
        is 200; otherwise ``None`` (after printing the status code).

    Raises:
        requests.exceptions.RequestException: Propagated unchanged from
            ``requests.get`` (connection failure, timeout, ...).
    """
    # Fetch the page with an HTTP GET request.
    response = requests.get(url, timeout=timeout)

    # Anything other than 200 is reported and signalled with None.
    if response.status_code != 200:
        print(f"Failed to fetch the URL. Status code: {response.status_code}")
        return None

    # Parse the HTML and take all of the page's text with the tags removed.
    soup = BeautifulSoup(response.text, 'html.parser')
    page_content = soup.get_text()

    # Strip every line once, then keep only the non-empty ones.
    stripped = (line.strip() for line in page_content.splitlines())
    lines = [line for line in stripped if line]

    return "\n".join(lines)


tmp = getContents()
print(tmp)