import streamlit as st
import pandas as pd
import requests
from bs4 import BeautifulSoup
import re
import time
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from collections import Counter
import json
import os
from datetime import datetime, timedelta
from openai import OpenAI  # new-style client import
from dotenv import load_dotenv
import traceback
import plotly.graph_objects as go
import schedule
import threading
import matplotlib.pyplot as plt
# μ›Œλ“œν΄λΌμš°λ“œ μΆ”κ°€
try:
from wordcloud import WordCloud
except ImportError:
st.error("wordcloud νŒ¨ν‚€μ§€λ₯Ό μ„€μΉ˜ν•΄μ£Όμ„Έμš”: pip install wordcloud")
WordCloud = None
# μŠ€μΌ€μ€„λŸ¬ μƒνƒœ 클래슀 μΆ”κ°€
class SchedulerState:
def __init__(self):
self.is_running = False
self.thread = None
self.last_run = None
self.next_run = None
self.scheduled_jobs = []
self.scheduled_results = []
# Create the global scheduler state object
global_scheduler_state = SchedulerState()
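# NOTE: this module-level object is created once per server process and is shared
# by every session and rerun of the Streamlit app; it is not per-user state.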
# Initialize session state for API key management
if 'openai_api_key' not in st.session_state:
st.session_state.openai_api_key = None
st.session_state.openai_client = None
# Try to load the API key from several sources
load_dotenv()  # try a .env file
# 1. Check the environment variable
if os.environ.get('OPENAI_API_KEY'):
st.session_state.openai_api_key = os.environ.get('OPENAI_API_KEY')
try:
        # Create the client without a proxies argument
st.session_state.openai_client = OpenAI(api_key=st.session_state.openai_api_key)
except Exception as e:
st.error(f"OpenAI ν΄λΌμ΄μ–ΈνŠΈ μ΄ˆκΈ°ν™” 였λ₯˜: {str(e)}")
# 2. Check Streamlit secrets for the API key (try-except guards against a missing secrets file)
if not st.session_state.openai_api_key:
try:
if 'OPENAI_API_KEY' in st.secrets:
st.session_state.openai_api_key = st.secrets['OPENAI_API_KEY']
try:
st.session_state.openai_client = OpenAI(api_key=st.session_state.openai_api_key)
except Exception as e:
st.error(f"OpenAI ν΄λΌμ΄μ–ΈνŠΈ μ΄ˆκΈ°ν™” 였λ₯˜: {str(e)}")
except Exception as e:
        pass  # a missing secrets file is not an error
# Point NLTK's data path at a temporary directory
nltk_data_dir = '/tmp/nltk_data'
os.makedirs(nltk_data_dir, exist_ok=True)
nltk.data.path.insert(0, nltk_data_dir)  # search this path first
# Download the required NLTK data
try:
nltk.data.find('tokenizers/punkt')
except LookupError:
nltk.download('punkt', download_dir=nltk_data_dir)
try:
nltk.data.find('corpora/stopwords')
except LookupError:
nltk.download('stopwords', download_dir=nltk_data_dir)
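# Newer NLTK releases (>= 3.8.2) also need the 'punkt_tab' resource for
# word_tokenize; fetch it as well, ignoring failures on older versions where
# the resource does not exist.
try:
    nltk.data.find('tokenizers/punkt_tab')
except LookupError:
    try:
        nltk.download('punkt_tab', download_dir=nltk_data_dir)
    except Exception:
        pass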
# νŽ˜μ΄μ§€ μ„€μ •
st.set_page_config(page_title="λ‰΄μŠ€ 기사 도ꡬ", page_icon="πŸ“°", layout="wide")
# μ‚¬μ΄λ“œλ°”μ— API ν‚€ μž…λ ₯ ν•„λ“œ μΆ”κ°€
with st.sidebar:
st.title("λ‰΄μŠ€ 기사 도ꡬ")
menu = st.radio(
"메뉴 선택",
["λ‰΄μŠ€ 기사 크둀링", "기사 λΆ„μ„ν•˜κΈ°", "μƒˆ 기사 μƒμ„±ν•˜κΈ°", "λ‰΄μŠ€ 기사 μ˜ˆμ•½ν•˜κΈ°"]
)
st.divider()
api_key = st.text_input("OpenAI API ν‚€ μž…λ ₯", type="password")
if api_key:
st.session_state.openai_api_key = api_key
try:
            # Create the client without a proxies argument
st.session_state.openai_client = OpenAI(api_key=api_key)
st.success("API ν‚€κ°€ μ„€μ •λ˜μ—ˆμŠ΅λ‹ˆλ‹€!")
except Exception as e:
st.error(f"OpenAI ν΄λΌμ΄μ–ΈνŠΈ μ΄ˆκΈ°ν™” 였λ₯˜: {str(e)}")
# Load previously saved articles
def load_saved_articles():
    if os.path.exists('/tmp/saved_articles/articles.json'):
        with open('/tmp/saved_articles/articles.json', 'r', encoding='utf-8') as f:
            return json.load(f)
    return []
# Save articles to disk
def save_articles(articles):
    os.makedirs('/tmp/saved_articles', exist_ok=True)
    with open('/tmp/saved_articles/articles.json', 'w', encoding='utf-8') as f:
        json.dump(articles, f, ensure_ascii=False, indent=2)
@st.cache_data
def crawl_naver_news(keyword, num_articles=5):
"""
넀이버 λ‰΄μŠ€ 기사λ₯Ό μˆ˜μ§‘ν•˜λŠ” ν•¨μˆ˜
"""
url = f"https://search.naver.com/search.naver?where=news&query={keyword}"
results = []
try:
# νŽ˜μ΄μ§€ μš”μ²­
response = requests.get(url)
soup = BeautifulSoup(response.text, 'html.parser')
# λ‰΄μŠ€ μ•„μ΄ν…œ μ°ΎκΈ°
news_items = soup.select('div.sds-comps-base-layout.sds-comps-full-layout')
        # Extract fields from each news item
for i, item in enumerate(news_items):
if i >= num_articles:
break
try:
                # Title and link
title_element = item.select_one('a.X0fMYp2dHd0TCUS2hjww span')
if not title_element:
continue
title = title_element.text.strip()
link_element = item.select_one('a.X0fMYp2dHd0TCUS2hjww')
link = link_element['href'] if link_element else ""
                # Press/source
press_element = item.select_one('div.sds-comps-profile-info-title span.sds-comps-text-type-body2')
source = press_element.text.strip() if press_element else "μ•Œ 수 μ—†μŒ"
                # Date
date_element = item.select_one('span.r0VOr')
date = date_element.text.strip() if date_element else "μ•Œ 수 μ—†μŒ"
                # Preview text
desc_element = item.select_one('a.X0fMYp2dHd0TCUS2hjww.IaKmSOGPdofdPwPE6cyU > span')
description = desc_element.text.strip() if desc_element else "λ‚΄μš© μ—†μŒ"
results.append({
'title': title,
'link': link,
'description': description,
'source': source,
'date': date,
'content': "" # λ‚˜μ€‘μ— 원문 λ‚΄μš©μ„ μ €μž₯ν•  ν•„λ“œ
})
except Exception as e:
st.error(f"기사 정보 μΆ”μΆœ 쀑 였λ₯˜ λ°œμƒ: {str(e)}")
continue
except Exception as e:
st.error(f"νŽ˜μ΄μ§€ μš”μ²­ 쀑 였λ₯˜ λ°œμƒ: {str(e)}")
return results
# Fetch the full text of an article
def get_article_content(url):
try:
response = requests.get(url, timeout=5)
soup = BeautifulSoup(response.text, 'html.parser')
        # Naver News article body
content = soup.select_one('#dic_area')
if content:
text = content.text.strip()
            text = re.sub(r'\s+', ' ', text)  # collapse repeated whitespace
return text
        # Fallback selectors for other news sites (more may need to be added)
content = soup.select_one('.article_body, .article-body, .article-content, .news-content-inner')
if content:
text = content.text.strip()
text = re.sub(r'\s+', ' ', text)
return text
return "본문을 κ°€μ Έμ˜¬ 수 μ—†μŠ΅λ‹ˆλ‹€."
except Exception as e:
return f"였λ₯˜ λ°œμƒ: {str(e)}"
# Keyword analysis with NLTK
def analyze_keywords(text, top_n=10):
    # Korean stopword list (defined manually here)
    korean_stopwords = ['이', 'κ·Έ', 'μ €', '것', '및', 'λ“±', 'λ₯Ό', '을', '에', 'μ—μ„œ', '의', '으둜', '둜']
tokens = word_tokenize(text)
tokens = [word for word in tokens if word.isalnum() and len(word) > 1 and word not in korean_stopwords]
word_count = Counter(tokens)
top_keywords = word_count.most_common(top_n)
return top_keywords
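# Rough example (hypothetical input): word_tokenize splits Korean on whitespace
# and punctuation, so analyze_keywords("인곡지λŠ₯ 기술과 인곡지λŠ₯ μ„œλΉ„μŠ€") returns
# approximately [('인곡지λŠ₯', 2), ('기술과', 1), ('μ„œλΉ„μŠ€', 1)].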
#μ›Œλ“œ ν΄λΌμš°λ“œμš© 뢄석
def extract_keywords_for_wordcloud(text, top_n=50):
if not text or len(text.strip()) < 10:
return {}
try:
try:
tokens = word_tokenize(text.lower())
except Exception as e:
st.warning(f"{str(e)} 였λ₯˜λ°œμƒ")
tokens = text.lower().split()
stop_words = set()
try:
stop_words = set(stopwords.words('english'))
except Exception:
pass
korea_stop_words = {
'및', 'λ“±', 'λ₯Ό', '이', '의', 'κ°€', '에', 'λŠ”', '으둜', 'μ—μ„œ', 'κ·Έ', '또', 'λ˜λŠ”', 'ν•˜λŠ”', 'ν• ', 'ν•˜κ³ ',
'μžˆλ‹€', '이닀', 'μœ„ν•΄', '것이닀', '것은', 'λŒ€ν•œ', 'λ•Œλ¬Έ', '그리고', 'ν•˜μ§€λ§Œ', 'κ·ΈλŸ¬λ‚˜', 'κ·Έλž˜μ„œ',
'μž…λ‹ˆλ‹€', 'ν•©λ‹ˆλ‹€', 'μŠ΅λ‹ˆλ‹€', 'μš”', 'μ£ ', 'κ³ ', 'κ³Ό', '와', '도', '은', '수', '것', 'λ“€', '제', 'μ €',
'λ…„', 'μ›”', '일', 'μ‹œ', 'λΆ„', '초', 'μ§€λ‚œ', 'μ˜¬ν•΄', 'λ‚΄λ…„', '졜근', 'ν˜„μž¬', '였늘', '내일', 'μ–΄μ œ',
'μ˜€μ „', 'μ˜€ν›„', 'λΆ€ν„°', 'κΉŒμ§€', 'μ—κ²Œ', 'κ»˜μ„œ', '이라고', '라고', 'ν•˜λ©°', 'ν•˜λ©΄μ„œ', '따라', '톡해',
'κ΄€λ ¨', 'ν•œνŽΈ', '특히', 'κ°€μž₯', '맀우', '더', '덜', '많이', '쑰금', '항상', '자주', '가끔', '거의',
'μ „ν˜€', 'λ°”λ‘œ', '정말', 'λ§Œμ•½', 'λΉ„λ‘―ν•œ', '등을', '등이', 'λ“±μ˜', 'λ“±κ³Ό', '등도', '등에', 'λ“±μ—μ„œ',
'기자', 'λ‰΄μŠ€', '사진', 'μ—°ν•©λ‰΄μŠ€', 'λ‰΄μ‹œμŠ€', '제곡', '무단', 'μ „μž¬', '재배포', 'κΈˆμ§€', '액컀', '멘트',
'일보', '데일리', '경제', 'μ‚¬νšŒ', 'μ •μΉ˜', '세계', 'κ³Όν•™', '아이티', 'λ‹·μ»΄', '씨넷', 'λΈ”λ‘œν„°', 'μ „μžμ‹ λ¬Έ'
}
stop_words.update(korea_stop_words)
        # Keep tokens longer than one character that are not stopwords
filtered_tokens = [word for word in tokens if len(word) > 1 and word not in stop_words]
# 단어 λΉˆλ„ 계산
word_freq = {}
for word in filtered_tokens:
            if word.isalnum():  # keep alphanumeric tokens only
word_freq[word] = word_freq.get(word, 0) + 1
# λΉˆλ„μˆœμœΌλ‘œ μ •λ ¬ν•˜μ—¬ μƒμœ„ n개 λ°˜ν™˜
sorted_words = sorted(word_freq.items(), key=lambda x: x[1], reverse=True)
if not sorted_words:
return {"data": 1, "analysis": 1, "news": 1}
return dict(sorted_words[:top_n])
except Exception as e:
st.error(f"였λ₯˜λ°œμƒ {str(e)}")
return {"data": 1, "analysis": 1, "news": 1}
# μ›Œλ“œ ν΄λΌμš°λ“œ 생성 ν•¨μˆ˜
def generate_wordcloud(keywords_dict):
if not WordCloud:
st.warning("μ›Œλ“œν΄λΌμš°λ“œ μ„€μΉ˜μ•ˆλ˜μ–΄ μžˆμŠ΅λ‹ˆλ‹€.")
return None
try:
wc= WordCloud(
width=800,
height=400,
background_color = 'white',
colormap = 'viridis',
max_font_size=150,
random_state=42
).generate_from_frequencies(keywords_dict)
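        # Hangul renders as empty boxes without a Korean-capable font, so retry
        # with a local font if one is found (the paths below are assumptions;
        # adjust them for the deployment environment).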
try:
            possible_font_paths = ["NanumGothic.ttf", "/usr/share/fonts/truetype/nanum/NanumGothic.ttf"]
font_path = None
for path in possible_font_paths:
if os.path.exists(path):
font_path = path
break
if font_path:
wc= WordCloud(
font_path=font_path,
width=800,
height=400,
background_color = 'white',
colormap = 'viridis',
max_font_size=150,
random_state=42
).generate_from_frequencies(keywords_dict)
except Exception as e:
print(f"였λ₯˜λ°œμƒ {str(e)}")
return wc
except Exception as e:
st.error(f"였λ₯˜λ°œμƒ {str(e)}")
return None
# Aggregate analysis over a collection of articles
def analyze_news_content(news_df):
if news_df.empty:
return "데이터가 μ—†μŠ΅λ‹ˆλ‹€"
results = {}
    # By source
    if 'source' in news_df.columns:
        results['source_counts'] = news_df['source'].value_counts().to_dict()
    # By date
    if 'date' in news_df.columns:
        results['date_counts'] = news_df['date'].value_counts().to_dict()
    # Keyword analysis
all_text = " ".join(news_df['title'].fillna('') + " " + news_df['content'].fillna(''))
if len(all_text.strip()) > 0:
results['top_keywords_for_wordcloud']= extract_keywords_for_wordcloud(all_text, top_n=50)
results['top_keywords'] = analyze_keywords(all_text)
else:
results['top_keywords_for_wordcloud']={}
results['top_keywords'] = []
return results
# OpenAI APIλ₯Ό μ΄μš©ν•œ μƒˆ 기사 생성 (μ΅œμ‹  방식)
def generate_article(original_content, prompt_text):
try:
if not st.session_state.openai_client:
return "OpenAI API ν‚€κ°€ μ„€μ •λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€."
response = st.session_state.openai_client.chat.completions.create(
model="gpt-4.1-mini",
messages=[
{"role": "system", "content": "당신은 전문적인 λ‰΄μŠ€ κΈ°μžμž…λ‹ˆλ‹€. μ£Όμ–΄μ§„ λ‚΄μš©μ„ λ°”νƒ•μœΌλ‘œ μƒˆλ‘œμš΄ 기사λ₯Ό μž‘μ„±ν•΄μ£Όμ„Έμš”."},
{"role": "user", "content": f"λ‹€μŒ λ‚΄μš©μ„ λ°”νƒ•μœΌλ‘œ {prompt_text}\n\n{original_content[:1000]}"}
],
max_tokens=2000
)
return response.choices[0].message.content
except Exception as e:
return f"기사 생성 였λ₯˜: {str(e)}"
# OpenAI APIλ₯Ό μ΄μš©ν•œ 이미지 생성 (μ΅œμ‹  방식)
def generate_image(prompt):
try:
if not st.session_state.openai_client:
return "OpenAI API ν‚€κ°€ μ„€μ •λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€."
response = st.session_state.openai_client.images.generate(
model="dall-e-3", # λ˜λŠ” μ‚¬μš© κ°€λŠ₯ν•œ λͺ¨λΈ
prompt=prompt,
n=1,
size="1024x1024"
)
        return response.data[0].url  # the current API returns a hosted image URL
except Exception as e:
return f"이미지 생성 였λ₯˜: {str(e)}"
# Scheduler helpers
def get_next_run_time(hour, minute):
now = datetime.now()
next_run = now.replace(hour=hour, minute=minute, second=0, microsecond=0)
if next_run <= now:
next_run += timedelta(days=1)
return next_run
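# For example, at 2024-01-01 10:30, get_next_run_time(9, 0) returns
# 2024-01-02 09:00 (tomorrow), while get_next_run_time(23, 0) returns
# 2024-01-01 23:00 (later the same day).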
def run_scheduled_task():
try:
while global_scheduler_state.is_running:
schedule.run_pending()
time.sleep(1)
except Exception as e:
print(f"μŠ€μΌ€μ€„λŸ¬ μ—λŸ¬ λ°œμƒ: {e}")
traceback.print_exc()
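# NOTE: run_scheduled_task runs on a background thread, so scheduled jobs must
# avoid Streamlit UI calls; perform_news_task below only prints and writes files.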
def perform_news_task(task_type, keyword, num_articles, file_prefix):
try:
articles = crawl_naver_news(keyword, num_articles)
        # Fetch article bodies
for article in articles:
article['content'] = get_article_content(article['link'])
            time.sleep(0.5)  # be polite to the server
        # Save results
os.makedirs('/tmp/scheduled_news', exist_ok=True)
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
filename = f"/tmp/scheduled_news/{file_prefix}_{task_type}_{timestamp}.json"
with open(filename, 'w', encoding='utf-8') as f:
json.dump(articles, f, ensure_ascii=False, indent=2)
global_scheduler_state.last_run = datetime.now()
print(f"{datetime.now()} - {task_type} λ‰΄μŠ€ 기사 μˆ˜μ§‘ μ™„λ£Œ: {keyword}")
# μ „μ—­ μƒνƒœμ— μˆ˜μ§‘ κ²°κ³Όλ₯Ό μ €μž₯ (UI μ—…λ°μ΄νŠΈμš©)
result_item = {
'task_type': task_type,
'keyword': keyword,
'timestamp': timestamp,
'num_articles': len(articles),
'filename': filename
}
global_scheduler_state.scheduled_results.append(result_item)
except Exception as e:
print(f"μž‘μ—… μ‹€ν–‰ 쀑 였λ₯˜ λ°œμƒ: {e}")
traceback.print_exc()
def start_scheduler(daily_tasks, interval_tasks):
if not global_scheduler_state.is_running:
schedule.clear()
global_scheduler_state.scheduled_jobs = []
# 일별 νƒœμŠ€ν¬ 등둝
for task in daily_tasks:
hour = task['hour']
minute = task['minute']
keyword = task['keyword']
num_articles = task['num_articles']
job_id = f"daily_{keyword}_{hour}_{minute}"
schedule.every().day.at(f"{hour:02d}:{minute:02d}").do(
perform_news_task, "daily", keyword, num_articles, job_id
).tag(job_id)
global_scheduler_state.scheduled_jobs.append({
'id': job_id,
'type': 'daily',
'time': f"{hour:02d}:{minute:02d}",
'keyword': keyword,
'num_articles': num_articles
})
# μ‹œκ°„ 간격 νƒœμŠ€ν¬ 등둝
for task in interval_tasks:
interval_minutes = task['interval_minutes']
keyword = task['keyword']
num_articles = task['num_articles']
run_immediately = task['run_immediately']
job_id = f"interval_{keyword}_{interval_minutes}"
if run_immediately:
                # Run once immediately
perform_news_task("interval", keyword, num_articles, job_id)
            # Then schedule every N minutes
schedule.every(interval_minutes).minutes.do(
perform_news_task, "interval", keyword, num_articles, job_id
).tag(job_id)
global_scheduler_state.scheduled_jobs.append({
'id': job_id,
'type': 'interval',
'interval': f"{interval_minutes}λΆ„λ§ˆλ‹€",
'keyword': keyword,
'num_articles': num_articles,
'run_immediately': run_immediately
})
# λ‹€μŒ μ‹€ν–‰ μ‹œκ°„ 계산
next_run = schedule.next_run()
if next_run:
global_scheduler_state.next_run = next_run
        # Start the scheduler thread
global_scheduler_state.is_running = True
global_scheduler_state.thread = threading.Thread(
target=run_scheduled_task, daemon=True
)
global_scheduler_state.thread.start()
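        # The thread is a daemon, so it dies with the Streamlit process;
        # scheduled jobs are not persisted and are lost on restart.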
# μƒνƒœλ₯Ό μ„Έμ…˜ μƒνƒœλ‘œλ„ 볡사 (UI ν‘œμ‹œμš©)
if 'scheduler_status' not in st.session_state:
st.session_state.scheduler_status = {}
st.session_state.scheduler_status = {
'is_running': global_scheduler_state.is_running,
'last_run': global_scheduler_state.last_run,
'next_run': global_scheduler_state.next_run,
'jobs_count': len(global_scheduler_state.scheduled_jobs)
}
def stop_scheduler():
if global_scheduler_state.is_running:
global_scheduler_state.is_running = False
schedule.clear()
if global_scheduler_state.thread:
global_scheduler_state.thread.join(timeout=1)
global_scheduler_state.next_run = None
global_scheduler_state.scheduled_jobs = []
# UI μƒνƒœ μ—…λ°μ΄νŠΈ
if 'scheduler_status' in st.session_state:
st.session_state.scheduler_status['is_running'] = False
# Render the screen for the selected menu
if menu == "λ‰΄μŠ€ 기사 크둀링":
st.header("λ‰΄μŠ€ 기사 크둀링")
keyword = st.text_input("검색어 μž…λ ₯", "인곡지λŠ₯")
num_articles = st.slider("κ°€μ Έμ˜¬ 기사 수", min_value=1, max_value=20, value=5)
if st.button("기사 κ°€μ Έμ˜€κΈ°"):
with st.spinner("기사λ₯Ό μˆ˜μ§‘ μ€‘μž…λ‹ˆλ‹€..."):
articles = crawl_naver_news(keyword, num_articles)
            # Fetch article bodies, updating a single progress bar in place
            progress_bar = st.progress(0)
            for i, article in enumerate(articles):
                progress_bar.progress((i + 1) / len(articles))
                article['content'] = get_article_content(article['link'])
                time.sleep(0.5)  # be polite to the server
            # Save and display the results
save_articles(articles)
st.success(f"{len(articles)}개의 기사λ₯Ό μˆ˜μ§‘ν–ˆμŠ΅λ‹ˆλ‹€!")
            # Show the collected articles
for article in articles:
with st.expander(f"{article['title']} - {article['source']}"):
st.write(f"**좜처:** {article['source']}")
st.write(f"**λ‚ μ§œ:** {article['date']}")
st.write(f"**μš”μ•½:** {article['description']}")
st.write(f"**링크:** {article['link']}")
st.write("**본문 미리보기:**")
st.write(article['content'][:300] + "..." if len(article['content']) > 300 else article['content'])
elif menu == "기사 λΆ„μ„ν•˜κΈ°":
st.header("기사 λΆ„μ„ν•˜κΈ°")
articles = load_saved_articles()
if not articles:
st.warning("μ €μž₯된 기사가 μ—†μŠ΅λ‹ˆλ‹€. λ¨Όμ € 'λ‰΄μŠ€ 기사 크둀링' λ©”λ‰΄μ—μ„œ 기사λ₯Ό μˆ˜μ§‘ν•΄μ£Όμ„Έμš”.")
else:
        # Article selection
titles = [article['title'] for article in articles]
selected_title = st.selectbox("뢄석할 기사 선택", titles)
selected_article = next((a for a in articles if a['title'] == selected_title), None)
if selected_article:
st.write(f"**제λͺ©:** {selected_article['title']}")
st.write(f"**좜처:** {selected_article['source']}")
            # Show the body
with st.expander("기사 λ³Έλ¬Έ 보기"):
st.write(selected_article['content'])
            # Choose an analysis method
analysis_type = st.radio(
"뢄석 방법",
["ν‚€μ›Œλ“œ 뢄석", "감정 뢄석", "ν…μŠ€νŠΈ 톡계"]
)
if analysis_type == "ν‚€μ›Œλ“œ 뢄석":
if st.button("ν‚€μ›Œλ“œ λΆ„μ„ν•˜κΈ°"):
with st.spinner("ν‚€μ›Œλ“œλ₯Ό 뢄석 μ€‘μž…λ‹ˆλ‹€..."):
keyword_tab1, keyword_tab2 = st.tabs(["ν‚€μ›Œλ“œ λΉˆλ„", "μ›Œλ“œν΄λΌμš°λ“œ"])
with keyword_tab1:
keywords = analyze_keywords(selected_article['content'])
# μ‹œκ°ν™”
df = pd.DataFrame(keywords, columns=['단어', 'λΉˆλ„μˆ˜'])
st.bar_chart(df.set_index('단어'))
st.write("**μ£Όμš” ν‚€μ›Œλ“œ:**")
for word, count in keywords:
st.write(f"- {word}: {count}회")
with keyword_tab2:
keyword_dict = extract_keywords_for_wordcloud(selected_article['content'])
wc = generate_wordcloud(keyword_dict)
if wc:
fig, ax = plt.subplots(figsize=(10, 5))
ax.imshow(wc, interpolation='bilinear')
ax.axis('off')
st.pyplot(fig)
# ν‚€μ›Œλ“œ μƒμœ„ 20개 ν‘œμ‹œ
st.write("**μƒμœ„ 20개 ν‚€μ›Œλ“œ:**")
top_keywords = sorted(keyword_dict.items(), key=lambda x: x[1], reverse=True)[:20]
keyword_df = pd.DataFrame(top_keywords, columns=['ν‚€μ›Œλ“œ', 'λΉˆλ„'])
st.dataframe(keyword_df)
else:
st.error("μ›Œλ“œν΄λΌμš°λ“œλ₯Ό 생성할 수 μ—†μŠ΅λ‹ˆλ‹€.")
elif analysis_type == "ν…μŠ€νŠΈ 톡계":
if st.button("ν…μŠ€νŠΈ 톡계 뢄석"):
content = selected_article['content']
# ν…μŠ€νŠΈ 톡계 계산
word_count = len(re.findall(r'\b\w+\b', content))
char_count = len(content)
sentence_count = len(re.split(r'[.!?]+', content))
avg_word_length = sum(len(word) for word in re.findall(r'\b\w+\b', content)) / word_count if word_count > 0 else 0
avg_sentence_length = word_count / sentence_count if sentence_count > 0 else 0
                    # Display the statistics
st.subheader("ν…μŠ€νŠΈ 톡계")
col1, col2, col3 = st.columns(3)
with col1:
st.metric("단어 수", f"{word_count:,}")
with col2:
st.metric("문자 수", f"{char_count:,}")
with col3:
st.metric("λ¬Έμž₯ 수", f"{sentence_count:,}")
col1, col2 = st.columns(2)
with col1:
st.metric("평균 단어 길이", f"{avg_word_length:.1f}자")
with col2:
st.metric("평균 λ¬Έμž₯ 길이", f"{avg_sentence_length:.1f}단어")
# ν…μŠ€νŠΈ λ³΅μž‘μ„± 점수 (κ°„λ‹¨ν•œ μ˜ˆμ‹œ)
complexity_score = min(10, (avg_sentence_length / 10) * 5 + (avg_word_length / 5) * 5)
st.progress(complexity_score / 10)
st.write(f"ν…μŠ€νŠΈ λ³΅μž‘μ„± 점수: {complexity_score:.1f}/10")
# μΆœν˜„ λΉˆλ„ λ§‰λŒ€ κ·Έλž˜ν”„
st.subheader("ν’ˆμ‚¬λ³„ 뢄포 (ν•œκ΅­μ–΄/μ˜μ–΄ 지원)")
try:
                        # Check whether KoNLPy is installed
try:
from konlpy.tag import Okt
konlpy_installed = True
except ImportError:
konlpy_installed = False
st.warning("ν•œκ΅­μ–΄ ν˜•νƒœμ†Œ 뢄석을 μœ„ν•΄ KoNLPyλ₯Ό μ„€μΉ˜ν•΄μ£Όμ„Έμš”: pip install konlpy")
# μ˜μ–΄ POS tagger μ€€λΉ„
from nltk import pos_tag
try:
nltk.data.find('taggers/averaged_perceptron_tagger')
except LookupError:
nltk.download('averaged_perceptron_tagger', download_dir=nltk_data_dir)
                        # Simple language detection
is_korean = bool(re.search(r'[κ°€-힣]', content))
if is_korean and konlpy_installed:
                            # Korean morphological analysis
okt = Okt()
tagged = okt.pos(content)
                            # Korean POS label mapping
pos_dict = {
'Noun': 'λͺ…사', 'NNG': 'λͺ…사', 'NNP': '고유λͺ…사',
'Verb': '동사', 'VV': '동사', 'VA': 'ν˜•μš©μ‚¬',
'Adjective': 'ν˜•μš©μ‚¬',
'Adverb': '뢀사',
'Josa': '쑰사', 'Punctuation': 'ꡬ두점',
'Determiner': 'κ΄€ν˜•μ‚¬', 'Exclamation': '감탄사'
}
pos_counts = {'λͺ…사': 0, '동사': 0, 'ν˜•μš©μ‚¬': 0, '뢀사': 0, '쑰사': 0, 'ꡬ두점': 0, 'κ΄€ν˜•μ‚¬': 0, '감탄사': 0, '기타': 0}
for _, pos in tagged:
if pos in pos_dict:
pos_counts[pos_dict[pos]] += 1
                                elif pos.startswith('N'):  # other noun tags
pos_counts['λͺ…사'] += 1
                                elif pos.startswith('V'):  # other verb tags
pos_counts['동사'] += 1
else:
pos_counts['기타'] += 1
else:
# μ˜μ–΄ POS νƒœκΉ…
tokens = word_tokenize(content.lower())
tagged = pos_tag(tokens)
# μ˜μ–΄ ν’ˆμ‚¬ λ§€ν•‘
pos_dict = {
'NN': 'λͺ…사', 'NNS': 'λͺ…사', 'NNP': '고유λͺ…사', 'NNPS': '고유λͺ…사',
'VB': '동사', 'VBD': '동사', 'VBG': '동사', 'VBN': '동사', 'VBP': '동사', 'VBZ': '동사',
'JJ': 'ν˜•μš©μ‚¬', 'JJR': 'ν˜•μš©μ‚¬', 'JJS': 'ν˜•μš©μ‚¬',
'RB': '뢀사', 'RBR': '뢀사', 'RBS': '뢀사'
}
pos_counts = {'λͺ…사': 0, '동사': 0, 'ν˜•μš©μ‚¬': 0, '뢀사': 0, '기타': 0}
for _, pos in tagged:
if pos in pos_dict:
pos_counts[pos_dict[pos]] += 1
else:
pos_counts['기타'] += 1
# κ²°κ³Ό μ‹œκ°ν™”
pos_df = pd.DataFrame({
'ν’ˆμ‚¬': list(pos_counts.keys()),
'λΉˆλ„': list(pos_counts.values())
})
st.bar_chart(pos_df.set_index('ν’ˆμ‚¬'))
if is_korean:
st.info("ν•œκ΅­μ–΄ ν…μŠ€νŠΈκ°€ κ°μ§€λ˜μ—ˆμŠ΅λ‹ˆλ‹€.")
else:
st.info("μ˜μ–΄ ν…μŠ€νŠΈκ°€ κ°μ§€λ˜μ—ˆμŠ΅λ‹ˆλ‹€.")
except Exception as e:
st.error(f"ν’ˆμ‚¬ 뢄석 쀑 였λ₯˜ λ°œμƒ: {str(e)}")
st.error(traceback.format_exc())
elif analysis_type == "감정 뢄석":
if st.button("감정 λΆ„μ„ν•˜κΈ°"):
if st.session_state.openai_client:
with st.spinner("κΈ°μ‚¬μ˜ 감정을 뢄석 μ€‘μž…λ‹ˆλ‹€..."):
try:
                                # Sentiment-analysis prompt
response = st.session_state.openai_client.chat.completions.create(
model="gpt-4.1-mini",
messages=[
{"role": "system", "content": "당신은 ν…μŠ€νŠΈμ˜ 감정과 λ…Όμ‘°λ₯Ό λΆ„μ„ν•˜λŠ” μ „λ¬Έκ°€μž…λ‹ˆλ‹€. λ‹€μŒ λ‰΄μŠ€ κΈ°μ‚¬μ˜ 감정과 λ…Όμ‘°λ₯Ό λΆ„μ„ν•˜κ³ , '긍정적', '뢀정적', '쀑립적' 쀑 ν•˜λ‚˜λ‘œ λΆ„λ₯˜ν•΄ μ£Όμ„Έμš”. λ˜ν•œ κΈ°μ‚¬μ—μ„œ λ“œλŸ¬λ‚˜λŠ” 핡심 감정 ν‚€μ›Œλ“œλ₯Ό 5개 μΆ”μΆœν•˜κ³ , 각 ν‚€μ›Œλ“œλ³„λ‘œ 1-10 μ‚¬μ΄μ˜ 강도 점수λ₯Ό λ§€κ²¨μ£Όμ„Έμš”. JSON ν˜•μ‹μœΌλ‘œ λ‹€μŒκ³Ό 같이 μ‘λ‹΅ν•΄μ£Όμ„Έμš”: {'sentiment': '긍정적/뢀정적/쀑립적', 'reason': '이유 μ„€λͺ…...', 'keywords': [{'word': 'ν‚€μ›Œλ“œ1', 'score': 8}, {'word': 'ν‚€μ›Œλ“œ2', 'score': 7}, ...]}"},
{"role": "user", "content": f"λ‹€μŒ λ‰΄μŠ€ 기사λ₯Ό 뢄석해 μ£Όμ„Έμš”:\n\n제λͺ©: {selected_article['title']}\n\nλ‚΄μš©: {selected_article['content'][:1500]}"}
],
max_tokens=800,
response_format={"type": "json_object"}
)
                                # Parse the JSON response
analysis_result = json.loads(response.choices[0].message.content)
# κ²°κ³Ό μ‹œκ°ν™”
st.subheader("감정 뢄석 κ²°κ³Ό")
                                # 1. Visual badge for the sentiment type
sentiment_type = analysis_result.get('sentiment', '쀑립적')
col1, col2, col3 = st.columns([1, 3, 1])
with col2:
if sentiment_type == "긍정적":
st.markdown(f"""
<div style="background-color:#DCEDC8; padding:20px; border-radius:10px; text-align:center;">
<h1 style="color:#388E3C; font-size:28px;">πŸ˜€ 긍정적 λ…Όμ‘° πŸ˜€</h1>
<p style="font-size:16px;">감정 강도: λ†’μŒ</p>
</div>
""", unsafe_allow_html=True)
elif sentiment_type == "뢀정적":
st.markdown(f"""
<div style="background-color:#FFCDD2; padding:20px; border-radius:10px; text-align:center;">
<h1 style="color:#D32F2F; font-size:28px;">😞 뢀정적 λ…Όμ‘° 😞</h1>
<p style="font-size:16px;">감정 강도: λ†’μŒ</p>
</div>
""", unsafe_allow_html=True)
else:
st.markdown(f"""
<div style="background-color:#E0E0E0; padding:20px; border-radius:10px; text-align:center;">
<h1 style="color:#616161; font-size:28px;">😐 쀑립적 λ…Όμ‘° 😐</h1>
<p style="font-size:16px;">감정 강도: 쀑간</p>
</div>
""", unsafe_allow_html=True)
                                # 2. Reasoning
st.markdown("### 뢄석 κ·Όκ±°")
st.markdown(f"<div style='background-color:#F5F5F5; padding:15px; border-radius:5px;'>{analysis_result.get('reason', '')}</div>", unsafe_allow_html=True)
                                # 3. Emotion-keyword visualization
st.markdown("### 핡심 감정 ν‚€μ›Œλ“œ")
                                # Prepare keyword data; empty defaults keep the summary stats below safe
                                keywords = analysis_result.get('keywords', [])
                                keyword_names, keyword_scores = [], []
if keywords:
                                    # Chart data
keyword_names = [item.get('word', '') for item in keywords]
keyword_scores = [item.get('score', 0) for item in keywords]
# λ ˆμ΄λ” 차트 생성
fig = go.Figure()
                                    # Colors by sentiment
                                    if sentiment_type == "긍정적":
                                        fill_color = 'rgba(76, 175, 80, 0.3)'  # light green
                                        line_color = 'rgba(76, 175, 80, 1)'  # solid green
                                    elif sentiment_type == "뢀정적":
                                        fill_color = 'rgba(244, 67, 54, 0.3)'  # light red
                                        line_color = 'rgba(244, 67, 54, 1)'  # solid red
                                    else:
                                        fill_color = 'rgba(158, 158, 158, 0.3)'  # light gray
                                        line_color = 'rgba(158, 158, 158, 1)'  # solid gray
# λ ˆμ΄λ” 차트 데이터 μ€€λΉ„ - λ§ˆμ§€λ§‰ 점이 첫 점과 μ—°κ²°λ˜λ„λ‘ 데이터 μΆ”κ°€
radar_keywords = keyword_names.copy()
radar_scores = keyword_scores.copy()
# λ ˆμ΄λ” 차트 생성
fig.add_trace(go.Scatterpolar(
r=radar_scores,
theta=radar_keywords,
fill='toself',
fillcolor=fill_color,
line=dict(color=line_color, width=2),
name='감정 ν‚€μ›Œλ“œ'
))
# λ ˆμ΄λ” 차트 λ ˆμ΄μ•„μ›ƒ μ„€μ •
fig.update_layout(
polar=dict(
radialaxis=dict(
visible=True,
range=[0, 10],
tickmode='linear',
tick0=0,
dtick=2
)
),
showlegend=False,
title={
'text': '감정 ν‚€μ›Œλ“œ λ ˆμ΄λ” 뢄석',
'y':0.95,
'x':0.5,
'xanchor': 'center',
'yanchor': 'top'
},
height=500,
width=500,
margin=dict(l=80, r=80, t=80, b=80)
)
                                    # Center the chart
col1, col2, col3 = st.columns([1, 2, 1])
with col2:
st.plotly_chart(fig)
                                    # Keyword cards
st.markdown("#### ν‚€μ›Œλ“œ μ„ΈλΆ€ μ„€λͺ…")
cols = st.columns(min(len(keywords), 5))
for i, keyword in enumerate(keywords):
with cols[i % len(cols)]:
word = keyword.get('word', '')
score = keyword.get('score', 0)
# μ μˆ˜μ— λ”°λ₯Έ 색상 계산
r, g, b = 0, 0, 0
if sentiment_type == "긍정적":
g = min(200 + score * 5, 255)
r = max(255 - score * 20, 100)
elif sentiment_type == "뢀정적":
r = min(200 + score * 5, 255)
g = max(255 - score * 20, 100)
else:
r = g = b = 128
                                            # Render the card
st.markdown(f"""
<div style="background-color:rgba({r},{g},{b},0.2); padding:10px; border-radius:5px; text-align:center; margin:5px;">
<h3 style="margin:0;">{word}</h3>
<div style="background-color:#E0E0E0; border-radius:3px; margin-top:5px;">
<div style="width:{score*10}%; background-color:rgba({r},{g},{b},0.8); height:10px; border-radius:3px;"></div>
</div>
<p style="margin:2px; font-size:12px;">강도: {score}/10</p>
</div>
""", unsafe_allow_html=True)
else:
st.info("ν‚€μ›Œλ“œλ₯Ό μΆ”μΆœν•˜μ§€ λͺ»ν–ˆμŠ΅λ‹ˆλ‹€.")
                                # 4. Summary statistics
st.markdown("### μ£Όμš” 톡계")
col1, col2, col3 = st.columns(3)
with col1:
st.metric(label="긍정/λΆ€μ • 점수", value=f"{7 if sentiment_type == '긍정적' else 3 if sentiment_type == '뢀정적' else 5}/10")
with col2:
st.metric(label="ν‚€μ›Œλ“œ 수", value=len(keywords))
with col3:
avg_score = sum(keyword_scores) / len(keyword_scores) if keyword_scores else 0
st.metric(label="평균 강도", value=f"{avg_score:.1f}/10")
except Exception as e:
st.error(f"감정 뢄석 였λ₯˜: {str(e)}")
st.code(traceback.format_exc())
else:
st.warning("OpenAI API ν‚€κ°€ μ„€μ •λ˜μ–΄ μžˆμ§€ μ•ŠμŠ΅λ‹ˆλ‹€. μ‚¬μ΄λ“œλ°”μ—μ„œ API ν‚€λ₯Ό μ„€μ •ν•΄μ£Όμ„Έμš”.")
elif menu == "μƒˆ 기사 μƒμ„±ν•˜κΈ°":
st.header("μƒˆ 기사 μƒμ„±ν•˜κΈ°")
articles = load_saved_articles()
if not articles:
st.warning("μ €μž₯된 기사가 μ—†μŠ΅λ‹ˆλ‹€. λ¨Όμ € 'λ‰΄μŠ€ 기사 크둀링' λ©”λ‰΄μ—μ„œ 기사λ₯Ό μˆ˜μ§‘ν•΄μ£Όμ„Έμš”.")
else:
        # Article selection
titles = [article['title'] for article in articles]
selected_title = st.selectbox("원본 기사 선택", titles)
selected_article = next((a for a in articles if a['title'] == selected_title), None)
if selected_article:
st.write(f"**원본 제λͺ©:** {selected_article['title']}")
with st.expander("원본 기사 λ‚΄μš©"):
st.write(selected_article['content'])
prompt_text ="""λ‹€μŒ 기사 양식을 λ”°λΌμ„œ λ‹€μ‹œ μž‘μ„±ν•΄μ€˜.
μ—­ν• : 당신은 μ‹ λ¬Έμ‚¬μ˜ κΈ°μžμž…λ‹ˆλ‹€.
μž‘μ—…: 졜근 μΌμ–΄λ‚œ 사건에 λŒ€ν•œ λ³΄λ„μžλ£Œλ₯Ό μž‘μ„±ν•΄μ•Ό ν•©λ‹ˆλ‹€. μžλ£ŒλŠ” 사싀을 기반으둜 ν•˜λ©°, 객관적이고 μ •ν™•ν•΄μ•Ό ν•©λ‹ˆλ‹€.
μ§€μΉ¨:
제곡된 정보λ₯Ό λ°”νƒ•μœΌλ‘œ μ‹ λ¬Έ λ³΄λ„μžλ£Œ ν˜•μ‹μ— 맞좰 기사λ₯Ό μž‘μ„±ν•˜μ„Έμš”.
기사 제λͺ©μ€ 주제λ₯Ό λͺ…ν™•νžˆ λ°˜μ˜ν•˜κ³  λ…μžμ˜ 관심을 끌 수 μžˆλ„λ‘ μž‘μ„±ν•©λ‹ˆλ‹€.
기사 λ‚΄μš©μ€ μ •ν™•ν•˜κ³  κ°„κ²°ν•˜λ©° 섀득λ ₯ μžˆλŠ” λ¬Έμž₯으둜 κ΅¬μ„±ν•©λ‹ˆλ‹€.
κ΄€λ ¨μžμ˜ 인터뷰λ₯Ό 인용 ν˜•νƒœλ‘œ λ„£μ–΄μ£Όμ„Έμš”.
μœ„μ˜ 정보와 지침을 μ°Έκ³ ν•˜μ—¬ μ‹ λ¬Έ λ³΄λ„μžλ£Œ ν˜•μ‹μ˜ 기사λ₯Ό μž‘μ„±ν•΄ μ£Όμ„Έμš”"""
            # Option to also generate an image
generate_image_too = st.checkbox("기사 생성 ν›„ 이미지도 ν•¨κ»˜ μƒμ„±ν•˜κΈ°", value=True)
if st.button("μƒˆ 기사 μƒμ„±ν•˜κΈ°"):
if st.session_state.openai_client:
with st.spinner("기사λ₯Ό 생성 μ€‘μž…λ‹ˆλ‹€..."):
new_article = generate_article(selected_article['content'], prompt_text)
st.write("**μƒμ„±λœ 기사:**")
st.write(new_article)
# 이미지 μƒμ„±ν•˜κΈ° (μ˜΅μ…˜μ΄ μ„ νƒλœ 경우)
if generate_image_too:
with st.spinner("기사 κ΄€λ ¨ 이미지λ₯Ό 생성 μ€‘μž…λ‹ˆλ‹€..."):
# 이미지 생성 ν”„λ‘¬ν”„νŠΈ μ€€λΉ„
image_prompt = f"""신문기사 제λͺ© "{selected_article['title']}" 을 보고 이미지λ₯Ό λ§Œλ“€μ–΄μ€˜
μ΄λ―Έμ§€μ—λŠ” λ‹€μŒ μš”μ†Œκ°€ ν¬ν•¨λ˜μ–΄μ•Ό ν•©λ‹ˆλ‹€:
- 기사λ₯Ό 이해할 수 μžˆλŠ” 도식
- 기사 λ‚΄μš©κ³Ό κ΄€λ ¨λœ ν…μŠ€νŠΈ
- μ‹¬ν”Œν•˜κ²Œ 처리
"""
# 이미지 생성
image_url = generate_image(image_prompt)
if image_url and not image_url.startswith("이미지 생성 였λ₯˜"):
st.subheader("μƒμ„±λœ 이미지:")
st.image(image_url)
else:
st.error(image_url)
# μƒμ„±λœ 기사 μ €μž₯ μ˜΅μ…˜
if st.button("μƒμ„±λœ 기사 μ €μž₯"):
new_article_data = {
'title': f"[생성됨] {selected_article['title']}",
'source': f"AI 생성 (원본: {selected_article['source']})",
'date': datetime.now().strftime("%Y-%m-%d %H:%M"),
'description': new_article[:100] + "...",
'link': "",
'content': new_article
}
articles.append(new_article_data)
save_articles(articles)
st.success("μƒμ„±λœ 기사가 μ €μž₯λ˜μ—ˆμŠ΅λ‹ˆλ‹€!")
else:
st.warning("OpenAI API ν‚€λ₯Ό μ‚¬μ΄λ“œλ°”μ—μ„œ μ„€μ •ν•΄μ£Όμ„Έμš”.")
elif menu == "λ‰΄μŠ€ 기사 μ˜ˆμ•½ν•˜κΈ°":
st.header("λ‰΄μŠ€ 기사 μ˜ˆμ•½ν•˜κΈ°")
    # Create tabs
tab1, tab2, tab3 = st.tabs(["일별 μ˜ˆμ•½", "μ‹œκ°„ 간격 μ˜ˆμ•½", "μŠ€μΌ€μ€„λŸ¬ μƒνƒœ"])
    # Daily schedule tab
with tab1:
st.subheader("맀일 μ •ν•΄μ§„ μ‹œκ°„μ— 기사 μˆ˜μ§‘ν•˜κΈ°")
        # Keyword input
daily_keyword = st.text_input("검색 ν‚€μ›Œλ“œ", value="인곡지λŠ₯", key="daily_keyword")
daily_num_articles = st.slider("μˆ˜μ§‘ν•  기사 수", min_value=1, max_value=20, value=5, key="daily_num_articles")
        # Time setting
daily_col1, daily_col2 = st.columns(2)
with daily_col1:
daily_hour = st.selectbox("μ‹œ", range(24), format_func=lambda x: f"{x:02d}μ‹œ", key="daily_hour")
with daily_col2:
daily_minute = st.selectbox("λΆ„", range(0, 60, 5), format_func=lambda x: f"{x:02d}λΆ„", key="daily_minute")
        # Daily schedule list
if 'daily_tasks' not in st.session_state:
st.session_state.daily_tasks = []
if st.button("일별 μ˜ˆμ•½ μΆ”κ°€"):
st.session_state.daily_tasks.append({
'hour': daily_hour,
'minute': daily_minute,
'keyword': daily_keyword,
'num_articles': daily_num_articles
})
st.success(f"일별 μ˜ˆμ•½μ΄ μΆ”κ°€λ˜μ—ˆμŠ΅λ‹ˆλ‹€: 맀일 {daily_hour:02d}:{daily_minute:02d} - '{daily_keyword}'")
# μ˜ˆμ•½ λͺ©λ‘ ν‘œμ‹œ
if st.session_state.daily_tasks:
st.subheader("일별 μ˜ˆμ•½ λͺ©λ‘")
for i, task in enumerate(st.session_state.daily_tasks):
st.write(f"{i+1}. 맀일 {task['hour']:02d}:{task['minute']:02d} - '{task['keyword']}' ({task['num_articles']}개)")
if st.button("일별 μ˜ˆμ•½ μ΄ˆκΈ°ν™”"):
st.session_state.daily_tasks = []
st.warning("일별 μ˜ˆμ•½μ΄ λͺ¨λ‘ μ΄ˆκΈ°ν™”λ˜μ—ˆμŠ΅λ‹ˆλ‹€.")
    # Interval schedule tab
with tab2:
st.subheader("μ‹œκ°„ κ°„κ²©μœΌλ‘œ 기사 μˆ˜μ§‘ν•˜κΈ°")
        # Keyword input
interval_keyword = st.text_input("검색 ν‚€μ›Œλ“œ", value="빅데이터", key="interval_keyword")
interval_num_articles = st.slider("μˆ˜μ§‘ν•  기사 수", min_value=1, max_value=20, value=5, key="interval_num_articles")
        # Interval setting
interval_minutes = st.number_input("μ‹€ν–‰ 간격(λΆ„)", min_value=1, max_value=60*24, value=30, key="interval_minutes")
        # Whether to run immediately
run_immediately = st.checkbox("μ¦‰μ‹œ μ‹€ν–‰", value=True, help="μ²΄ν¬ν•˜λ©΄ μŠ€μΌ€μ€„λŸ¬ μ‹œμž‘ μ‹œ μ¦‰μ‹œ μ‹€ν–‰ν•©λ‹ˆλ‹€.")
        # Interval schedule list
if 'interval_tasks' not in st.session_state:
st.session_state.interval_tasks = []
if st.button("μ‹œκ°„ 간격 μ˜ˆμ•½ μΆ”κ°€"):
st.session_state.interval_tasks.append({
'interval_minutes': interval_minutes,
'keyword': interval_keyword,
'num_articles': interval_num_articles,
'run_immediately': run_immediately
})
st.success(f"μ‹œκ°„ 간격 μ˜ˆμ•½μ΄ μΆ”κ°€λ˜μ—ˆμŠ΅λ‹ˆλ‹€: {interval_minutes}λΆ„λ§ˆλ‹€ - '{interval_keyword}'")
# μ˜ˆμ•½ λͺ©λ‘ ν‘œμ‹œ
if st.session_state.interval_tasks:
st.subheader("μ‹œκ°„ 간격 μ˜ˆμ•½ λͺ©λ‘")
for i, task in enumerate(st.session_state.interval_tasks):
immediate_text = "μ¦‰μ‹œ μ‹€ν–‰ ν›„ " if task['run_immediately'] else ""
st.write(f"{i+1}. {immediate_text}{task['interval_minutes']}λΆ„λ§ˆλ‹€ - '{task['keyword']}' ({task['num_articles']}개)")
if st.button("μ‹œκ°„ 간격 μ˜ˆμ•½ μ΄ˆκΈ°ν™”"):
st.session_state.interval_tasks = []
st.warning("μ‹œκ°„ 간격 μ˜ˆμ•½μ΄ λͺ¨λ‘ μ΄ˆκΈ°ν™”λ˜μ—ˆμŠ΅λ‹ˆλ‹€.")
# μŠ€μΌ€μ€„λŸ¬ μƒνƒœ νƒ­
with tab3:
st.subheader("μŠ€μΌ€μ€„λŸ¬ μ œμ–΄ 및 μƒνƒœ")
col1, col2 = st.columns(2)
with col1:
            # Start/stop buttons
if not global_scheduler_state.is_running:
if st.button("μŠ€μΌ€μ€„λŸ¬ μ‹œμž‘"):
if not st.session_state.daily_tasks and not st.session_state.interval_tasks:
st.error("μ˜ˆμ•½λœ μž‘μ—…μ΄ μ—†μŠ΅λ‹ˆλ‹€. λ¨Όμ € 일별 μ˜ˆμ•½ λ˜λŠ” μ‹œκ°„ 간격 μ˜ˆμ•½μ„ μΆ”κ°€ν•΄μ£Όμ„Έμš”.")
else:
start_scheduler(st.session_state.daily_tasks, st.session_state.interval_tasks)
st.success("μŠ€μΌ€μ€„λŸ¬κ°€ μ‹œμž‘λ˜μ—ˆμŠ΅λ‹ˆλ‹€.")
else:
if st.button("μŠ€μΌ€μ€„λŸ¬ 쀑지"):
stop_scheduler()
st.warning("μŠ€μΌ€μ€„λŸ¬κ°€ μ€‘μ§€λ˜μ—ˆμŠ΅λ‹ˆλ‹€.")
with col2:
# μŠ€μΌ€μ€„λŸ¬ μƒνƒœ ν‘œμ‹œ
if 'scheduler_status' in st.session_state:
st.write(f"μƒνƒœ: {'싀행쀑' if global_scheduler_state.is_running else '쀑지'}")
if global_scheduler_state.last_run:
st.write(f"λ§ˆμ§€λ§‰ μ‹€ν–‰: {global_scheduler_state.last_run.strftime('%Y-%m-%d %H:%M:%S')}")
if global_scheduler_state.next_run and global_scheduler_state.is_running:
st.write(f"λ‹€μŒ μ‹€ν–‰: {global_scheduler_state.next_run.strftime('%Y-%m-%d %H:%M:%S')}")
else:
st.write("μƒνƒœ: 쀑지")
# μ˜ˆμ•½λœ μž‘μ—… λͺ©λ‘
if global_scheduler_state.scheduled_jobs:
st.subheader("ν˜„μž¬ μ‹€ν–‰ 쀑인 μ˜ˆμ•½ μž‘μ—…")
for i, job in enumerate(global_scheduler_state.scheduled_jobs):
if job['type'] == 'daily':
st.write(f"{i+1}. [일별] 맀일 {job['time']} - '{job['keyword']}' ({job['num_articles']}개)")
else:
immediate_text = "[μ¦‰μ‹œ μ‹€ν–‰ ν›„] " if job.get('run_immediately', False) else ""
st.write(f"{i+1}. [간격] {immediate_text}{job['interval']} - '{job['keyword']}' ({job['num_articles']}개)")
        # Scheduler run results
if global_scheduler_state.scheduled_results:
st.subheader("μŠ€μΌ€μ€„λŸ¬ μ‹€ν–‰ κ²°κ³Ό")
            # Copy before rendering (the scheduler thread may append concurrently)
results_for_display = global_scheduler_state.scheduled_results.copy()
if results_for_display:
result_df = pd.DataFrame(results_for_display)
result_df['μ‹€ν–‰μ‹œκ°„'] = result_df['timestamp'].apply(lambda x: datetime.strptime(x, "%Y%m%d_%H%M%S").strftime("%Y-%m-%d %H:%M:%S"))
result_df = result_df.rename(columns={
'task_type': 'μž‘μ—…μœ ν˜•',
'keyword': 'ν‚€μ›Œλ“œ',
'num_articles': 'κΈ°μ‚¬μˆ˜',
'filename': '파일λͺ…'
})
result_df['μž‘μ—…μœ ν˜•'] = result_df['μž‘μ—…μœ ν˜•'].apply(lambda x: '일별' if x == 'daily' else 'μ‹œκ°„κ°„κ²©')
st.dataframe(
result_df[['μž‘μ—…μœ ν˜•', 'ν‚€μ›Œλ“œ', 'κΈ°μ‚¬μˆ˜', 'μ‹€ν–‰μ‹œκ°„', '파일λͺ…']],
hide_index=True
)
# μˆ˜μ§‘λœ 파일 보기
if os.path.exists('/tmp/scheduled_news'):
            files = sorted([f for f in os.listdir('/tmp/scheduled_news') if f.endswith('.json')])  # sorted so the newest timestamped file is last
if files:
st.subheader("μˆ˜μ§‘λœ 파일 μ—΄κΈ°")
selected_file = st.selectbox("파일 선택", files, index=len(files)-1)
if selected_file and st.button("파일 λ‚΄μš© 보기"):
with open(os.path.join('/tmp/scheduled_news', selected_file), 'r', encoding='utf-8') as f:
articles = json.load(f)
st.write(f"**파일λͺ…:** {selected_file}")
st.write(f"**μˆ˜μ§‘ 기사 수:** {len(articles)}개")
for article in articles:
with st.expander(f"{article['title']} - {article['source']}"):
st.write(f"**좜처:** {article['source']}")
st.write(f"**λ‚ μ§œ:** {article['date']}")
st.write(f"**링크:** {article['link']}")
st.write("**λ³Έλ¬Έ:**")
st.write(article['content'][:500] + "..." if len(article['content']) > 500 else article['content'])
# Footer
st.markdown("---")
st.markdown("Β© λ‰΄μŠ€ 기사 도ꡬ @conanssam")