|
import io |
|
import re |
|
import os |
|
import glob |
|
import asyncio |
|
import hashlib |
|
import unicodedata |
|
import streamlit as st |
|
from PIL import Image |
|
import fitz |
|
import edge_tts |
|
from reportlab.lib.pagesizes import A4 |
|
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle |
|
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle |
|
from reportlab.lib import colors |
|
from reportlab.pdfbase import pdfmetrics |
|
from reportlab.pdfbase.ttfonts import TTFont |
|
from datetime import datetime |
|
import pytz |
|
|
|
# Configure the Streamlit page: wide layout, sidebar collapsed on first load.
st.set_page_config(
    layout="wide",
    initial_sidebar_state="collapsed",
)
|
|
|
def get_timestamp_prefix():
    """Return an upper-cased label for the current time in the US/Central zone.

    Returns:
        The current time formatted with ``"%a %m%d %I%M%p"`` (abbreviated
        weekday, month+day, 12-hour clock with AM/PM) and converted to upper
        case.
    """
    stamp = datetime.now(pytz.timezone("US/Central"))
    return stamp.strftime("%a %m%d %I%M%p").upper()
|
|
|
# Emoji code-point ranges removed before text is handed to speech synthesis.
# Variation selectors (U+FE0F) and zero-width joiners (U+200D) that follow an
# emoji are consumed with it, so no invisible leftovers remain in the output.
# Compiled once at import time instead of on every call.
_SPEECH_EMOJI_RE = re.compile(
    r"(?:["
    r"\U0001F300-\U0001F5FF"
    r"\U0001F600-\U0001F64F"
    r"\U0001F680-\U0001F6FF"
    r"\U0001F700-\U0001F77F"
    r"\U0001F780-\U0001F7FF"
    r"\U0001F800-\U0001F8FF"
    r"\U0001F900-\U0001F9FF"
    r"\U0001FA00-\U0001FA6F"
    r"\U0001FA70-\U0001FAFF"
    r"\u2600-\u26FF"
    r"\u2700-\u27BF"
    r"][\uFE0F\u200D]*)+"
)


def clean_for_speech(text):
    """Strip markdown heading markers and emoji from *text*.

    Args:
        text: The text to clean.

    Returns:
        ``text`` with every ``#`` character removed and every run of emoji
        (including any variation selectors / zero-width joiners attached to
        them) deleted.  All other characters, whitespace included, are kept.
    """
    return _SPEECH_EMOJI_RE.sub('', text.replace("#", ""))
|
|
|
# Emoji runs removed from non-numbered lines.  A variation selector (U+FE0F)
# or zero-width joiner (U+200D) that follows an emoji is removed with it;
# otherwise those invisible characters would be left behind in the text.
# (U+1FAD0-U+1FAD9 needs no separate entry: U+1FA70-U+1FAFF already covers it.)
_TRIMMABLE_EMOJI_RE = re.compile(
    r"(?:["
    r"\U0001F300-\U0001F5FF"
    r"\U0001F600-\U0001F64F"
    r"\U0001F680-\U0001F6FF"
    r"\U0001F700-\U0001F77F"
    r"\U0001F780-\U0001F7FF"
    r"\U0001F800-\U0001F8FF"
    r"\U0001F900-\U0001F9FF"
    r"\U0001FA00-\U0001FA6F"
    r"\U0001FA70-\U0001FAFF"
    r"\u2600-\u26FF"
    r"\u2700-\u27BF"
    r"][\uFE0F\u200D]*)+"
)

# A line counts as "numbered" when it starts with digits, a dot and whitespace.
_NUMBERED_LINE_RE = re.compile(r'^\d+\.\s')


def trim_emojis_except_numbered(markdown_text):
    """Remove emoji from every line except numbered ones.

    Args:
        markdown_text: The markdown source.  Leading and trailing whitespace
            of the whole text is stripped before it is split on ``"\\n"``.

    Returns:
        The lines re-joined with ``"\\n"``.  Lines matching ``^\\d+\\.\\s`` are
        returned untouched; all other lines have their emoji runs deleted.
    """
    kept = [
        line if _NUMBERED_LINE_RE.match(line) else _TRIMMABLE_EMOJI_RE.sub('', line)
        for line in markdown_text.strip().split('\n')
    ]
    return '\n'.join(kept)
|
|
|
async def generate_audio(text, voice, filename):
    """Synthesize *text* with edge-tts and save the audio to *filename*.

    Args:
        text: The text passed to ``edge_tts.Communicate``.
        voice: The voice name passed to ``edge_tts.Communicate``.
        filename: Destination path handed to ``Communicate.save``.

    Returns:
        ``filename``, unchanged, once the save call has completed.
    """
    await edge_tts.Communicate(text, voice).save(filename)
    return filename
|
|
|
# Bare URLs: an http(s):// or www. prefix followed by at least one dotted part.
_BARE_URL_RE = re.compile(
    r'(https?://|www\.)[^\s\[\]()<>{}]+(\.[^\s\[\]()<>{}]+)+(/[^\s\[\]()<>{}]*)?',
    re.IGNORECASE
)
# Markdown links of the form [label](http...).
_MD_LINK_RE = re.compile(r'\[(.*?)\]\((https?://[^\s\[\]()<>{}]+)\)')
# Opening (<a ...) and closing (</a) anchor tags; group 1 is "/" for a closer.
_ANCHOR_TAG_RE = re.compile(r'<(/?)a\b', re.IGNORECASE)


def _anchor_depth_after(fragment, depth):
    """Return the number of open ``<a>`` tags after scanning *fragment*.

    *depth* is the count of anchors already open before the fragment.  Stray
    closing tags never drive the count below zero.
    """
    for tag in _ANCHOR_TAG_RE.finditer(fragment):
        if tag.group(1):
            depth = max(0, depth - 1)
        else:
            depth += 1
    return depth


def detect_and_convert_links(text):
    """Convert markdown links and bare URLs in *text* to ``<a href>`` markup.

    Markdown links ``[label](http...)`` are rewritten first.  Every remaining
    bare URL is then wrapped in its own anchor, unless it already sits inside
    an anchor (either in the ``href`` attribute or in the label).  The anchor
    nesting depth is carried across the whole string, so a URL used as the
    label of a markdown link is not wrapped a second time.

    Args:
        text: A single line (or any string) of markdown text.

    Returns:
        The text with links converted.  URLs starting with ``www.`` (in any
        letter case) get an ``http://`` prefix in the ``href`` only; the
        visible label keeps the URL exactly as written.
    """
    text = _MD_LINK_RE.sub(r'<a href="\2">\1</a>', text)

    pieces = []
    pos = 0
    depth = 0  # number of <a> elements currently open at `pos`
    for match in _BARE_URL_RE.finditer(text):
        depth = _anchor_depth_after(text[pos:match.start()], depth)
        if depth > 0:
            # Already part of an anchor: copy through unchanged.
            pieces.append(text[pos:match.end()])
        else:
            pieces.append(text[pos:match.start()])
            url = match.group(0)
            # The pattern is case-insensitive, so the prefix test must be too.
            href = 'http://' + url if url.lower().startswith('www.') else url
            pieces.append(f'<a href="{href}">{url}</a>')
        pos = match.end()
    pieces.append(text[pos:])
    return ''.join(pieces)
|
|
|
def apply_emoji_font(text, emoji_font):
    """Wrap *text* in ``<font>`` tags, using *emoji_font* for emoji runs.

    Existing ``<a href="...">`` links and ``<b>`` spans are swapped for
    placeholders first so their markup is not split by the font tags, then
    restored afterwards.

    Args:
        text: Markup that may contain ``<a href>`` links, ``<b>`` spans and
            emoji.
        emoji_font: Font face name used for emoji runs.  All other text is
            wrapped with the ``DejaVuSans`` face.

    Returns:
        The re-assembled markup string.  An empty input yields an empty
        string.
    """
    stashed_links = []

    def _stash(found):
        # Remember (url, label) and leave an indexed placeholder in the text.
        stashed_links.append((found.group(1), found.group(2)))
        return f"###LINK_{len(stashed_links) - 1}###"

    text = re.sub(r'<a\s+href="([^"]+)">(.*?)</a>', _stash, text)
    text = re.sub(r'<b>(.*?)</b>', r'###BOLD_START###\1###BOLD_END###', text)

    emoji_runs = re.compile(
        r"([\U0001F300-\U0001F5FF"
        r"\U0001F600-\U0001F64F"
        r"\U0001F680-\U0001F6FF"
        r"\U0001F700-\U0001F77F"
        r"\U0001F780-\U0001F7FF"
        r"\U0001F800-\U0001F8FF"
        r"\U0001F900-\U0001F9FF"
        r"\U0001FAD0-\U0001FAD9"
        r"\U0001FA00-\U0001FA6F"
        r"\U0001FA70-\U0001FAFF"
        r"\u2600-\u26FF"
        r"\u2700-\u27BF]+)"
    )

    # Splitting on a pattern with one capturing group alternates
    # plain text (even positions) and emoji runs (odd positions).
    rendered = []
    for position, chunk in enumerate(emoji_runs.split(text)):
        if position % 2:
            glyphs = unicodedata.normalize('NFC', chunk)
            rendered.append(f'<font face="{emoji_font}">{glyphs}</font>')
        elif chunk:
            rendered.append(f'<font face="DejaVuSans">{chunk}</font>')

    markup = ''.join(rendered)
    # Close the surrounding font span around each bold tag so tags stay nested.
    markup = markup.replace('###BOLD_START###', '</font><b><font face="DejaVuSans">')
    markup = markup.replace('###BOLD_END###', '</font></b><font face="DejaVuSans">')

    for idx, (url, label) in enumerate(stashed_links):
        token = f"###LINK_{idx}###"
        head, found, tail = markup.partition(token)
        # Restore only when the placeholder occurs exactly once.
        if not found or token in tail:
            continue
        anchor = f'<a href="{url}">{label}</a>'
        if head.rfind('<font') > head.rfind('</font>'):
            # The placeholder sits inside an open font span: step out of it.
            anchor = f'</font>{anchor}<font face="DejaVuSans">'
        markup = head + anchor + tail

    return markup
|
|
|
def markdown_to_pdf_content(markdown_text, render_with_bold, auto_bold_numbers):
    """Turn markdown text into a list of markup lines for the PDF.

    Blank lines and lines starting with ``"# "`` are dropped.  Each remaining
    line is stripped and passed through ``detect_and_convert_links``.

    Args:
        markdown_text: The markdown source.
        render_with_bold: When true, ``**text**`` becomes ``<b>text</b>``.
        auto_bold_numbered: n/a -- see ``auto_bold_numbers``.
        auto_bold_numbers: When true, lines matching ``^\\d+\\.\\s`` are wrapped
            in a single ``<b>...</b>`` pair (inner bold tags are removed first
            when both an opening and a closing tag are present).

    Returns:
        A ``(lines, count)`` tuple where ``count`` is ``len(lines)``.
    """
    numbered = re.compile(r'^\d+\.\s')
    prepared = []

    for raw in markdown_text.strip().split('\n'):
        entry = raw.strip()
        if entry == '' or entry.startswith('# '):
            continue

        entry = detect_and_convert_links(entry)
        if render_with_bold:
            entry = re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', entry)

        if auto_bold_numbers and numbered.match(entry):
            fully_bold = entry.startswith("<b>") and entry.endswith("</b>")
            if not fully_bold:
                if "<b>" in entry and "</b>" in entry:
                    # Flatten partial bold spans before wrapping the whole line.
                    entry = re.sub(r'</?b>', '', entry)
                entry = f"<b>{entry}</b>"

        prepared.append(entry)

    return prepared, len(prepared)
|
|
|
def create_pdf(markdown_text, base_font_size, render_with_bold, auto_bold_numbers, enlarge_numbered, num_columns):
    """Render markdown text as a multi-column PDF and return its bytes.

    The page is twice the A4 width by the A4 height with 36-point margins.
    Lines are split evenly across the columns and the font size is estimated
    from the number of lines and their average length, clamped to 6-16 points.

    Args:
        markdown_text: The markdown source to render.
        base_font_size: Accepted for interface compatibility; the sizing logic
            in this function does not read it.
        render_with_bold: Forwarded to ``markdown_to_pdf_content``.
        auto_bold_numbers: Forwarded to ``markdown_to_pdf_content``.
        enlarge_numbered: When true, numbered bold lines are set one point
            larger than the body text.
        num_columns: Number of columns; ``0`` (or any value below 1) selects a
            count derived from the number of lines (1-6).

    Returns:
        The PDF as ``bytes``, or ``None`` when no ``.ttf`` file is present or
        font registration fails (an error is shown through ``st.error``).
    """
    buffer = io.BytesIO()
    page_width = A4[0] * 2
    page_height = A4[1]
    doc = SimpleDocTemplate(buffer, pagesize=(page_width, page_height), leftMargin=36, rightMargin=36, topMargin=36, bottomMargin=36)
    styles = getSampleStyleSheet()
    spacer_height = 10
    pdf_content, total_lines = markdown_to_pdf_content(markdown_text, render_with_bold, auto_bold_numbers)

    # Face used for emoji and numbered headings.  Falls back to DejaVuSans when
    # no NotoEmoji-Bold file is available, so styles never reference a font
    # that was not registered.
    emoji_font = "DejaVuSans"
    try:
        available_font_files = glob.glob("*.ttf")
        if not available_font_files:
            st.error("No .ttf font files found.")
            return None
        selected_font_path = next((f for f in available_font_files if "NotoEmoji-Bold" in f), None)
        if selected_font_path:
            pdfmetrics.registerFont(TTFont("NotoEmoji-Bold", selected_font_path))
            emoji_font = "NotoEmoji-Bold"
        pdfmetrics.registerFont(TTFont("DejaVuSans", "DejaVuSans.ttf"))
    except Exception as e:
        st.error(f"Font registration error: {e}")
        return None

    # --- Layout estimation -------------------------------------------------
    total_chars = sum(len(line) for line in pdf_content)
    usable_height = page_height - 72 - spacer_height  # 72 = top + bottom margin
    usable_width = page_width - 72                    # 72 = left + right margin
    avg_line_chars = total_chars / total_lines if total_lines > 0 else 50

    ideal_lines_per_col = 20
    if num_columns < 1:
        num_columns = max(1, min(6, int(total_lines / ideal_lines_per_col) + 1))
    col_width = usable_width / num_columns

    min_font_size = 6
    max_font_size = 16
    lines_per_column = total_lines / num_columns
    target_height_per_line = usable_height / lines_per_column if lines_per_column > 0 else usable_height
    estimated_font_size = int(target_height_per_line / 1.5)
    adjusted_font_size = max(min_font_size, min(max_font_size, estimated_font_size))
    # Shrink further when the average line would not fit the column width.
    if avg_line_chars > col_width / adjusted_font_size * 10:
        adjusted_font_size = int(col_width / (avg_line_chars / 10))
        adjusted_font_size = max(min_font_size, adjusted_font_size)

    # --- Paragraph styles --------------------------------------------------
    numbered_size = adjusted_font_size + 1 if enlarge_numbered else adjusted_font_size
    item_style = ParagraphStyle(
        'ItemStyle', parent=styles['Normal'], fontName="DejaVuSans",
        fontSize=adjusted_font_size, leading=adjusted_font_size * 1.15, spaceAfter=1,
        linkUnderline=True
    )
    numbered_bold_style = ParagraphStyle(
        'NumberedBoldStyle', parent=styles['Normal'], fontName=emoji_font,
        fontSize=numbered_size, leading=numbered_size * 1.15, spaceAfter=1,
        linkUnderline=True
    )
    section_style = ParagraphStyle(
        'SectionStyle', parent=styles['Heading2'], fontName="DejaVuSans",
        textColor=colors.darkblue, fontSize=adjusted_font_size * 1.1, leading=adjusted_font_size * 1.32, spaceAfter=2,
        linkUnderline=True
    )

    # --- Distribute lines over the columns ---------------------------------
    columns = [[] for _ in range(num_columns)]
    current_line_count = 0
    current_column = 0
    for item in pdf_content:
        if current_line_count >= lines_per_column and current_column < num_columns - 1:
            current_column += 1
            current_line_count = 0
        columns[current_column].append(item)
        current_line_count += 1

    # --- Build one Paragraph per line --------------------------------------
    number_pattern = re.compile(r'^\d+\.\s')
    column_cells = []
    for column in columns:
        cells = []
        for item in column:
            content = None
            if item.startswith("<b>") and item.endswith("</b>"):
                content = item[3:-4].strip()
                # A line such as "<b>a</b> x <b>b</b>" is not one bold span;
                # slicing it would leave unbalanced tags, so render it as body.
                if "<b>" in content or "</b>" in content:
                    content = None
            if content is not None:
                style = numbered_bold_style if number_pattern.match(content) else section_style
                cells.append(Paragraph(apply_emoji_font(content, emoji_font), style))
            else:
                # NOTE(review): body lines pass "DejaVuSans" as the emoji font,
                # as in the original; confirm that is intended.
                cells.append(Paragraph(apply_emoji_font(item, "DejaVuSans"), item_style))
        column_cells.append(cells)

    # Pad every column to the same height.  At least one row is always kept so
    # that empty input still produces a valid table, and each padding cell is a
    # fresh Paragraph rather than one shared instance.
    row_count = max(1, max(len(cells) for cells in column_cells))
    for cells in column_cells:
        cells.extend(Paragraph("", item_style) for _ in range(row_count - len(cells)))

    table_data = list(zip(*column_cells))
    table = Table(table_data, colWidths=[col_width] * num_columns, hAlign='CENTER')
    table.setStyle(TableStyle([
        ('VALIGN', (0, 0), (-1, -1), 'TOP'),
        ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
        ('BACKGROUND', (0, 0), (-1, -1), colors.white),
        ('GRID', (0, 0), (-1, -1), 0, colors.white),
        ('LINEAFTER', (0, 0), (num_columns-1, -1), 0.5, colors.grey),
        ('LEFTPADDING', (0, 0), (-1, -1), 2),
        ('RIGHTPADDING', (0, 0), (-1, -1), 2),
        ('TOPPADDING', (0, 0), (-1, -1), 1),
        ('BOTTOMPADDING', (0, 0), (-1, -1), 1),
    ]))

    doc.build([Spacer(1, spacer_height), table])
    buffer.seek(0)
    return buffer.getvalue()
|
|
|
def pdf_to_image(pdf_bytes):
    """Rasterize every page of a PDF into a PIL image.

    Args:
        pdf_bytes: The PDF document as bytes.  ``None`` or empty input is
            accepted and yields ``None`` without attempting to open anything.

    Returns:
        A list with one RGB ``Image`` per page, rendered with a 2x zoom
        matrix, or ``None`` when there is no input or rendering fails (the
        failure is reported through ``st.error``).
    """
    if not pdf_bytes:
        # Nothing to render, e.g. PDF generation failed upstream.
        return None
    try:
        doc = fitz.open(stream=pdf_bytes, filetype="pdf")
        try:
            zoom = fitz.Matrix(2.0, 2.0)
            images = []
            for page in doc:
                pix = page.get_pixmap(matrix=zoom)
                images.append(Image.frombytes("RGB", (pix.width, pix.height), pix.samples))
            return images
        finally:
            # Release the document even when a page fails to render.
            doc.close()
    except Exception as e:
        st.error(f"Failed to render PDF preview: {e}")
        return None
|
|
|
# Markdown documents offered in the picker: every *.md file in the working
# directory except README.md, listed by base name without the extension.
md_files = [f for f in glob.glob("*.md") if os.path.basename(f) != "README.md"]
md_options = [os.path.splitext(os.path.basename(f))[0] for f in md_files]

# NOTE(review): the indentation of this script was lost in the source; the
# nesting below (all controls inside the sidebar) is reconstructed -- confirm.
# NOTE(review): several button labels below look like mis-decoded emoji
# (mojibake).  They are kept exactly as found; restore the intended characters
# from the original file.
with st.sidebar:
    st.markdown("### PDF Options")
    if md_options:
        selected_md = st.selectbox("Select Markdown File", options=md_options, index=0)
        with open(f"{selected_md}.md", "r", encoding="utf-8") as f:
            st.session_state.markdown_content = f.read()
    else:
        st.warning("No markdown file found. Please add one to your folder.")
        selected_md = None
        # Initialise once only: resetting on every rerun would discard text the
        # user entered when there is no file to persist it to.
        if "markdown_content" not in st.session_state:
            st.session_state.markdown_content = ""

    available_font_files = {os.path.splitext(os.path.basename(f))[0]: f for f in glob.glob("*.ttf")}
    font_names = list(available_font_files)
    # NOTE(review): the selection is only used in the text-area key below; it
    # is not passed to create_pdf.
    selected_font_name = st.selectbox(
        "Select Emoji Font",
        options=font_names,
        index=font_names.index("NotoEmoji-Bold") if "NotoEmoji-Bold" in available_font_files else 0,
    )
    base_font_size = st.slider("Font Size (points)", min_value=6, max_value=16, value=8, step=1)
    render_with_bold = st.checkbox("Render with Bold Formatting (remove ** markers)", value=True, key="render_with_bold")
    auto_bold_numbers = st.checkbox("Auto Bold Numbered Lines", value=True, key="auto_bold_numbers")
    enlarge_numbered = st.checkbox("Enlarge Font Size for Numbered Lines", value=True, key="enlarge_numbered")

    auto_columns = st.checkbox("AutoColumns", value=False, key="auto_columns")

    # Recommend a column count from the word count of the longest line.
    if auto_columns and 'markdown_content' in st.session_state:
        current_markdown = st.session_state.markdown_content
        longest_line_words = max(
            (len(line.split()) for line in current_markdown.strip().split('\n') if line.strip()),
            default=0,
        )

        if longest_line_words > 25:
            recommended_columns = 1
        elif longest_line_words >= 18:
            recommended_columns = 2
        elif longest_line_words >= 11:
            recommended_columns = 3
        else:
            recommended_columns = "Auto"

        st.info(f"Longest line has {longest_line_words} words. Recommending {recommended_columns} columns.")
    else:
        recommended_columns = "Auto"

    column_options = ["Auto"] + list(range(1, 7))
    num_columns = st.selectbox(
        "Number of Columns",
        options=column_options,
        index=0 if recommended_columns == "Auto" else column_options.index(recommended_columns),
    )
    # create_pdf treats 0 as "choose automatically".
    num_columns = 0 if num_columns == "Auto" else int(num_columns)
    st.info("Font size and columns adjust to fit one page.")

    edited_markdown = st.text_area("Input Markdown", value=st.session_state.markdown_content, height=300, key=f"markdown_{selected_md}_{selected_font_name}_{num_columns}")

    col1, col2 = st.columns(2)
    with col1:
        if st.button("ππ Update PDF"):
            st.session_state.markdown_content = edited_markdown
            if selected_md:
                with open(f"{selected_md}.md", "w", encoding="utf-8") as f:
                    f.write(edited_markdown)
            st.rerun()

    with col2:
        if st.button("βοΈ Trim Emojis"):
            trimmed_content = trim_emojis_except_numbered(edited_markdown)
            st.session_state.markdown_content = trimmed_content
            if selected_md:
                with open(f"{selected_md}.md", "w", encoding="utf-8") as f:
                    f.write(trimmed_content)
            st.rerun()

    prefix = get_timestamp_prefix()
    st.download_button(
        label="πΎπ Save Markdown",
        data=st.session_state.markdown_content,
        file_name=f"{prefix} {selected_md}.md" if selected_md else f"{prefix} default.md",
        mime="text/markdown"
    )

    st.markdown("### Text-to-Speech")
    VOICES = ["en-US-AriaNeural", "en-US-JennyNeural", "en-GB-SoniaNeural", "en-US-GuyNeural", "en-US-AnaNeural"]
    selected_voice = st.selectbox("Select Voice for TTS", options=VOICES, index=0)
    if st.button("Generate Audio"):
        cleaned_text = clean_for_speech(st.session_state.markdown_content)
        if not cleaned_text.strip():
            # Do not call the speech service with nothing to say.
            st.warning("There is no text to convert to speech.")
        else:
            audio_filename = f"{prefix} {selected_md} {selected_voice}.mp3" if selected_md else f"{prefix} default {selected_voice}.mp3"
            try:
                audio_file = asyncio.run(generate_audio(cleaned_text, selected_voice, audio_filename))
            except Exception as e:
                # Report synthesis failures in the page instead of aborting the
                # whole script run.
                st.error(f"Audio generation failed: {e}")
            else:
                st.audio(audio_file)
                with open(audio_file, "rb") as f:
                    audio_bytes = f.read()
                st.download_button(
                    label="πΎπ Save Audio",
                    data=audio_bytes,
                    file_name=audio_filename,
                    mime="audio/mpeg"
                )

with st.spinner("Generating PDF..."):
    pdf_bytes = create_pdf(st.session_state.markdown_content, base_font_size, render_with_bold, auto_bold_numbers, enlarge_numbered, num_columns)

# create_pdf returns None after reporting a font problem; in that case there is
# nothing to preview and no data to offer for download.
if pdf_bytes:
    with st.container():
        pdf_images = pdf_to_image(pdf_bytes)
        if pdf_images:
            for img in pdf_images:
                st.image(img, use_container_width=True)
        else:
            st.info("Download the PDF to view it locally.")

    with st.sidebar:
        st.download_button(
            label="πΎπ Save PDF",
            data=pdf_bytes,
            file_name=f"{prefix} {selected_md}.pdf" if selected_md else f"{prefix} output.pdf",
            mime="application/pdf"
        )