Spaces:

awacke1
/

PDF-Paper-Maker-AI-UI-UX

Running

App Files Files Community

PDF-Paper-Maker-AI-UI-UX / backup9.app.py

awacke1

Create backup9.app.py

703c2b5 verified 15 days ago

raw

history blame contribute delete

19.1 kB

	import io
	import re
	import os
	import glob
	import asyncio
	import hashlib
	import unicodedata
	import streamlit as st
	from PIL import Image
	import fitz
	import edge_tts
	from reportlab.lib.pagesizes import A4
	from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
	from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
	from reportlab.lib import colors
	from reportlab.pdfbase import pdfmetrics
	from reportlab.pdfbase.ttfonts import TTFont
	from datetime import datetime
	import pytz

	# Use the wide page layout and start with the sidebar collapsed.
	st.set_page_config(layout="wide", initial_sidebar_state="collapsed")

	def get_timestamp_prefix():
	    """Return the current US/Central time as an upper-cased label.

	    The label is formatted as ``"%a %m%d %I%M%p"`` (weekday abbreviation,
	    month and day, 12-hour clock with AM/PM) and then upper-cased.
	    """
	    stamp = datetime.now(pytz.timezone("US/Central")).strftime("%a %m%d %I%M%p")
	    return stamp.upper()

	def clean_for_speech(text):
	    """Return *text* with ``#`` characters and emoji runs removed.

	    Every ``#`` is dropped first, then each run of characters that falls in
	    the emoji/symbol code-point ranges listed below is deleted.
	    """
	    emoji_ranges = (
	        r"[\U0001F300-\U0001F5FF"
	        r"\U0001F600-\U0001F64F"
	        r"\U0001F680-\U0001F6FF"
	        r"\U0001F700-\U0001F77F"
	        r"\U0001F780-\U0001F7FF"
	        r"\U0001F800-\U0001F8FF"
	        r"\U0001F900-\U0001F9FF"
	        r"\U0001FA00-\U0001FA6F"
	        r"\U0001FA70-\U0001FAFF"
	        r"\u2600-\u26FF"
	        r"\u2700-\u27BF]+"
	    )
	    without_hashes = text.replace("#", "")
	    return re.sub(emoji_ranges, '', without_hashes, flags=re.UNICODE)

	def trim_emojis_except_numbered(markdown_text):
	    """Remove emoji from every line except numbered list lines.

	    The text is stripped and split on newlines. A line that starts with
	    digits, a dot and whitespace (``"1. "``) is kept untouched; every other
	    line has its emoji runs deleted. The lines are re-joined with newlines.
	    """
	    emoji_runs = re.compile(
	        r"[\U0001F300-\U0001F5FF"
	        r"\U0001F600-\U0001F64F"
	        r"\U0001F680-\U0001F6FF"
	        r"\U0001F700-\U0001F77F"
	        r"\U0001F780-\U0001F7FF"
	        r"\U0001F800-\U0001F8FF"
	        r"\U0001F900-\U0001F9FF"
	        r"\U0001FAD0-\U0001FAD9"
	        r"\U0001FA00-\U0001FA6F"
	        r"\U0001FA70-\U0001FAFF"
	        r"\u2600-\u26FF"
	        r"\u2700-\u27BF]+"
	    )
	    numbered = re.compile(r'^\d+\.\s')
	    return '\n'.join(
	        row if numbered.match(row) else emoji_runs.sub('', row)
	        for row in markdown_text.strip().split('\n')
	    )

	async def generate_audio(text, voice, filename):
	    """Synthesize *text* with the edge-tts *voice* and save it to *filename*.

	    Returns *filename* after the save coroutine has completed.
	    """
	    await edge_tts.Communicate(text, voice).save(filename)
	    return filename

	def detect_and_convert_links(text):
	    """Convert Markdown links and bare URLs in *text* into ``<a href>`` tags.

	    ``[label](http...)`` links are rewritten first. Remaining bare URLs that
	    start with ``http://``, ``https://`` or ``www.`` are then wrapped in an
	    anchor, with ``http://`` prepended to ``www.`` addresses. URLs that sit
	    inside an already-open ``<a ...>`` element (its ``href`` value or its
	    label) are left untouched so anchors are never nested.

	    Parameters:
	        text: A line of Markdown/HTML-ish text.

	    Returns:
	        The text with links expressed as ``<a href="...">...</a>``.
	    """
	    # The scheme alternation must be a real ``|``; an escaped ``\|`` would
	    # only match a literal pipe character and no URL would ever be found.
	    url_pattern = re.compile(
	        r'(https?://|www\.)[^\s\[\]()<>{}]+(\.[^\s\[\]()<>{}]+)+(/[^\s\[\]()<>{}]*)?',
	        re.IGNORECASE
	    )
	    md_link_pattern = re.compile(r'\[(.*?)\]\((https?://[^\s\[\]()<>{}]+)\)')
	    anchor_open = re.compile(r'<a\b', re.IGNORECASE)
	    anchor_close = re.compile(r'</a\b', re.IGNORECASE)

	    text = md_link_pattern.sub(r'<a href="\2">\1</a>', text)

	    result = []
	    start_idx = 0
	    # Running count of anchors opened but not yet closed, tracked across the
	    # whole string so a URL used as link label text is recognised as nested.
	    open_anchors = 0
	    while start_idx < len(text):
	        match = url_pattern.search(text, start_idx)
	        if not match:
	            result.append(text[start_idx:])
	            break
	        prev_text = text[start_idx:match.start()]
	        result.append(prev_text)
	        open_anchors += len(anchor_open.findall(prev_text)) - len(anchor_close.findall(prev_text))
	        open_anchors = max(open_anchors, 0)  # ignore stray closing tags
	        url = match.group(0)
	        if open_anchors > 0:
	            # Already part of an anchor: copy through unchanged.
	            result.append(url)
	        else:
	            # Sentence punctuation directly after a URL is not part of it.
	            trimmed = url.rstrip('.,;:!?')
	            trailing = url[len(trimmed):]
	            if trimmed.lower().startswith('www.'):
	                href = 'http://' + trimmed
	            else:
	                href = trimmed
	            result.append(f'<a href="{href}">{trimmed}</a>{trailing}')
	        start_idx = match.end()
	    return ''.join(result)

	def apply_emoji_font(text, emoji_font):
	    """Wrap *text* in ``<font>`` tags, using *emoji_font* for emoji runs.

	    Steps, in order:
	      1. ``<a href="...">label</a>`` links are swapped for ``###LINK_n###``
	         placeholders and ``<b>...</b>`` pairs for bold start/end markers.
	      2. The text is cut at emoji runs; emoji runs (NFC-normalized) get a
	         ``<font face="{emoji_font}">`` wrapper, everything else gets a
	         ``DejaVuSans`` wrapper.
	      3. Bold markers become ``<b>`` tags that close and reopen the
	         surrounding DejaVuSans font tag.
	      4. Each link placeholder that occurs exactly once is replaced by its
	         anchor, closing/reopening the font tag when the placeholder sits
	         inside an open ``<font>``.

	    Returns the resulting markup string (empty string for empty input).
	    """
	    stashed_links = []

	    def stash_link(found):
	        stashed_links.append((found.group(1), found.group(2)))
	        return f"###LINK_{len(stashed_links) - 1}###"

	    text = re.sub(r'<a\s+href="([^"]+)">(.*?)</a>', stash_link, text)
	    text = re.sub(r'<b>(.*?)</b>', r'###BOLD_START###\1###BOLD_END###', text)

	    emoji_runs = re.compile(
	        r"([\U0001F300-\U0001F5FF"
	        r"\U0001F600-\U0001F64F"
	        r"\U0001F680-\U0001F6FF"
	        r"\U0001F700-\U0001F77F"
	        r"\U0001F780-\U0001F7FF"
	        r"\U0001F800-\U0001F8FF"
	        r"\U0001F900-\U0001F9FF"
	        r"\U0001FAD0-\U0001FAD9"
	        r"\U0001FA00-\U0001FA6F"
	        r"\U0001FA70-\U0001FAFF"
	        r"\u2600-\u26FF"
	        r"\u2700-\u27BF]+)"
	    )

	    pieces = []
	    cursor = 0
	    for found in emoji_runs.finditer(text):
	        begin, finish = found.span()
	        if begin > cursor:
	            pieces.append(f'<font face="DejaVuSans">{text[cursor:begin]}</font>')
	        glyphs = unicodedata.normalize('NFC', found.group(1))
	        pieces.append(f'<font face="{emoji_font}">{glyphs}</font>')
	        cursor = finish
	    if cursor < len(text):
	        pieces.append(f'<font face="DejaVuSans">{text[cursor:]}</font>')

	    markup = ''.join(pieces)
	    markup = markup.replace('###BOLD_START###', '</font><b><font face="DejaVuSans">')
	    markup = markup.replace('###BOLD_END###', '</font></b><font face="DejaVuSans">')

	    for idx, (url, label) in enumerate(stashed_links):
	        token = f"###LINK_{idx}###"
	        # Only a placeholder that appears exactly once is restored.
	        if markup.count(token) != 1:
	            continue
	        head, _, tail = markup.partition(token)
	        inside_font = head.rfind('<font') > head.rfind('</font>')
	        if inside_font:
	            markup = head + f'</font><a href="{url}">{label}</a><font face="DejaVuSans">' + tail
	        else:
	            markup = head + f'<a href="{url}">{label}</a>' + tail
	    return markup

	def markdown_to_pdf_content(markdown_text, render_with_bold, auto_bold_numbers, add_space_before_numbered):
	    """Turn Markdown text into a list of markup lines for the PDF builder.

	    Blank lines and level-1 headings (lines starting with ``"# "``) are
	    skipped. Links are converted with ``detect_and_convert_links``.

	    Parameters:
	        markdown_text: Markdown source text.
	        render_with_bold: When true, ``**text**`` becomes ``<b>text</b>``.
	        auto_bold_numbered lines: see ``auto_bold_numbers`` below.
	        auto_bold_numbers: When true, every numbered line (``"1. "`` style)
	            is wrapped as a whole in a single ``<b>...</b>`` pair.
	        add_space_before_numbered: When true, an empty string is inserted
	            before each numbered line except the first numbered line seen
	            and lines that start with ``"1."``.

	    Returns:
	        ``(pdf_content, total_lines)`` where ``pdf_content`` is the list of
	        processed lines and ``total_lines`` is its length.
	    """
	    number_pattern = re.compile(r'^\d+\.\s')
	    # Markdown bold is ``**text**``; the asterisks must be escaped in the
	    # regex or the markers are never recognised.
	    bold_pattern = re.compile(r'\*\*(.+?)\*\*')
	    bold_tag_pattern = re.compile(r'</?b>')

	    pdf_content = []
	    first_numbered_seen = False

	    for raw_line in markdown_text.strip().split('\n'):
	        line = raw_line.strip()
	        if not line or line.startswith('# '):
	            continue

	        # Decided on the raw line, before any markup is inserted.
	        is_numbered_line = number_pattern.match(line) is not None

	        if add_space_before_numbered and is_numbered_line:
	            if first_numbered_seen and not line.startswith("1."):
	                pdf_content.append("")
	            first_numbered_seen = True

	        line = detect_and_convert_links(line)
	        if render_with_bold:
	            line = bold_pattern.sub(r'<b>\1</b>', line)
	        if auto_bold_numbers and is_numbered_line:
	            # The PDF builder strips exactly one outer <b>...</b> pair from
	            # bold lines, so drop any inner tags and wrap the line once.
	            line = "<b>" + bold_tag_pattern.sub('', line) + "</b>"
	        pdf_content.append(line)

	    return pdf_content, len(pdf_content)

	def create_pdf(markdown_text, base_font_size, render_with_bold, auto_bold_numbers, enlarge_numbered, num_columns, add_space_before_numbered):
	    """Render Markdown text as a multi-column PDF on a double-width A4 page.

	    Parameters:
	        markdown_text: Markdown source; parsed by ``markdown_to_pdf_content``.
	        base_font_size: Accepted for interface compatibility but not read;
	            the font size is derived from the amount of content instead.
	        render_with_bold, auto_bold_numbers, add_space_before_numbered:
	            Forwarded unchanged to ``markdown_to_pdf_content``.
	        enlarge_numbered: When true, bold numbered lines are set one point
	            larger than body text.
	        num_columns: Column count; ``0`` selects one automatically (one
	            column per 20 lines, clamped to 1..6).

	    Returns:
	        The PDF as ``bytes``, or ``None`` when fonts could not be found or
	        registered (the problem is reported through ``st.error``).
	    """
	    buffer = io.BytesIO()
	    page_width = A4[0] * 2
	    page_height = A4[1]
	    doc = SimpleDocTemplate(buffer, pagesize=(page_width, page_height), leftMargin=36, rightMargin=36, topMargin=36, bottomMargin=36)
	    styles = getSampleStyleSheet()
	    spacer_height = 10
	    pdf_content, total_lines = markdown_to_pdf_content(markdown_text, render_with_bold, auto_bold_numbers, add_space_before_numbered)

	    # Fall back to the body font for emoji when NotoEmoji-Bold is not
	    # available, instead of referencing a font that was never registered.
	    emoji_font_name = "DejaVuSans"
	    try:
	        available_font_files = glob.glob("*.ttf")
	        if not available_font_files:
	            st.error("No .ttf font files found.")
	            return None
	        selected_font_path = next((f for f in available_font_files if "NotoEmoji-Bold" in f), None)
	        if selected_font_path:
	            pdfmetrics.registerFont(TTFont("NotoEmoji-Bold", selected_font_path))
	            emoji_font_name = "NotoEmoji-Bold"
	        pdfmetrics.registerFont(TTFont("DejaVuSans", "DejaVuSans.ttf"))
	    except Exception as e:
	        st.error(f"Font registration error: {e}")
	        return None

	    if total_lines == 0:
	        # Nothing to lay out. ReportLab's Table rejects empty data by
	        # default, so emit a page containing only the spacer.
	        doc.build([Spacer(1, spacer_height)])
	        return buffer.getvalue()

	    # --- Layout: derive column count and font size from the content ---
	    total_chars = sum(len(line) for line in pdf_content)
	    usable_height = page_height - 72 - spacer_height  # 72 = top + bottom margin
	    usable_width = page_width - 72                    # 72 = left + right margin
	    avg_line_chars = total_chars / total_lines
	    ideal_lines_per_col = 20
	    suggested_columns = max(1, min(6, int(total_lines / ideal_lines_per_col) + 1))
	    num_columns = num_columns if num_columns != 0 else suggested_columns
	    col_width = usable_width / num_columns
	    min_font_size = 6
	    max_font_size = 16
	    lines_per_column = total_lines / num_columns if num_columns > 0 else total_lines
	    target_height_per_line = usable_height / lines_per_column if lines_per_column > 0 else usable_height
	    estimated_font_size = int(target_height_per_line / 1.5)
	    adjusted_font_size = max(min_font_size, min(max_font_size, estimated_font_size))
	    # Shrink further when the average line is too long for the column.
	    if avg_line_chars > col_width / adjusted_font_size * 10:
	        adjusted_font_size = int(col_width / (avg_line_chars / 10))
	        adjusted_font_size = max(min_font_size, adjusted_font_size)

	    item_style = ParagraphStyle(
	        'ItemStyle', parent=styles['Normal'], fontName="DejaVuSans",
	        fontSize=adjusted_font_size, leading=adjusted_font_size * 1.15, spaceAfter=1,
	        linkUnderline=True
	    )
	    numbered_font_size = adjusted_font_size + 1 if enlarge_numbered else adjusted_font_size
	    numbered_bold_style = ParagraphStyle(
	        'NumberedBoldStyle', parent=styles['Normal'], fontName=emoji_font_name,
	        fontSize=numbered_font_size, leading=numbered_font_size * 1.15, spaceAfter=1,
	        linkUnderline=True
	    )
	    section_style = ParagraphStyle(
	        'SectionStyle', parent=styles['Heading2'], fontName="DejaVuSans",
	        textColor=colors.darkblue, fontSize=adjusted_font_size * 1.1, leading=adjusted_font_size * 1.32, spaceAfter=2,
	        linkUnderline=True
	    )

	    # --- Distribute lines over the columns, filling left to right ---
	    columns = [[] for _ in range(num_columns)]
	    current_line_count = 0
	    current_column = 0
	    for item in pdf_content:
	        if current_line_count >= lines_per_column and current_column < num_columns - 1:
	            current_column += 1
	            current_line_count = 0
	        columns[current_column].append(item)
	        current_line_count += 1

	    # --- Build one Paragraph per line, choosing the style by line type ---
	    number_pattern = re.compile(r'^\d+\.\s')
	    column_cells = [[] for _ in range(num_columns)]
	    for col_idx, column in enumerate(columns):
	        for item in column:
	            if isinstance(item, str) and item.startswith("<b>") and item.endswith("</b>"):
	                content = item[3:-4].strip()
	                style = numbered_bold_style if number_pattern.match(content) else section_style
	                column_cells[col_idx].append(Paragraph(apply_emoji_font(content, emoji_font_name), style))
	            else:
	                column_cells[col_idx].append(Paragraph(apply_emoji_font(item, "DejaVuSans"), item_style))

	    # Pad shorter columns so the rows line up; each pad cell gets its own
	    # Paragraph rather than one instance shared across many cells.
	    max_cells = max(len(cells) for cells in column_cells) if column_cells else 0
	    for cells in column_cells:
	        cells.extend(Paragraph("", item_style) for _ in range(max_cells - len(cells)))

	    table_data = list(zip(*column_cells)) if column_cells else [[]]
	    table = Table(table_data, colWidths=[col_width] * num_columns, hAlign='CENTER')
	    table.setStyle(TableStyle([
	        ('VALIGN', (0, 0), (-1, -1), 'TOP'),
	        ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
	        ('BACKGROUND', (0, 0), (-1, -1), colors.white),
	        ('GRID', (0, 0), (-1, -1), 0, colors.white),
	        ('LINEAFTER', (0, 0), (num_columns-1, -1), 0.5, colors.grey),
	        ('LEFTPADDING', (0, 0), (-1, -1), 2),
	        ('RIGHTPADDING', (0, 0), (-1, -1), 2),
	        ('TOPPADDING', (0, 0), (-1, -1), 1),
	        ('BOTTOMPADDING', (0, 0), (-1, -1), 1),
	    ]))
	    story = [Spacer(1, spacer_height), table]
	    doc.build(story)
	    buffer.seek(0)
	    return buffer.getvalue()

	def pdf_to_image(pdf_bytes):
	    """Rasterize every page of a PDF into a PIL image.

	    Parameters:
	        pdf_bytes: The PDF document as bytes. ``None`` or empty input
	            (for example when PDF generation failed) yields ``None``.

	    Returns:
	        A list with one RGB ``PIL.Image`` per page rendered at 2x zoom, or
	        ``None`` when there is nothing to render or rendering fails (the
	        failure is reported through ``st.error``).
	    """
	    if not pdf_bytes:
	        return None
	    try:
	        doc = fitz.open(stream=pdf_bytes, filetype="pdf")
	        try:
	            zoom = fitz.Matrix(2.0, 2.0)
	            images = []
	            for page in doc:
	                pix = page.get_pixmap(matrix=zoom)
	                images.append(Image.frombytes("RGB", (pix.width, pix.height), pix.samples))
	            return images
	        finally:
	            # Release the document even when a page fails to render.
	            doc.close()
	    except Exception as e:
	        st.error(f"Failed to render PDF preview: {e}")
	        return None

	# Markdown files in the working directory, excluding README.md, and their
	# extension-less names for display in the file selector.
	md_files = list(filter(lambda path: os.path.basename(path) != "README.md", glob.glob("*.md")))
	md_options = list(map(lambda path: os.path.splitext(os.path.basename(path))[0], md_files))

	# Sidebar controls: source file, rendering options, Markdown editor, text-to-speech.
	# NOTE(review): indentation was lost in this copy of the file; the statements
	# below are presumably nested under this sidebar block — confirm against the original.
	with st.sidebar:
	st.markdown("### PDF Options")
	# Load the chosen file's text into st.session_state.markdown_content,
	# or fall back to an empty document when no Markdown file exists.
	if md_options:
	selected_md = st.selectbox("Select Markdown File", options=md_options, index=0)
	with open(f"{selected_md}.md", "r", encoding="utf-8") as f:
	st.session_state.markdown_content = f.read()
	else:
	st.warning("No markdown file found. Please add one to your folder.")
	selected_md = None
	st.session_state.markdown_content = ""
	# Map font name (file stem) -> path for every .ttf in the working directory;
	# "NotoEmoji-Bold" is preselected when present.
	available_font_files = {os.path.splitext(os.path.basename(f))[0]: f for f in glob.glob("*.ttf")}
	selected_font_name = st.selectbox("Select Emoji Font", options=list(available_font_files.keys()),
	index=list(available_font_files.keys()).index("NotoEmoji-Bold") if "NotoEmoji-Bold" in available_font_files else 0)
	# NOTE(review): base_font_size is passed to create_pdf, which does not appear to read it.
	base_font_size = st.slider("Font Size (points)", min_value=6, max_value=16, value=8, step=1)
	render_with_bold = st.checkbox("Render with Bold Formatting (remove ** markers)", value=True, key="render_with_bold")
	auto_bold_numbers = st.checkbox("Auto Bold Numbered Lines", value=True, key="auto_bold_numbers")
	enlarge_numbered = st.checkbox("Enlarge Font Size for Numbered Lines", value=True, key="enlarge_numbered")
	add_space_before_numbered = st.checkbox("Add Space Ahead of Numbered Lines", value=False, key="add_space_before_numbered")

	# Add AutoColumns option to automatically determine column count based on line length
	auto_columns = st.checkbox("AutoColumns", value=False, key="auto_columns")

	# Auto-determine column count based on longest line if AutoColumns is checked
	if auto_columns and 'markdown_content' in st.session_state:
	current_markdown = st.session_state.markdown_content
	lines = current_markdown.strip().split('\n')
	longest_line_words = 0
	for line in lines:
	if line.strip(): # Skip empty lines
	word_count = len(line.split())
	longest_line_words = max(longest_line_words, word_count)

	# Set recommended columns based on word count
	if longest_line_words > 25:
	recommended_columns = 1 # Very long lines need a single column
	elif longest_line_words >= 18:
	recommended_columns = 2 # Long lines need 2 columns
	elif longest_line_words >= 11:
	recommended_columns = 3 # Medium lines can use 3 columns
	else:
	recommended_columns = "Auto" # Default to auto for shorter lines

	st.info(f"Longest line has {longest_line_words} words. Recommending {recommended_columns} columns.")
	else:
	recommended_columns = "Auto"

	# The recommendation only preselects an entry; "Auto" is converted to 0,
	# which create_pdf replaces with a column count derived from the line count.
	column_options = ["Auto"] + list(range(1, 7))
	num_columns = st.selectbox("Number of Columns", options=column_options,
	index=0 if recommended_columns == "Auto" else column_options.index(recommended_columns))
	num_columns = 0 if num_columns == "Auto" else int(num_columns)
	st.info("Font size and columns adjust to fit one page.")

	# Changed label from "Modify the markdown content below:" to "Input Markdown"
	# The widget key is built from the file, font and column selection
	# (presumably so that changing any of them resets the editor — confirm).
	edited_markdown = st.text_area("Input Markdown", value=st.session_state.markdown_content, height=300, key=f"markdown_{selected_md}_{selected_font_name}_{num_columns}")

	# Added emoji to "Update PDF" button and created a two-column layout for buttons
	col1, col2 = st.columns(2)
	with col1:
	# Store the edited text in session state, write it back to the selected file, then rerun.
	if st.button("🔄📄 Update PDF"):
	st.session_state.markdown_content = edited_markdown
	if selected_md:
	with open(f"{selected_md}.md", "w", encoding="utf-8") as f:
	f.write(edited_markdown)
	st.rerun()

	# Added "Trim Emojis" button in second column
	with col2:
	# Same as above, but with emoji removed from all non-numbered lines first.
	if st.button("✂️ Trim Emojis"):
	trimmed_content = trim_emojis_except_numbered(edited_markdown)
	st.session_state.markdown_content = trimmed_content
	if selected_md:
	with open(f"{selected_md}.md", "w", encoding="utf-8") as f:
	f.write(trimmed_content)
	st.rerun()

	# Timestamp prefix used in the Markdown and audio download file names below.
	prefix = get_timestamp_prefix()
	st.download_button(
	label="💾📝 Save Markdown",
	data=st.session_state.markdown_content,
	file_name=f"{prefix} {selected_md}.md" if selected_md else f"{prefix} default.md",
	mime="text/markdown"
	)
	st.markdown("### Text-to-Speech")
	VOICES = ["en-US-AriaNeural", "en-US-JennyNeural", "en-GB-SoniaNeural", "en-US-GuyNeural", "en-US-AnaNeural"]
	selected_voice = st.selectbox("Select Voice for TTS", options=VOICES, index=0)
	# Clean the text with clean_for_speech, synthesize it to an .mp3 file named
	# after the prefix, document and voice, then offer it for playback and download.
	if st.button("Generate Audio"):
	cleaned_text = clean_for_speech(st.session_state.markdown_content)
	audio_filename = f"{prefix} {selected_md} {selected_voice}.mp3" if selected_md else f"{prefix} default {selected_voice}.mp3"
	audio_file = asyncio.run(generate_audio(cleaned_text, selected_voice, audio_filename))
	st.audio(audio_file)
	with open(audio_file, "rb") as f:
	audio_bytes = f.read()
	st.download_button(
	label="💾🔊 Save Audio",
	data=audio_bytes,
	file_name=audio_filename,
	mime="audio/mpeg"
	)

	# Build the PDF from the current Markdown and the sidebar settings.
	with st.spinner("Generating PDF..."):
	    pdf_bytes = create_pdf(st.session_state.markdown_content, base_font_size, render_with_bold, auto_bold_numbers, enlarge_numbered, num_columns, add_space_before_numbered)

	if pdf_bytes is None:
	    # create_pdf returns None after reporting a font problem; there is
	    # nothing to preview, and a download button cannot take None as data.
	    st.info("PDF could not be generated; fix the error above and try again.")
	else:
	    # Page previews in the main area.
	    with st.container():
	        pdf_images = pdf_to_image(pdf_bytes)
	        if pdf_images:
	            for img in pdf_images:
	                st.image(img, use_container_width=True)
	        else:
	            st.info("Download the PDF to view it locally.")

	    # Download button back in the sidebar, next to the other save actions.
	    with st.sidebar:
	        st.download_button(
	            label="💾📄 Save PDF",
	            data=pdf_bytes,
	            file_name=f"{prefix} {selected_md}.pdf" if selected_md else f"{prefix} output.pdf",
	            mime="application/pdf"
	        )