John6666 committed on
Commit
50aaf68
·
verified ·
1 Parent(s): cd7f867

Delete src

Browse files
Files changed (2) hide show
  1. src/citrailmu/__init__.py +0 -422
  2. src/citrailmu/webui.py +0 -137
src/citrailmu/__init__.py DELETED
@@ -1,422 +0,0 @@
1
- import os
2
- import re
3
- import time
4
- import uuid
5
- import base64
6
- import requests
7
- import tempfile
8
- from datetime import datetime
9
- from pytubefix import YouTube
10
- from colorpaws import ColorPaws
11
- from pytubefix.cli import on_progress
12
- from google import generativeai as genai
13
- from moviepy.editor import AudioFileClip
14
- from markdown_pdf import MarkdownPdf, Section
15
-
16
class CitraIlmu:
    """Copyright (C) 2025 Ikmal Said. All rights reserved

    Turns a media source (local file, YouTube URL or direct web URL) into a
    compressed mono MP3, sends it to a generative AI model for an analysis
    or transcript, and renders the returned markdown as a PDF.
    """

    def __init__(self, mode='default', api_key=None, model='gemini-1.5-flash-8b', yt_api=False, yt_api_key=None):
        """
        Initialize Citrailmu module.

        Parameters:
            mode (str): Startup mode ('default' or 'webui')
            api_key (str): API key for AI services
            model (str): AI model to use
            yt_api (bool): Use YouTube API
            yt_api_key (str): YouTube API key

        Raises:
            ValueError: If mode is neither 'default' nor 'webui'.
        """
        self.logger = ColorPaws(name=self.__class__.__name__, log_on=True, log_to=None)
        self.aigc_model = model
        self.api_key = api_key
        self.yt_api = yt_api
        self.yt_api_key = yt_api_key

        self.logger.info("CitraIlmu is ready!")

        # 'webui' launches the UI straight from the constructor; anything
        # other than the two known modes is rejected.
        if mode == 'webui':
            self.start_webui()
        elif mode != 'default':
            raise ValueError(f"Invalid startup mode: {mode}")

    def __is_youtube_url(self, url):
        """Check if the URL is a YouTube URL"""
        youtube_regex = r'(https?://)?(www\.)?(youtube|youtu|youtube-nocookie)\.(com|be)/'
        return bool(re.match(youtube_regex, url))

    def __is_url(self, url):
        """Check if string is a URL"""
        url_regex = r'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+'
        return bool(re.match(url_regex, url))

    def __format_duration(self, seconds):
        """Convert seconds to HH:MM:SS format"""
        hours = int(seconds // 3600)
        minutes = int((seconds % 3600) // 60)
        seconds = int(seconds % 60)
        return f"{hours:02d}:{minutes:02d}:{seconds:02d}"

    def __compress_audio(self, filepath, task_id):
        """
        Compress audio to optimal size while maintaining quality.

        Writes a mono 16k-bitrate MP3 into the system temp directory, named
        after the sanitized stem of the input file.

        Returns:
            str | None: Path of the compressed file, or None on failure.
        """
        self.logger.info(f"[{task_id}] Compressing audio: {filepath}")

        try:
            filename = re.sub(r'[^\w\-]', '_', os.path.splitext(os.path.basename(filepath))[0])
            temp_path = os.path.join(tempfile.gettempdir(), f"{filename}.mp3")

            # If the input already lives at the output location (e.g. a
            # previous result fed back in), writing there would overwrite
            # the file while it is still being read. Use a distinct name.
            same_file = (
                os.path.normcase(os.path.realpath(filepath))
                == os.path.normcase(os.path.realpath(temp_path))
            )
            if same_file:
                temp_path = os.path.join(tempfile.gettempdir(), f"{filename}_compressed.mp3")

            audio = None
            try:
                audio = AudioFileClip(filepath)
                audio = audio.audio_fadeout(0.1)
                audio.write_audiofile(
                    temp_path,
                    fps=44100,
                    nbytes=2,
                    bitrate="16k",
                    ffmpeg_params=["-ac", "1"],
                    verbose=False,
                    logger=None
                )
                return temp_path
            finally:
                if audio is not None:
                    audio.close()

        except Exception as e:
            self.logger.error(f"[{task_id}] Audio compression failed: {str(e)}")
            return None

    def __media_processor(self, input_path, task_id):
        """
        Process media input (local file, YouTube URL, or web URL).

        Returns:
            str | None: Path of the compressed audio, or None on failure.
        """
        try:
            if os.path.isfile(input_path):
                return self.__compress_audio(input_path, task_id)

            if self.__is_youtube_url(input_path):
                if self.yt_api:
                    return self.__process_youtube_api(input_path, task_id)
                return self.__process_youtube(input_path, task_id)

            if self.__is_url(input_path):
                return self.__process_web_url(input_path, task_id)

            self.logger.error(f"[{task_id}] Invalid input: not a file path or URL")
            return None

        except Exception as e:
            self.logger.error(f"[{task_id}] Media processing failed: {str(e)}")
            return None

    def __convert_b64(self, url):
        """Convert URL from base64 to string"""
        return base64.b64decode(url).decode('utf-8')

    def __process_youtube_api(self, url, task_id):
        """
        Process YouTube URL using API.

        Downloads the 480p (video+audio) rendition through the configured
        API endpoint, compresses its audio and removes the downloaded file.

        Returns:
            str | None: Path of the compressed audio, or None on failure.

        Raises:
            ValueError: If no API key is configured.
        """
        api_key = self.yt_api_key or os.getenv('YT_API_KEY')

        if not api_key:
            raise ValueError("No API key available. Please set YT_API_KEY environment variable or provide it during initialization")

        self.logger.info(f"[{task_id}] Processing YouTube URL via API: {url}")

        # Bound before the try so the cleanup below never sees an
        # unassigned name when an error happens early.
        temp_path = None
        try:
            endpoint = self.__convert_b64("eW91dHViZS12aWRlby1hbmQtc2hvcnRzLWRvd25sb2FkZXIxLnAucmFwaWRhcGkuY29t")

            api_url = f"https://{endpoint}/api/getYTVideo"
            payload = {"url": url}
            headers = {
                "x-rapidapi-key": api_key,
                "x-rapidapi-host": endpoint
            }

            response = requests.get(api_url, params=payload, headers=headers)
            response.raise_for_status()

            video_data = response.json()
            video_title = video_data.get("description")

            # Pick the first 480p (video+audio) rendition; the audio track
            # is extracted from it during compression.
            download_link = next(
                (
                    link_data.get("link")
                    for link_data in video_data.get("links", [])
                    if link_data.get("quality") == "video_render_480p (video+audio)"
                ),
                None
            )

            if not download_link:
                raise ValueError("No 480p (video+audio) download link found in the response")

            if not video_title:
                raise ValueError("No video title found in the response")

            self.logger.info(f"[{task_id}] Downloading video: '{video_title}'")

            clean_title = re.sub(r'[^\w\-]', '_', video_title)
            temp_path = os.path.join(tempfile.gettempdir(), f"{task_id}_{clean_title}.mp4")

            # Stream the download to disk and release the connection afterwards
            with requests.get(download_link, stream=True) as download_response:
                download_response.raise_for_status()
                with open(temp_path, 'wb') as f:
                    for chunk in download_response.iter_content(chunk_size=8192):
                        if chunk:
                            f.write(chunk)

            # Validate downloaded file
            if not os.path.exists(temp_path) or os.path.getsize(temp_path) == 0:
                raise ValueError("Downloaded file is empty or does not exist")

            if os.path.getsize(temp_path) < 1024:  # Less than 1KB is suspicious
                raise ValueError("Downloaded file is too small to be valid")

            return self.__compress_audio(temp_path, task_id)

        except ValueError as e:
            self.logger.error(f"[{task_id}] Youtube API processing failed: {str(e)}")
            return None

        finally:
            # Runs on success, on ValueError and on any propagating error
            # (e.g. network failures handled by the caller).
            if temp_path and os.path.exists(temp_path):
                os.unlink(temp_path)

    def __process_youtube(self, url, task_id):
        """
        Process YouTube URL.

        Downloads the audio-only stream, compresses it and removes the
        downloaded file.

        Returns:
            str | None: Path of the compressed audio, or None on failure.
        """
        self.logger.info(f"[{task_id}] Processing YouTube URL: {url}")
        downloaded_file = None
        try:
            yt = YouTube(url, on_progress_callback=on_progress)
            clean_title = re.sub(r'[^\w\-]', '_', yt.title)
            temp_filename = f"{task_id}_{clean_title}.m4a"

            self.logger.info(f"[{task_id}] Downloading video: '{yt.title}'")

            downloaded_file = yt.streams.get_audio_only().download(
                output_path=tempfile.gettempdir(),
                filename=temp_filename
            )

            return self.__compress_audio(downloaded_file, task_id)

        except Exception as e:
            self.logger.error(f"[{task_id}] YouTube processing failed: {str(e)}")
            return None

        finally:
            if downloaded_file and os.path.exists(downloaded_file):
                os.unlink(downloaded_file)

    def __process_web_url(self, url, task_id):
        """
        Process web URL.

        Downloads the resource to a temp file, compresses its audio and
        removes the downloaded file.

        Returns:
            str | None: Path of the compressed audio, or None on failure.
        """
        self.logger.info(f"[{task_id}] Processing web URL: {url}")
        temp_path = None
        try:
            filename = os.path.basename(url.split('?')[0]) or f"download_{int(time.time())}"
            # Same sanitization the compressor applies to the stem, so the
            # final output name is unchanged while the temp name is
            # filesystem-safe.
            filename = re.sub(r'[^\w\-]', '_', filename)
            temp_path = os.path.join(tempfile.gettempdir(), f"{filename}.mp4")

            with requests.get(url, stream=True) as response:
                # Fail loudly on HTTP errors instead of handing an empty
                # file to the compressor.
                response.raise_for_status()
                with open(temp_path, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        f.write(chunk)

            return self.__compress_audio(temp_path, task_id)

        except Exception as e:
            self.logger.error(f"[{task_id}] URL processing failed: {str(e)}")
            return None

        finally:
            if temp_path and os.path.exists(temp_path):
                os.unlink(temp_path)

    def __clean_markdown(self, text):
        """Clean up markdown text by removing code-fence markers"""
        text = re.sub(r'```[a-zA-Z]*\n', '', text)
        text = re.sub(r'```\n?', '', text)
        return text.strip()

    def __aigc_processor(self, input_path, target_language, processing_mode, task_id):
        """
        Process input path using AI.

        Uploads the audio file and asks the configured model for either an
        analysis (in target_language) or a transcript.

        Returns:
            str | None: Cleaned markdown text, or None on failure or when
            processing_mode is not 'analysis'/'transcript'.
        """
        try:
            if self.api_key:
                genai.configure(api_key=self.api_key)

            # The clip is opened only to read its duration for the prompt
            audio = AudioFileClip(input_path)
            try:
                formatted_duration = self.__format_duration(audio.duration)
            finally:
                audio.close()

            if processing_mode.lower() == 'analysis':
                prompt = f"You are an expert audio transcriber and content analyst. Your task is to provide a transcript of the given audio file from 00:00 to {formatted_duration}. You must list down every discussed topic, themes, points and reflections in {target_language}. You must begin with the most suitable title of the speech with overview of the speech and must end with the conclusion. Do not include any opening or closing remarks."

            elif processing_mode.lower() == 'transcript':
                prompt = f"You are an expert audio transcriber. Your task is to provide a transcript of the given audio file from 00:00 to {formatted_duration}. You must begin with the most suitable title of the speech before the speech starts. Do not include any opening or closing remarks."

            else:
                self.logger.error(f"[{task_id}] Invalid processing mode: {processing_mode}")
                return None

            self.logger.info(f"[{task_id}] Uploading audio for processing...")
            audio_file = genai.upload_file(path=input_path)

            self.logger.info(f"[{task_id}] Processing AI {processing_mode}...")
            model = genai.GenerativeModel(self.aigc_model)
            response = model.generate_content([prompt, audio_file])

            return self.__clean_markdown(response.text)

        except Exception as e:
            self.logger.error(f"[{task_id}] AI {processing_mode} processing failed: {str(e)}")
            return None

    def __resolve_title(self, markdown_text, target_language, processing_mode):
        """
        Determine the document title and make sure the text starts with an H1.

        Returns:
            tuple[str, str]: (title, markdown_text). When the text already
            begins with a level 1 header, its text becomes the title and the
            markdown is returned untouched; otherwise a default title is
            generated and prepended as the header.
        """
        default_title = f"CitraIlmu {processing_mode.capitalize()} ({target_language})"

        if markdown_text.startswith('# '):
            heading = markdown_text.split('\n', 1)[0][2:].strip()
            return (heading or default_title), markdown_text

        return default_title, f"# {default_title}\n\n{markdown_text}"

    def __markdown_to_pdf(self, markdown_text, original_path, target_language, processing_mode, task_id):
        """
        Convert markdown to PDF.

        The PDF is written to the system temp directory and named after the
        sanitized stem of original_path plus the processing mode (and the
        target language for analyses).

        Returns:
            str | None: Path of the generated PDF, or None on failure.
        """
        try:
            filename = re.sub(r'[^\w\-]', '_', os.path.splitext(os.path.basename(original_path))[0])
            clean_filename = f"{filename}_{processing_mode.lower()}" + (f"_{target_language.lower().replace(' ', '_')}" if processing_mode.lower() == 'analysis' else '')
            pdf_path = os.path.join(tempfile.gettempdir(), f"{clean_filename}.pdf")

            self.logger.info(f"[{task_id}] Generating PDF: {pdf_path}")
            pdf = MarkdownPdf(toc_level=3)

            # Add main content section with custom CSS
            css = """
            body {
                font-family: 'Segoe UI', sans-serif;
                text-align: justify;
                text-justify: inter-word;
            }

            table, th, td {
                border: 1px solid black;
            }

            h1 {
                text-align: center;
                color: #2c3e50;
                margin-top: 1.5em;
                margin-bottom: 0.8em;
                font-size: 1.25em;
                font-weight: 500;
            }

            h2, h3, h4, h5, h6 {
                color: #34495e;
                margin-top: 1.5em;
                margin-bottom: 0.8em;
                text-align: left;
            }

            p {
                margin: 0.8em 0;
            }
            """

            # Ensure the content starts with a level 1 header and always
            # have a title for the metadata, whether or not the model
            # supplied its own heading.
            title, markdown_text = self.__resolve_title(markdown_text, target_language, processing_mode)

            # Add the main content section
            main_section = Section(markdown_text, toc=True)
            pdf.add_section(main_section, user_css=css)

            # Set PDF metadata with Unicode support
            pdf.meta["title"] = title
            pdf.meta["subject"] = title
            pdf.meta["author"] = "Ikmal Said"
            pdf.meta["creator"] = "CitraIlmu"

            # Save the PDF
            pdf.save(pdf_path)
            return pdf_path

        except Exception as e:
            self.logger.error(f"[{task_id}] PDF generation failed: {str(e)}")
            return None

    def __get_taskid(self):
        """
        Generate a unique task ID for request tracking.
        Returns a combination of timestamp and UUID to ensure uniqueness.
        Format: YYYYMMDD_HHMMSS_UUID8
        """
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        uuid_part = str(uuid.uuid4())[:8]
        return f"{timestamp}_{uuid_part}"

    def process_media(self, input_path, target_language="Bahasa Malaysia", processing_mode="Analysis"):
        """Process media for specified target language and processing mode.

        Parameters:
            input_path (str): Path to the media file, or a YouTube/web URL
            target_language (str): Target language for the analysis ('bahasa malaysia', 'arabic', 'english', 'mandarin', 'tamil')
            processing_mode (str): Processing mode ('analysis' or 'transcript')

        Returns:
            tuple: (compressed_audio_path, pdf_path, markdown_text). Each
            stage that fails leaves its own and all later entries as None.

        Raises:
            ValueError: If input_path is empty, or target_language or
                processing_mode is not one of the supported values.
        """
        if not input_path:
            raise ValueError("Input path is required!")

        if target_language.lower() not in ["bahasa malaysia", "arabic", "english", "mandarin", "tamil"]:
            raise ValueError("Invalid target language!")

        if processing_mode.lower() not in ["analysis", "transcript"]:
            raise ValueError("Invalid processing mode!")

        task_id = self.__get_taskid()
        self.logger.info(f"[{task_id}] Task started: {processing_mode}" + (f" in {target_language}" if processing_mode.lower() == 'analysis' else ''))

        try:
            compressed_file = self.__media_processor(input_path, task_id)
            if not compressed_file:
                return None, None, None

            markdown_text = self.__aigc_processor(compressed_file, target_language, processing_mode, task_id)
            if not markdown_text:
                return compressed_file, None, None

            pdf_file = self.__markdown_to_pdf(markdown_text, compressed_file, target_language, processing_mode, task_id)
            if not pdf_file:
                return compressed_file, None, markdown_text

            self.logger.info(f"[{task_id}] Task completed successfully!")
            return compressed_file, pdf_file, markdown_text

        except Exception as e:
            self.logger.error(f"[{task_id}] Task failed: {str(e)}")
            return None, None, None

    def start_webui(self, host: str = None, port: int = None, browser: bool = False, upload_size: str = "100MB",
                    public: bool = False, limit: int = 10, quiet: bool = False):
        """
        Start Citrailmu WebUI with all features.

        Parameters:
        - host (str): Server host (default: None)
        - port (int): Server port (default: None)
        - browser (bool): Launch browser automatically (default: False)
        - upload_size (str): Maximum file size for uploads (default: "100MB")
        - public (bool): Enable public URL mode (default: False)
        - limit (int): Maximum number of concurrent requests (default: 10)
        - quiet (bool): Quiet mode (default: False)
        """
        # Imported here so the UI module is only loaded when the UI is started
        from .webui import CitraIlmuWebUI
        CitraIlmuWebUI(self, host=host, port=port, browser=browser, upload_size=upload_size,
                       public=public, limit=limit, quiet=quiet)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/citrailmu/webui.py DELETED
@@ -1,137 +0,0 @@
1
- import gradio as gr
2
-
3
def CitraIlmuWebUI(client, host: str = None, port: int = None, browser: bool = True, upload_size: str = "4MB",
                   public: bool = False, limit: int = 10, quiet: bool = False):
    """
    Start Citrailmu Web UI with all features.

    Builds the Gradio interface, wires the three "Process" buttons to
    client.process_media and launches the server. Any exception is logged
    through client.logger and re-raised.

    Parameters:
    - client (Client): Citrailmu instance
    - host (str): Server host
    - port (int): Server port
    - browser (bool): Launch browser automatically
    - upload_size (str): Maximum file size for uploads
    - public (bool): Enable public URL mode
    - limit (int): Maximum number of concurrent requests
    - quiet (bool): Quiet mode
    """
    # Function-scope imports: only the preview helpers below need them.
    from html import escape
    from urllib.parse import parse_qs, quote, urlparse

    try:
        def parse_youtube(url):
            """Return (is_youtube, video_id); video_id is '' when none is found."""
            try:
                parsed = urlparse(url if "://" in url else f"https://{url}")
                host_name = (parsed.hostname or "").lower()
            except ValueError:
                # Malformed input, e.g. a half-typed URL
                return False, ""

            segments = [part for part in parsed.path.split("/") if part]

            if host_name == "youtu.be":
                return True, (segments[0] if segments else "")

            is_youtube = any(
                host_name == domain or host_name.endswith("." + domain)
                for domain in ("youtube.com", "youtube-nocookie.com")
            )
            if not is_youtube:
                return False, ""

            video_id = parse_qs(parsed.query).get("v", [""])[0]
            # Path-style links such as /shorts/<id>, /embed/<id>, /live/<id>
            if not video_id and len(segments) >= 2 and segments[0] in ("shorts", "embed", "live", "v"):
                video_id = segments[1]
            return True, video_id

        def update_preview(url):
            """Return an HTML preview snippet for the URL, or '' when there is nothing to show."""
            if not url or not url.strip():
                return ""
            url = url.strip()

            template = """
            <div style="border-radius: 8px; overflow: hidden; border: 1px solid; border-color: var(--block-border-color);">
            {content}
            </div>
            """

            is_youtube, video_id = parse_youtube(url)

            if is_youtube:
                # This handler runs on every textbox change, so an
                # incomplete YouTube URL shows no preview instead of raising.
                if not video_id:
                    return ""
                safe_id = escape(quote(video_id, safe=""), quote=True)
                content = f"""
                <iframe
                width="100%"
                height="315"
                src="https://www.youtube.com/embed/{safe_id}?rel=0"
                frameborder="0"
                allowfullscreen
                style="display: block;">
                </iframe>
                """
            else:
                # The URL is user input: escape it before placing it in an attribute.
                safe_url = escape(url, quote=True)
                content = f"""
                <video width="100%" controls style="display: block;">
                <source src="{safe_url}">
                Your browser does not support the video tag.
                </video>
                """

            return template.format(content=content)

        gr_css = """
        footer {
            display: none !important;
        }
        """

        gr_theme = gr.themes.Default(
            primary_hue="green",
            secondary_hue="green",
            neutral_hue=gr.themes.colors.zinc,
            font=["Amiri", "system-ui", "sans-serif"]
        )

        with gr.Blocks(title="CitraIlmu", analytics_enabled=False, theme=gr_theme, css=gr_css).queue(default_concurrency_limit=limit) as demo:
            gr.Markdown("## <br><center>CitraIlmu Web UI")
            gr.Markdown("<center>Made for #GodamSahur 2025 by Ikmal Said")
            gr.Markdown("<center>")

            # NOTE(review): the column nesting below was reconstructed from a
            # flattened copy of the source; confirm against the intended layout.
            with gr.Row():
                with gr.Column():
                    with gr.Column(scale=1):
                        with gr.Tabs():
                            with gr.Tab('YouTube/Video URL'):
                                input_url = gr.Textbox(
                                    label="Input URL",
                                    placeholder="Enter URL...",
                                    lines=1,
                                    max_lines=1,
                                    info="Enter YouTube or web URL here"
                                )
                                url_preview = gr.HTML()
                                url_btn = gr.Button("Process URL", variant="primary")

                            with gr.Tab('Video File'):
                                input_video = gr.Video(label="Input Video File")
                                video_btn = gr.Button("Process Video", variant="primary")

                            with gr.Tab('Audio File'):
                                input_audio = gr.Audio(label="Input Audio File", type='filepath')
                                audio_btn = gr.Button("Process Audio", variant="primary")

                    with gr.Column(scale=1):
                        with gr.Tabs():
                            with gr.Tab('Settings', elem_classes='test'):
                                target_language = gr.Dropdown(
                                    value="Bahasa Malaysia",
                                    choices=["Bahasa Malaysia", "Arabic", "English", "Mandarin", "Tamil"],
                                    label="Target Analysis Language",
                                    info="Select the target language for the analysis"
                                )
                                processing_mode = gr.Radio(
                                    choices=["Analysis", "Transcript"],
                                    value="Analysis",
                                    label="Processing Mode",
                                    info="Analysis: Full content analysis with topics and themes | Transcript: Complete text from audio"
                                )

                with gr.Column(scale=1):
                    with gr.Tabs():
                        with gr.Tab('Results'):
                            audio_output = gr.Audio(label="Reference Audio")
                            pdf_output = gr.File(label="Download Results as PDF")
                            with gr.Accordion("Read Results as Text"):
                                results_text = gr.Markdown(value="Please process media first for reading!", height=300)

            gr.Markdown("<center>")
            gr.Markdown("<center>CitraIlmu can make mistakes. Check important info.")
            gr.Markdown("<center>")

            # Setup event handlers
            input_url.change(fn=update_preview, inputs=[input_url], outputs=[url_preview])
            audio_btn.click(fn=client.process_media, inputs=[input_audio, target_language, processing_mode], outputs=[audio_output, pdf_output, results_text])
            video_btn.click(fn=client.process_media, inputs=[input_video, target_language, processing_mode], outputs=[audio_output, pdf_output, results_text])
            url_btn.click(fn=client.process_media, inputs=[input_url, target_language, processing_mode], outputs=[audio_output, pdf_output, results_text])

        demo.launch(
            server_name=host,
            server_port=port,
            inbrowser=browser,
            max_file_size=upload_size,
            share=public,
            quiet=quiet
        )

    except Exception as e:
        client.logger.error(f"{str(e)}")
        raise