Rosan144 committed on
Commit
be74931
·
verified ·
1 Parent(s): d908361

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +91 -0
app.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound, VideoUnavailable
2
+ from urllib.parse import urlparse, parse_qs
3
+ import gradio as gr
4
+ from transformers import pipeline
5
+
6
+
7
+ # Load Hugging Face summarization model
8
# Checkpoint used for every summarization call in this app.
SUMMARIZATION_MODEL = "sshleifer/distilbart-xsum-12-6"

# Built once at import time so each request reuses the same pipeline object.
text_summary = pipeline("summarization", model=SUMMARIZATION_MODEL)
9
+
10
+ # Extract video ID from YouTube URL
11
def get_video_id(youtube_url):
    """Extract the video ID from a YouTube URL.

    Recognised forms (with or without a scheme or a leading ``www.``):

    * ``youtu.be/<id>``
    * ``youtube.com/watch?v=<id>``
    * ``youtube.com/embed/<id>``, ``/v/<id>``, ``/shorts/<id>``, ``/live/<id>``

    on the hosts ``youtube.com``, ``m.youtube.com``, ``music.youtube.com``
    and ``youtube-nocookie.com``.

    Args:
        youtube_url: The URL as typed or pasted by the user.

    Returns:
        The video ID string, or ``None`` when the input is not a string, is
        blank, cannot be parsed, or does not match a recognised form.
    """
    if not isinstance(youtube_url, str):
        return None

    # Pasted links frequently carry stray whitespace or newlines.
    url = youtube_url.strip()
    if not url:
        return None

    try:
        parsed = urlparse(url)
        if not parsed.netloc:
            # No scheme given ("youtu.be/abc"): urlparse puts everything in
            # the path, so re-parse with a scheme to get a real hostname.
            parsed = urlparse('https://' + url)
        host = parsed.hostname or ''
    except ValueError:
        # urlparse rejects some malformed inputs (e.g. a broken IPv6 literal).
        return None

    if host.startswith('www.'):
        host = host[len('www.'):]

    # Non-empty path segments; this ignores leading, trailing and doubled slashes.
    segments = [part for part in parsed.path.split('/') if part]

    if host == 'youtu.be':
        return segments[0] if segments else None

    if host in ('youtube.com', 'm.youtube.com', 'music.youtube.com',
                'youtube-nocookie.com'):
        if segments[:1] == ['watch']:
            return parse_qs(parsed.query).get('v', [None])[0]
        if len(segments) >= 2 and segments[0] in ('embed', 'v', 'shorts', 'live'):
            return segments[1]

    return None
21
+
22
+ # Fetch transcript from video ID
23
def fetch_transcript(video_url):
    """Return the transcript of a YouTube video as one space-joined string.

    Failures are not raised: an invalid URL or any transcript error is
    reported as a returned message that starts with "❌".
    """
    video_id = get_video_id(video_url)
    if not video_id:
        return "❌ Invalid YouTube URL."

    def as_text(entries):
        # Each entry is indexed by its 'text' key; join them with spaces.
        return " ".join([item['text'] for item in entries])

    try:
        return as_text(YouTubeTranscriptApi.get_transcript(video_id))
    except (NoTranscriptFound, TranscriptsDisabled, VideoUnavailable) as known_error:
        # Known transcript errors are reported to the caller as-is.
        return f"❌ {str(known_error)}"
    except Exception:
        # Anything else: retry through the transcript listing, asking for 'en'.
        try:
            available = YouTubeTranscriptApi.list_transcripts(video_id)
            english = available.find_transcript(['en'])
            return as_text(english.fetch())
        except Exception as fallback_error:
            return f"❌ Error fetching transcript: {str(fallback_error)}"
40
+
41
+ # Split long text safely into small chunks
42
def split_text(text, max_words=500):
    """Split *text* into consecutive chunks of at most *max_words* words.

    Words are whitespace-separated tokens; each chunk is re-joined with
    single spaces, so the original spacing is normalised.

    Args:
        text: The text to split.
        max_words: Maximum number of words per chunk; must be at least 1.

    Returns:
        A list of chunk strings in original order. Empty or whitespace-only
        text yields an empty list.

    Raises:
        ValueError: If *max_words* is less than 1.
    """
    # A negative step would make range() empty and silently drop the whole
    # text; zero would fail with an unrelated-looking range() error.
    if max_words < 1:
        raise ValueError("max_words must be a positive integer")

    words = text.split()
    return [
        " ".join(words[start:start + max_words])
        for start in range(0, len(words), max_words)
    ]
49
+
50
+ # Main function: fetch + summarize any transcript length
51
def summarize_youtube_video(url):
    """Fetch a YouTube video's transcript and return a summary of it.

    Short transcripts (at most 500 words) are summarized in a single call.
    Longer ones are split into 500-word chunks, each chunk is summarized, and
    the partial summaries are joined; this is repeated until the joined text
    fits in a single chunk, which then gets a final summary. Every model call
    uses ``truncation=True`` so an over-long input is cut to the model's
    input limit rather than causing a failure.

    Args:
        url: The YouTube video URL.

    Returns:
        The summary text, or a message starting with "❌" when the transcript
        could not be fetched, is empty, or summarization failed.
    """
    max_words = 500   # chunk size, and the threshold for single-pass summaries
    max_rounds = 10   # hard bound on reduction passes so the loop always ends

    transcript = fetch_transcript(url)
    if transcript.startswith("❌"):
        return transcript
    if not transcript.strip():
        # Nothing to feed the model; report it instead of summarizing "".
        return "❌ Transcript is empty; nothing to summarize."

    try:
        if len(transcript.split()) <= max_words:
            summary = text_summary(
                transcript, max_length=150, min_length=60,
                do_sample=False, truncation=True,
            )
            return summary[0]['summary_text']

        text = transcript
        for _ in range(max_rounds):
            chunks = split_text(text, max_words=max_words)
            if len(chunks) <= 1:
                # The joined partial summaries now fit in one chunk.
                break
            partial_summaries = [
                text_summary(
                    chunk, max_length=150, min_length=60,
                    do_sample=False, truncation=True,
                )[0]['summary_text']
                for chunk in chunks
            ]
            text = " ".join(partial_summaries)

        # Final summary of all summaries
        final_summary = text_summary(
            text, max_length=200, min_length=80,
            do_sample=False, truncation=True,
        )
        return final_summary[0]['summary_text']

    except Exception as e:
        return f"❌ Error during summarization: {str(e)}"
79
+
80
+ # Gradio UI
81
# Close any Gradio interfaces that are still running before starting this one.
gr.close_all()

# Input: a single-line box for the video link.
url_input = gr.Textbox(
    label="Enter YouTube Video URL",
    lines=1,
    placeholder="https://youtu.be/...",
)

# Output: a multi-line box for the generated summary (or an error message).
summary_output = gr.Textbox(label="Video Summary", lines=10)

demo = gr.Interface(
    fn=summarize_youtube_video,
    inputs=url_input,
    outputs=summary_output,
    title="@RosangenAi Project 2: YouTube Video Summarizer",
    description="Paste any YouTube video link. This app will fetch and summarize even long transcripts using Hugging Face models.",
)

demo.launch()