Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
import streamlit as st
|
2 |
from transformers import pipeline
|
|
|
3 |
import time
|
4 |
|
5 |
# تحميل النموذج
|
@@ -8,61 +9,110 @@ classifier = pipeline("zero-shot-classification", model="cross-encoder/nli-disti
|
|
8 |
# عنوان التطبيق
|
9 |
st.title("Text Classification App")
|
10 |
|
|
|
|
|
|
|
11 |
# إدخال الملف النصي
|
12 |
-
uploaded_file = st.file_uploader("Upload a text file
|
13 |
|
14 |
if uploaded_file is not None:
|
15 |
# قراءة الملف النصي
|
16 |
content = uploaded_file.read().decode("utf-8")
|
17 |
-
|
18 |
|
19 |
# تحديد الفئات
|
20 |
categories = ["shop", "game", "stream"]
|
21 |
|
22 |
-
# قوائم لتخزين
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
|
28 |
# متغيرات للتحكم في العملية
|
29 |
progress_bar = st.progress(0)
|
30 |
pause_button = st.button("Pause")
|
31 |
stop_button = st.button("Stop")
|
32 |
-
continue_button = st.button("Continue")
|
33 |
paused = False
|
34 |
stopped = False
|
35 |
-
current_index = 0
|
36 |
-
|
37 |
-
# دالة تصنيف
|
38 |
-
def classify_keywords(
|
39 |
-
global paused, stopped, current_index
|
40 |
-
|
41 |
-
for i,
|
42 |
-
current_index = i
|
43 |
if stopped:
|
44 |
break
|
45 |
if paused:
|
46 |
-
time.sleep(0.5)
|
47 |
continue
|
48 |
|
49 |
-
# تصنيف الكلمة
|
50 |
-
result = classifier(
|
51 |
best_category = result['labels'][0]
|
52 |
score = result['scores'][0]
|
53 |
|
54 |
-
# إضافة الكلمة إلى القائمة المناسبة
|
55 |
if best_category == "shop" and score > 0.5:
|
56 |
-
|
57 |
elif best_category == "game" and score > 0.5:
|
58 |
-
|
59 |
elif best_category == "stream" and score > 0.5:
|
60 |
-
|
61 |
else:
|
62 |
-
|
63 |
|
64 |
# تحديث شريط التقدم
|
65 |
-
progress = (current_index + 1) /
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
progress_bar.progress(progress)
|
67 |
|
68 |
# تحديث النتائج في الوقت الحقيقي
|
@@ -73,18 +123,28 @@ if uploaded_file is not None:
|
|
73 |
|
74 |
# دالة تحديث النتائج
|
75 |
def update_results():
|
76 |
-
# تحديث محتوى المربعات النصية
|
77 |
-
st.session_state.shopping_text = "\n".join(
|
78 |
-
st.session_state.gaming_text = "\n".join(
|
79 |
-
st.session_state.streaming_text = "\n".join(
|
80 |
-
st.session_state.unknown_text = "\n".join(
|
|
|
|
|
|
|
|
|
|
|
|
|
81 |
|
82 |
# زر البدء
|
83 |
if st.button("Start"):
|
84 |
stopped = False
|
85 |
paused = False
|
86 |
-
current_index = 0
|
87 |
-
|
|
|
|
|
|
|
|
|
88 |
|
89 |
# زر الإيقاف المؤقت
|
90 |
if pause_button:
|
@@ -95,33 +155,60 @@ if uploaded_file is not None:
|
|
95 |
if continue_button and paused:
|
96 |
paused = False
|
97 |
st.write("Classification resumed.")
|
98 |
-
|
|
|
|
|
|
|
99 |
|
100 |
# زر التوقف الكامل
|
101 |
if stop_button:
|
102 |
stopped = True
|
103 |
st.write("Classification stopped.")
|
104 |
|
105 |
-
# عرض النتائج
|
106 |
-
|
107 |
-
|
108 |
-
st.
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
st.
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
st.
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
st.
|
124 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
125 |
|
126 |
else:
|
127 |
-
st.warning("Please upload a text file to
|
|
|
1 |
import streamlit as st
|
2 |
from transformers import pipeline
|
3 |
+
import re
|
4 |
import time
|
5 |
|
6 |
# تحميل النموذج
|
|
|
9 |
# عنوان التطبيق
|
10 |
st.title("Text Classification App")
|
11 |
|
12 |
+
# اختيار العملية
|
13 |
+
operation = st.radio("Choose an operation:", ["Filter Keywords", "Extra & Filter Param (URLs)"])
|
14 |
+
|
15 |
# إدخال الملف النصي
|
16 |
+
uploaded_file = st.file_uploader("Upload a text file", type=["txt"])
|
17 |
|
18 |
if uploaded_file is not None:
|
19 |
# قراءة الملف النصي
|
20 |
content = uploaded_file.read().decode("utf-8")
|
21 |
+
items = [line.strip() for line in content.splitlines() if line.strip()]
|
22 |
|
23 |
# تحديد الفئات
|
24 |
categories = ["shop", "game", "stream"]
|
25 |
|
26 |
+
# قوائم لتخزين النتائج
|
27 |
+
shopping_items = []
|
28 |
+
gaming_items = []
|
29 |
+
streaming_items = []
|
30 |
+
unknown_items = []
|
31 |
+
|
32 |
+
# قوائم خاصة بالباراميترات
|
33 |
+
param_categories = {
|
34 |
+
"shop_params": [],
|
35 |
+
"game_params": [],
|
36 |
+
"stream_params": [],
|
37 |
+
"unknown_params": []
|
38 |
+
}
|
39 |
|
40 |
# متغيرات للتحكم في العملية
|
41 |
progress_bar = st.progress(0)
|
42 |
pause_button = st.button("Pause")
|
43 |
stop_button = st.button("Stop")
|
44 |
+
continue_button = st.button("Continue")
|
45 |
paused = False
|
46 |
stopped = False
|
47 |
+
current_index = 0
|
48 |
+
|
49 |
+
# دالة تصنيف العناصر (للكلمات المفتاحية)
|
50 |
+
def classify_keywords(items, categories, start_index=0):
    """Classify keywords one by one and file each into a result list.

    Each item is passed to the zero-shot ``classifier`` together with
    ``categories``. The top label decides the destination list when its
    score is above 0.5; everything else goes to ``unknown_items``.

    Args:
        items: Sequence of keyword strings to classify.
        categories: Candidate labels handed to the classifier.
        start_index: Position in ``items`` to start (or resume) from.

    Side effects:
        Appends to the module-level result lists, updates ``current_index``,
        advances ``progress_bar`` and calls ``update_results()`` after every
        classified item.
    """
    global paused, stopped, current_index
    total_items = len(items)

    # Destination list for each label the app knows about.
    buckets = {
        "shop": shopping_items,
        "game": gaming_items,
        "stream": streaming_items,
    }

    for i, item in enumerate(items[start_index:], start=start_index):
        current_index = i
        if stopped or paused:
            # Stop here and leave current_index on the unprocessed item, so a
            # later call with start_index=current_index resumes with it.
            # (Previously a pause slept and then skipped the item entirely.)
            break

        # Classify the keyword; index 0 is read as the best label and score.
        result = classifier(item, categories)
        best_category = result['labels'][0]
        score = result['scores'][0]

        # Low-confidence or unrecognised labels fall through to "unknown".
        if score > 0.5:
            buckets.get(best_category, unknown_items).append(item)
        else:
            unknown_items.append(item)

        # Update the progress bar.
        progress = (current_index + 1) / total_items
        progress_bar.progress(progress)

        # Refresh the displayed results as we go.
        update_results()

        # Slow down slightly to give the UI a chance to refresh.
        time.sleep(0.1)

    # NOTE(review): Streamlit normally reruns the whole script on every widget
    # interaction, so the module-level paused/stopped/current_index flags are
    # presumably reset between clicks — confirm, and consider keeping them in
    # st.session_state.
|
84 |
+
|
85 |
+
# دالة تصنيف الباراميترات
|
86 |
+
def classify_parameters(items, categories, start_index=0):
|
87 |
+
global paused, stopped, current_index
|
88 |
+
total_items = len(items)
|
89 |
+
for i, url in enumerate(items[start_index:], start=start_index):
|
90 |
+
current_index = i
|
91 |
+
if stopped:
|
92 |
+
break
|
93 |
+
if paused:
|
94 |
+
time.sleep(0.5)
|
95 |
+
continue
|
96 |
+
|
97 |
+
# استخراج الباراميترات من الرابط باستخدام RegEx
|
98 |
+
params = re.findall(r'(\w+)=\w+', url)
|
99 |
+
for param in params:
|
100 |
+
# تصنيف الباراميتر باستخدام zero-shot-classification
|
101 |
+
result = classifier(param, categories)
|
102 |
+
best_category = result['labels'][0]
|
103 |
+
score = result['scores'][0]
|
104 |
+
|
105 |
+
if best_category == "shop" and score > 0.5:
|
106 |
+
param_categories["shop_params"].append(f"{param}={re.search(param + r'=([^&]*)', url).group(1)}")
|
107 |
+
elif best_category == "game" and score > 0.5:
|
108 |
+
param_categories["game_params"].append(f"{param}={re.search(param + r'=([^&]*)', url).group(1)}")
|
109 |
+
elif best_category == "stream" and score > 0.5:
|
110 |
+
param_categories["stream_params"].append(f"{param}={re.search(param + r'=([^&]*)', url).group(1)}")
|
111 |
+
else:
|
112 |
+
param_categories["unknown_params"].append(f"{param}={re.search(param + r'=([^&]*)', url).group(1)}")
|
113 |
+
|
114 |
+
# تحديث شريط التقدم
|
115 |
+
progress = (current_index + 1) / total_items
|
116 |
progress_bar.progress(progress)
|
117 |
|
118 |
# تحديث النتائج في الوقت الحقيقي
|
|
|
123 |
|
124 |
# دالة تحديث النتائج
|
125 |
def update_results():
    """Copy the current result lists into Streamlit session state.

    Every list is stored as a single newline-joined string under the
    session-state field that the matching text box reads from.
    """
    # Keyword results: (session-state field, backing list).
    keyword_fields = (
        ("shopping_text", shopping_items),
        ("gaming_text", gaming_items),
        ("streaming_text", streaming_items),
        ("unknown_text", unknown_items),
    )
    for field, values in keyword_fields:
        st.session_state[field] = "\n".join(values)

    # URL-parameter results: the session-state field has the same name as
    # the key in param_categories.
    for field in ("shop_params", "game_params", "stream_params", "unknown_params"):
        st.session_state[field] = "\n".join(param_categories[field])
|
137 |
|
138 |
# زر البدء
|
139 |
if st.button("Start"):
    # Fresh run: clear the control flags and rewind to the first item.
    paused = False
    stopped = False
    current_index = 0

    # Run the classifier that matches the selected operation.
    if operation == "Extra & Filter Param (URLs)":
        classify_parameters(items, categories, start_index=current_index)
    elif operation == "Filter Keywords":
        classify_keywords(items, categories, start_index=current_index)
|
148 |
|
149 |
# زر الإيقاف المؤقت
|
150 |
if pause_button:
|
|
|
155 |
# Continue button: only acts while paused; resumes from the saved index.
if paused and continue_button:
    paused = False
    st.write("Classification resumed.")
    if operation == "Extra & Filter Param (URLs)":
        classify_parameters(items, categories, start_index=current_index)
    elif operation == "Filter Keywords":
        classify_keywords(items, categories, start_index=current_index)

# Stop button: raise the stop flag and tell the user.
if stop_button:
    stopped = True
    st.write("Classification stopped.")
|
167 |
|
168 |
+
# عرض النتائج بناءً على الخيار المختار
|
169 |
+
# Pick the result panels that belong to the selected operation.
# Each row is (header, session-state field, text-area label, widget key).
if operation == "Filter Keywords":
    result_panels = [
        ("Shopping Keywords", "shopping_text", "Copy the shopping keywords here:", "shopping"),
        ("Gaming Keywords", "gaming_text", "Copy the gaming keywords here:", "gaming"),
        ("Streaming Keywords", "streaming_text", "Copy the streaming keywords here:", "streaming"),
        ("Unknown Keywords", "unknown_text", "Copy the unknown keywords here:", "unknown"),
    ]
elif operation == "Extra & Filter Param (URLs)":
    result_panels = [
        ("Shop Parameters", "shop_params", "Copy the shop parameters here:", "shop_params"),
        ("Game Parameters", "game_params", "Copy the game parameters here:", "game_params"),
        ("Stream Parameters", "stream_params", "Copy the stream parameters here:", "stream_params"),
        ("Unknown Parameters", "unknown_params", "Copy the unknown parameters here:", "unknown_params"),
    ]
else:
    # No other operation is offered; render nothing.
    result_panels = []

for heading, state_field, label, widget_key in result_panels:
    st.header(heading)
    # Make sure the backing field exists before the text area reads it.
    if state_field not in st.session_state:
        st.session_state[state_field] = ""
    st.text_area(label, value=st.session_state[state_field], height=200, key=widget_key)
|
212 |
|
213 |
else:
|
214 |
+
st.warning("Please upload a text file to start classification.")
|