Spaces:
Paused
Paused
backup
Browse files
app.py
CHANGED
@@ -262,24 +262,29 @@ def get_statistics():
|
|
262 |
if not data_dir.exists():
|
263 |
return "No data directory found"
|
264 |
|
265 |
-
|
266 |
-
|
267 |
posts_with_responses = 0
|
268 |
total_responses = 0
|
269 |
responses_per_post = [] # List to track number of responses for each post
|
270 |
|
271 |
for metadata_file in data_dir.glob("*/metadata.json"):
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
|
|
|
|
|
|
|
|
|
|
283 |
|
284 |
# Calculate additional statistics
|
285 |
if responses_per_post:
|
@@ -295,17 +300,21 @@ def get_statistics():
|
|
295 |
stats = f"""
|
296 |
📊 Collection Statistics:
|
297 |
|
298 |
-
|
299 |
-
- Total Posts
|
|
|
|
|
|
|
|
|
|
|
300 |
- Posts with Responses: {posts_with_responses}
|
|
|
301 |
- Total Individual Responses: {total_responses}
|
302 |
-
- Completion Rate: {(posts_with_responses/len(VALID_DATASET_POST_IDS)*100):.2f}%
|
303 |
|
304 |
Response Distribution:
|
305 |
- Median Responses per Post: {median_responses}
|
306 |
- Average Responses per Post: {avg_responses:.2f}
|
307 |
- Maximum Responses for a Post: {max_responses}
|
308 |
-
- Posts with No Responses: {total_posts - posts_with_responses}
|
309 |
"""
|
310 |
return stats
|
311 |
|
|
|
262 |
if not data_dir.exists():
|
263 |
return "No data directory found"
|
264 |
|
265 |
+
total_expected_posts = len(VALID_DATASET_POST_IDS)
|
266 |
+
processed_post_ids = set()
|
267 |
posts_with_responses = 0
|
268 |
total_responses = 0
|
269 |
responses_per_post = [] # List to track number of responses for each post
|
270 |
|
271 |
for metadata_file in data_dir.glob("*/metadata.json"):
|
272 |
+
post_id = metadata_file.parent.name
|
273 |
+
if post_id in VALID_DATASET_POST_IDS: # Only count valid posts
|
274 |
+
processed_post_ids.add(post_id)
|
275 |
+
try:
|
276 |
+
with open(metadata_file, "r") as f:
|
277 |
+
metadata = json.load(f)
|
278 |
+
num_responses = len(metadata.get("responses", []))
|
279 |
+
responses_per_post.append(num_responses)
|
280 |
+
if num_responses > 0:
|
281 |
+
posts_with_responses += 1
|
282 |
+
total_responses += num_responses
|
283 |
+
except:
|
284 |
+
continue
|
285 |
+
|
286 |
+
missing_posts = set(map(str, VALID_DATASET_POST_IDS)) - processed_post_ids
|
287 |
+
total_processed = len(processed_post_ids)
|
288 |
|
289 |
# Calculate additional statistics
|
290 |
if responses_per_post:
|
|
|
300 |
stats = f"""
|
301 |
📊 Collection Statistics:
|
302 |
|
303 |
+
Dataset Coverage:
|
304 |
+
- Total Expected Posts: {total_expected_posts}
|
305 |
+
- Posts Processed: {total_processed}
|
306 |
+
- Missing Posts: {len(missing_posts)} ({', '.join(list(missing_posts)[:5])}{'...' if len(missing_posts) > 5 else ''})
|
307 |
+
- Coverage Rate: {(total_processed/total_expected_posts*100):.2f}%
|
308 |
+
|
309 |
+
Response Statistics:
|
310 |
- Posts with Responses: {posts_with_responses}
|
311 |
+
- Posts without Responses: {total_processed - posts_with_responses}
|
312 |
- Total Individual Responses: {total_responses}
|
|
|
313 |
|
314 |
Response Distribution:
|
315 |
- Median Responses per Post: {median_responses}
|
316 |
- Average Responses per Post: {avg_responses:.2f}
|
317 |
- Maximum Responses for a Post: {max_responses}
|
|
|
318 |
"""
|
319 |
return stats
|
320 |
|