Spaces:

taesiri
/

AIEditsSubmissions

Paused

App Files Community

taesiri committed on Feb 15

Commit

a3b292d

1 Parent(s): 602a574

backup

Browse files

Files changed (1) hide show

app.py +26 -17

app.py CHANGED Viewed

@@ -262,24 +262,29 @@ def get_statistics():
     if not data_dir.exists():
         return "No data directory found"
-    # Count folders with metadata.json
-    total_posts = 0
     posts_with_responses = 0
     total_responses = 0
     responses_per_post = []  # List to track number of responses for each post
     for metadata_file in data_dir.glob("*/metadata.json"):
-        total_posts += 1
-        try:
-            with open(metadata_file, "r") as f:
-                metadata = json.load(f)
-                num_responses = len(metadata.get("responses", []))
-                responses_per_post.append(num_responses)
-                if num_responses > 0:
-                    posts_with_responses += 1
-                    total_responses += num_responses
-        except:
-            continue
     # Calculate additional statistics
     if responses_per_post:
@@ -295,17 +300,21 @@ def get_statistics():
     stats = f"""
     📊 Collection Statistics:
-    Overall Progress:
-    - Total Posts Processed: {total_posts}
     - Posts with Responses: {posts_with_responses}
     - Total Individual Responses: {total_responses}
-    - Completion Rate: {(posts_with_responses/len(VALID_DATASET_POST_IDS)*100):.2f}%
     Response Distribution:
     - Median Responses per Post: {median_responses}
     - Average Responses per Post: {avg_responses:.2f}
     - Maximum Responses for a Post: {max_responses}
-    - Posts with No Responses: {total_posts - posts_with_responses}
     """
     return stats

     if not data_dir.exists():
         return "No data directory found"
+    total_expected_posts = len(VALID_DATASET_POST_IDS)
+    processed_post_ids = set()
     posts_with_responses = 0
     total_responses = 0
     responses_per_post = []  # List to track number of responses for each post
     for metadata_file in data_dir.glob("*/metadata.json"):
+        post_id = metadata_file.parent.name
+        if post_id in VALID_DATASET_POST_IDS:  # Only count valid posts
+            processed_post_ids.add(post_id)
+            try:
+                with open(metadata_file, "r") as f:
+                    metadata = json.load(f)
+                    num_responses = len(metadata.get("responses", []))
+                    responses_per_post.append(num_responses)
+                    if num_responses > 0:
+                        posts_with_responses += 1
+                        total_responses += num_responses
+            except:
+                continue
+    missing_posts = set(map(str, VALID_DATASET_POST_IDS)) - processed_post_ids
+    total_processed = len(processed_post_ids)
     # Calculate additional statistics
     if responses_per_post:
     stats = f"""
     📊 Collection Statistics:
+    Dataset Coverage:
+    - Total Expected Posts: {total_expected_posts}
+    - Posts Processed: {total_processed}
+    - Missing Posts: {len(missing_posts)} ({', '.join(list(missing_posts)[:5])}{'...' if len(missing_posts) > 5 else ''})
+    - Coverage Rate: {(total_processed/total_expected_posts*100):.2f}%
+    Response Statistics:
     - Posts with Responses: {posts_with_responses}
+    - Posts without Responses: {total_processed - posts_with_responses}
     - Total Individual Responses: {total_responses}
     Response Distribution:
     - Median Responses per Post: {median_responses}
     - Average Responses per Post: {avg_responses:.2f}
     - Maximum Responses for a Post: {max_responses}
     """
     return stats