Spaces:
Running
Running
Update evaluate_performance.py
Browse files
- evaluate_performance.py +2 -13
evaluate_performance.py
CHANGED
@@ -23,15 +23,13 @@ import argparse
|
|
23 |
from datetime import datetime
|
24 |
import matplotlib.pyplot as plt
|
25 |
from tabulate import tabulate
|
26 |
-
import numpy as np
|
27 |
|
28 |
# Add the parent directory to sys.path if this script is run directly
|
29 |
if __name__ == "__main__":
|
30 |
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
31 |
|
32 |
-
# Import the agent
|
33 |
import agent
|
34 |
-
from utils.performance import PerformanceTracker
|
35 |
from utils.models import initialize_models
|
36 |
|
37 |
# IMPORTANT NOTE FOR DEVELOPERS:
|
@@ -193,8 +191,6 @@ def evaluate_claims(test_claims, eval_agent, limit=None):
|
|
193 |
- results (list): Detailed results for each claim
|
194 |
- metrics (dict): Aggregated performance metrics
|
195 |
"""
|
196 |
-
# Initialize performance tracker
|
197 |
-
performance_tracker = PerformanceTracker()
|
198 |
|
199 |
# Limit the number of claims if requested
|
200 |
if limit and limit > 0:
|
@@ -499,7 +495,6 @@ def main():
|
|
499 |
|
500 |
# Evaluate claims
|
501 |
results, metrics = evaluate_claims(TEST_CLAIMS, eval_agent, args.limit)
|
502 |
-
# results, metrics = evaluate_claims(TEST_CLAIMS, eval_agent, 1)
|
503 |
|
504 |
# Print summary
|
505 |
print_summary(metrics)
|
@@ -508,13 +503,7 @@ def main():
|
|
508 |
save_results(results, metrics, output_file)
|
509 |
|
510 |
# Create charts
|
511 |
-
try:
|
512 |
-
from tabulate import tabulate
|
513 |
-
import matplotlib.pyplot as plt
|
514 |
-
create_charts(metrics, results_dir)
|
515 |
-
except ImportError:
|
516 |
-
print("\nCould not create charts. Please install matplotlib and tabulate packages:")
|
517 |
-
print("pip install matplotlib tabulate")
|
518 |
|
519 |
if __name__ == "__main__":
|
520 |
main()
|
|
|
23 |
from datetime import datetime
|
24 |
import matplotlib.pyplot as plt
|
25 |
from tabulate import tabulate
|
|
|
26 |
|
27 |
# Add the parent directory to sys.path if this script is run directly
|
28 |
if __name__ == "__main__":
|
29 |
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
30 |
|
31 |
+
# Import the agent
|
32 |
import agent
|
|
|
33 |
from utils.models import initialize_models
|
34 |
|
35 |
# IMPORTANT NOTE FOR DEVELOPERS:
|
|
|
191 |
- results (list): Detailed results for each claim
|
192 |
- metrics (dict): Aggregated performance metrics
|
193 |
"""
|
|
|
|
|
194 |
|
195 |
# Limit the number of claims if requested
|
196 |
if limit and limit > 0:
|
|
|
495 |
|
496 |
# Evaluate claims
|
497 |
results, metrics = evaluate_claims(TEST_CLAIMS, eval_agent, args.limit)
|
|
|
498 |
|
499 |
# Print summary
|
500 |
print_summary(metrics)
|
|
|
503 |
save_results(results, metrics, output_file)
|
504 |
|
505 |
# Create charts
|
506 |
+
create_charts(metrics, results_dir)
|
|
|
|
|
|
|
|
|
|
|
|
|
507 |
|
508 |
if __name__ == "__main__":
|
509 |
main()
|