Assessment Management
Managing and organizing your security assessments
Organize, track, and manage your security assessments across multiple models with comprehensive tools for filtering, monitoring, and reporting.
Listing Assessments
View All Assessments
Retrieve and display all your security assessments with detailed information.
from modelred import ModelRed
async with ModelRed() as client:
# Get all assessments
assessments = await client.list_assessments()
print(f"📊 Found {len(assessments)} assessments:")
print("=" * 50)
for assessment in assessments:
print(f"🔍 Assessment ID: {assessment['assessment_id']}")
print(f" Model: {assessment['model_id']}")
print(f" Status: {assessment['status']}")
print(f" Score: {assessment.get('overall_score', 'N/A')}/10")
print(f" Risk Level: {assessment.get('risk_level', 'N/A')}")
print(f" Created: {assessment['created_at']}")
print(f" Progress: {assessment.get('progress', 0)}%")
if assessment.get('completed_at'):
print(f" Completed: {assessment['completed_at']}")
print(" " + "-" * 40)
Filtering and Searching
🔍 Advanced Assessment Queries
Filter assessments by model, status, date range, and risk level to narrow results to exactly the ones you need.
async def advanced_assessment_filtering():
async with ModelRed() as client:
# Filter by specific model
model_assessments = await client.list_assessments(
filters={
"model_id": "production-gpt-4"
}
)
print(f"📊 Assessments for production-gpt-4: {len(model_assessments)}")
# Filter by status
completed_assessments = await client.list_assessments(
filters={
"status": "COMPLETED"
}
)
print(f"✅ Completed assessments: {len(completed_assessments)}")
# Filter by date range
from datetime import datetime, timedelta
week_ago = datetime.now() - timedelta(days=7)
recent_assessments = await client.list_assessments(
filters={
"created_after": week_ago.isoformat(),
"status": ["COMPLETED", "FAILED"]
}
)
print(f"📅 Recent assessments (last 7 days): {len(recent_assessments)}")
# Filter by risk level
high_risk_assessments = await client.list_assessments(
filters={
"risk_level": ["HIGH", "CRITICAL"]
}
)
print(f"⚠️ High/Critical risk assessments: {len(high_risk_assessments)}")
# Combine multiple filters
critical_production = await client.list_assessments(
filters={
"model_id": "production-gpt-4",
"risk_level": "CRITICAL",
"status": "COMPLETED"
}
)
print(f"🚨 Critical production issues: {len(critical_production)}")
# Run the filtering examples
await advanced_assessment_filtering()
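If you run the same queries on a schedule, it can help to wrap common filters in a small helper. A sketch built only from the filter keys shown above; the helper name and defaults are illustrative:
from datetime import datetime, timedelta

async def find_assessments_needing_attention(client, days=7):
    """Return completed assessments from the last `days` days with HIGH or CRITICAL risk."""
    since = datetime.now() - timedelta(days=days)
    return await client.list_assessments(
        filters={
            "created_after": since.isoformat(),
            "status": "COMPLETED",
            "risk_level": ["HIGH", "CRITICAL"],
        }
    )

# Usage inside an existing client session:
# urgent = await find_assessments_needing_attention(client, days=7)
# print(f"⚠️ {len(urgent)} assessments need attention")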
Pagination and Sorting
📄 Large Dataset Management
Handle large numbers of assessments efficiently with pagination and sorting.
async def paginated_assessment_listing():
async with ModelRed() as client:
page = 1
page_size = 20
total_processed = 0
print("📋 Processing all assessments with pagination:")
while True:
# Get page of assessments
response = await client.list_assessments(
page=page,
limit=page_size,
sort_by="created_at",
sort_order="desc" # Most recent first
)
assessments = response.get('assessments', [])
total = response.get('total', 0)
if not assessments:
break
print(f"\n📄 Page {page} ({len(assessments)} assessments):")
for assessment in assessments:
status_emoji = {
'COMPLETED': '✅',
'FAILED': '❌',
'RUNNING': '🔄',
'QUEUED': '⏳'
}.get(assessment['status'], '❓')
print(f" {status_emoji} {assessment['model_id'][:20]:<20} | "
f"Score: {assessment.get('overall_score', 'N/A'):>4} | "
f"Risk: {assessment.get('risk_level', 'N/A'):>8}")
total_processed += len(assessments)
page += 1
# Break if we've processed all assessments
if total_processed >= total:
break
print(f"\n✅ Processed {total_processed} total assessments")
# Alternative: Sort by different criteria
async def sorted_assessments():
async with ModelRed() as client:
# Sort by score (lowest first - needs attention)
by_score = await client.list_assessments(
sort_by="overall_score",
sort_order="asc",
limit=10
)
print("🔻 Lowest scoring assessments (need attention):")
for assessment in by_score['assessments']:
print(f" {assessment['model_id']}: {assessment.get('overall_score', 'N/A')}/10")
# Sort by completion time (most recent)
by_completion = await client.list_assessments(
filters={"status": "COMPLETED"},
sort_by="completed_at",
sort_order="desc",
limit=5
)
print("\n🕒 Most recently completed:")
for assessment in by_completion['assessments']:
print(f" {assessment['model_id']}: {assessment['completed_at']}")
await paginated_assessment_listing()
await sorted_assessments()
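For long assessment histories, an async generator keeps the pagination loop in one place so callers can simply iterate results. A sketch using the same page, limit, and sort parameters as above; the generator name is illustrative:
async def iter_assessments(client, page_size=50, **query_kwargs):
    """Yield assessments one at a time, fetching pages lazily."""
    page = 1
    while True:
        response = await client.list_assessments(
            page=page,
            limit=page_size,
            sort_by="created_at",
            sort_order="desc",
            **query_kwargs,
        )
        assessments = response.get('assessments', [])
        if not assessments:
            break
        for assessment in assessments:
            yield assessment
        page += 1

# Usage:
# async for assessment in iter_assessments(client, page_size=100):
#     print(assessment['assessment_id'])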
Assessment Statistics
📊 Usage Analytics & Insights
Get comprehensive statistics and insights about your assessment history.
async def comprehensive_assessment_analytics():
async with ModelRed() as client:
# Get overall statistics
stats = await client.get_assessment_stats()
print("📊 Assessment Analytics Dashboard")
print("=" * 50)
# Basic stats
print(f"📈 Overall Statistics:")
print(f" Total Assessments: {stats['total_assessments']}")
print(f" This Month: {stats['monthly_assessments']}")
print(f" Success Rate: {stats['success_rate']:.1f}%")
print(f" Average Score: {stats.get('average_score', 'N/A')}/10")
# Risk distribution
if 'risk_distribution' in stats:
print(f"\n⚠️ Risk Level Distribution:")
risk_dist = stats['risk_distribution']
total_risks = sum(risk_dist.values())
for risk_level, count in risk_dist.items():
percentage = (count / total_risks * 100) if total_risks > 0 else 0
emoji = {'LOW': '🟢', 'MEDIUM': '🟡', 'HIGH': '🟠', 'CRITICAL': '🔴'}.get(risk_level, '❓')
print(f" {emoji} {risk_level}: {count} ({percentage:.1f}%)")
# Popular test suites
if 'popular_test_suites' in stats:
print(f"\n🧪 Most Used Test Suites:")
for suite_name, usage_count in stats['popular_test_suites'].items():
print(f" • {suite_name}: {usage_count} assessments")
# Model performance overview
if 'model_performance' in stats:
print(f"\n🤖 Model Performance Summary:")
for model_id, perf in stats['model_performance'].items():
print(f" {model_id}:")
print(f" Assessments: {perf['count']}")
print(f" Avg Score: {perf['avg_score']:.1f}/10")
print(f" Latest Risk: {perf['latest_risk_level']}")
# Time-based trends
if 'monthly_trend' in stats:
print(f"\n📅 Monthly Assessment Trend:")
for month, count in stats['monthly_trend'].items():
print(f" {month}: {count} assessments")
await comprehensive_assessment_analytics()
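If you feed these numbers into a dashboard or keep them for audits, the stats dictionary can be snapshotted to disk with the standard library alone. A minimal sketch; the file name and snapshot shape are arbitrary choices:
import json
from datetime import datetime

def save_stats_snapshot(stats, path="assessment_stats.json"):
    """Write the stats dictionary to disk with a timestamp for later comparison."""
    snapshot = {"captured_at": datetime.now().isoformat(), "stats": stats}
    with open(path, "w") as f:
        json.dump(snapshot, f, indent=2, default=str)
    print(f"💾 Stats snapshot saved to {path}")

# Usage:
# save_stats_snapshot(await client.get_assessment_stats())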
Canceling Assessments
🛑 Stop Running Assessments
Cancel running assessments when needed, with support for individual and batch cancellation.
async def assessment_cancellation_management():
async with ModelRed() as client:
# Cancel a specific assessment
print("🛑 Canceling specific assessment...")
try:
success = await client.cancel_assessment("assessment-id-123")
if success:
print("✅ Assessment canceled successfully")
else:
print("❌ Failed to cancel assessment")
except Exception as e:
print(f"❌ Error canceling assessment: {e}")
# Cancel all running assessments for a specific model
print("\n🛑 Canceling all running assessments for model...")
model_id = "problematic-model"
running_assessments = await client.list_assessments(
filters={
"model_id": model_id,
"status": ["RUNNING", "QUEUED"]
}
)
canceled_count = 0
for assessment in running_assessments['assessments']:
try:
success = await client.cancel_assessment(assessment['assessment_id'])
if success:
canceled_count += 1
print(f" ✅ Canceled: {assessment['assessment_id']}")
else:
print(f" ❌ Failed: {assessment['assessment_id']}")
except Exception as e:
print(f" ❌ Error: {assessment['assessment_id']} - {e}")
print(f"\n📊 Canceled {canceled_count}/{len(running_assessments['assessments'])} assessments")
# Emergency: Cancel ALL running assessments
print("\n🚨 Emergency cancellation of ALL running assessments...")
all_running = await client.list_assessments(
filters={"status": ["RUNNING", "QUEUED"]}
)
if not all_running['assessments']:
print("✅ No running assessments to cancel")
return
# Confirm before mass cancellation
print(f"⚠️ Found {len(all_running['assessments'])} running assessments")
print("This will cancel ALL running assessments across all models!")
# In a real application, you might want user confirmation here
confirm_mass_cancel = True # Set this based on your logic
if confirm_mass_cancel:
mass_canceled = 0
for assessment in all_running['assessments']:
try:
success = await client.cancel_assessment(assessment['assessment_id'])
if success:
mass_canceled += 1
except Exception as e:
print(f" ❌ Failed to cancel {assessment['assessment_id']}: {e}")
print(f"🛑 Mass canceled {mass_canceled} assessments")
else:
print("❌ Mass cancellation aborted")
await assessment_cancellation_management()
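As the example notes, mass cancellation deserves an explicit confirmation step rather than a hard-coded flag. One simple option is a console prompt, sketched below; a CLI flag or configuration setting would work just as well:
def confirm_mass_cancel_prompt(count):
    """Require the operator to type the assessment count before mass cancellation proceeds."""
    answer = input(f"Type {count} to confirm canceling all {count} running assessments: ")
    return answer.strip() == str(count)

# Replace the hard-coded flag in the example above with, for instance:
# confirm_mass_cancel = confirm_mass_cancel_prompt(len(all_running['assessments']))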
Bulk Operations
⚡ Batch Assessment Management
Start, monitor, and summarize many assessments at once with a reusable batch manager.
import asyncio
from datetime import datetime, timedelta
class AssessmentBatchManager:
"""Manage multiple assessments efficiently"""
def __init__(self, client):
self.client = client
self.active_assessments = {}
async def start_bulk_assessments(self, models_config):
"""Start assessments for multiple models"""
print(f"🚀 Starting bulk assessments for {len(models_config)} models...")
assessment_ids = []
for config in models_config:
try:
result = await self.client.run_assessment(
model_id=config['model_id'],
test_suites=config.get('test_suites', ['basic_security']),
priority=config.get('priority', 'normal'),
wait_for_completion=False
)
assessment_ids.append(result.assessment_id)
self.active_assessments[result.assessment_id] = {
'model_id': config['model_id'],
'started_at': datetime.now(),
'config': config
}
print(f" ✅ Started: {config['model_id']} -> {result.assessment_id}")
except Exception as e:
print(f" ❌ Failed: {config['model_id']} - {e}")
print(f"📊 Successfully started {len(assessment_ids)} assessments")
return assessment_ids
async def monitor_bulk_assessments(self, check_interval=30):
"""Monitor all active assessments"""
print(f"👁️ Monitoring {len(self.active_assessments)} assessments...")
while self.active_assessments:
completed_this_round = []
for assessment_id, info in self.active_assessments.items():
try:
status_info = await self.client.get_assessment_status(assessment_id)
status = status_info['status']
progress = status_info.get('progress', 0)
# Update progress
elapsed = datetime.now() - info['started_at']
if status in ['COMPLETED', 'FAILED']:
completed_this_round.append(assessment_id)
if status == 'COMPLETED':
# Get final results
results = await self.client.get_assessment_results(assessment_id)
print(f"✅ COMPLETED: {info['model_id']}")
print(f" Score: {results.overall_score}/10")
print(f" Risk: {results.risk_level.value}")
print(f" Duration: {elapsed}")
else:
print(f"❌ FAILED: {info['model_id']} (Duration: {elapsed})")
else:
print(f"🔄 {info['model_id']}: {status} ({progress}%) - {elapsed}")
except Exception as e:
print(f"⚠️ Error checking {info['model_id']}: {e}")
# Remove completed assessments
for aid in completed_this_round:
del self.active_assessments[aid]
if self.active_assessments:
print(f"📊 {len(self.active_assessments)} assessments still running...")
await asyncio.sleep(check_interval)
print("🎉 All bulk assessments completed!")
async def generate_bulk_report(self, assessment_ids):
"""Generate summary report for multiple assessments"""
print("📋 Generating bulk assessment report...")
results = []
for assessment_id in assessment_ids:
try:
result = await self.client.get_assessment_results(assessment_id)
results.append(result)
except Exception as e:
print(f"⚠️ Failed to get results for {assessment_id}: {e}")
if not results:
print("❌ No results to report")
return
# Generate summary statistics
total_assessments = len(results)
avg_score = sum(r.overall_score for r in results) / total_assessments
risk_counts = {}
for result in results:
risk = result.risk_level.value
risk_counts[risk] = risk_counts.get(risk, 0) + 1
print(f"\n📊 Bulk Assessment Summary Report")
print("=" * 50)
print(f"Total Assessments: {total_assessments}")
print(f"Average Score: {avg_score:.2f}/10")
print(f"Risk Distribution:")
for risk, count in risk_counts.items():
percentage = (count / total_assessments) * 100
print(f" {risk}: {count} ({percentage:.1f}%)")
# Show top and bottom performers
sorted_results = sorted(results, key=lambda x: x.overall_score, reverse=True)
print(f"\n🏆 Top Performers:")
for result in sorted_results[:3]:
print(f" {result.model_id}: {result.overall_score}/10 ({result.risk_level.value})")
print(f"\n⚠️ Needs Attention:")
for result in sorted_results[-3:]:
print(f" {result.model_id}: {result.overall_score}/10 ({result.risk_level.value})")
# Usage example
async def bulk_management_example():
async with ModelRed() as client:
manager = AssessmentBatchManager(client)
# Define models to assess
models_config = [
{
'model_id': 'prod-gpt-4',
'test_suites': ['basic_security', 'content_safety'],
'priority': 'high'
},
{
'model_id': 'dev-claude',
'test_suites': ['basic_security'],
'priority': 'normal'
},
{
'model_id': 'test-llama',
'test_suites': ['basic_security', 'prompt_injection'],
'priority': 'low'
}
]
# Start all assessments
assessment_ids = await manager.start_bulk_assessments(models_config)
# Monitor until completion
await manager.monitor_bulk_assessments(check_interval=15)
# Generate final report
await manager.generate_bulk_report(assessment_ids)
# Run the bulk management example
await bulk_management_example()
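The monitor above checks each assessment sequentially; with many active assessments, the status calls can be issued concurrently instead. A sketch of that variation using asyncio.gather and the same get_assessment_status method; the helper name is illustrative:
import asyncio

async def check_statuses_concurrently(client, assessment_ids):
    """Fetch the status of many assessments in parallel and return an id -> status mapping."""
    tasks = [client.get_assessment_status(aid) for aid in assessment_ids]
    results = await asyncio.gather(*tasks, return_exceptions=True)
    statuses = {}
    for aid, result in zip(assessment_ids, results):
        if isinstance(result, Exception):
            print(f"⚠️ Error checking {aid}: {result}")
        else:
            statuses[aid] = result['status']
    return statuses

# Usage inside the monitoring loop:
# statuses = await check_statuses_concurrently(self.client, list(self.active_assessments))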
Report Management
📄 Assessment Reports & Documentation
Access detailed assessment reports and turn them into custom summaries, from executive overviews to per-category breakdowns, ready for sharing and export.
async def comprehensive_report_management():
async with ModelRed() as client:
# Get assessment with detailed report
assessment_id = "your-assessment-id"
assessment = await client.get_assessment_results(assessment_id)
print("📋 Assessment Report Details:")
print("=" * 40)
print(f"Assessment ID: {assessment.assessment_id}")
print(f"Model: {assessment.model_id}")
print(f"Overall Score: {assessment.overall_score}/10")
print(f"Risk Level: {assessment.risk_level.value}")
print(f"Web Report URL: {assessment.report_url}")
# Access raw probe results if available
if hasattr(assessment, 'raw_results'):
print(f"\n🔬 Raw Results Available:")
print(f" Total Probes: {len(assessment.raw_results)}")
print(f" Execution Time: {assessment.execution_time}s")
# Analyze probe categories
probe_categories = {}
for probe_result in assessment.raw_results:
category = probe_result.get('category', 'unknown')
probe_categories[category] = probe_categories.get(category, 0) + 1
print(f" Probe Categories:")
for category, count in probe_categories.items():
print(f" {category}: {count}")
# Generate custom report formats
await generate_custom_reports(assessment)
async def generate_custom_reports(assessment):
"""Generate reports in multiple formats"""
# Executive Summary
print(f"\n📊 Executive Summary Report:")
print("=" * 30)
risk_color = {
'LOW': '🟢',
'MEDIUM': '🟡',
'HIGH': '🟠',
'CRITICAL': '🔴'
}.get(assessment.risk_level.value, '❓')
print(f"Model: {assessment.model_id}")
print(f"Security Score: {assessment.overall_score}/10")
print(f"Risk Level: {risk_color} {assessment.risk_level.value}")
print(f"Assessment Date: {assessment.completed_at}")
if assessment.overall_score >= 8:
recommendation = "✅ Model is ready for production deployment"
elif assessment.overall_score >= 6:
recommendation = "⚠️ Model needs minor security improvements"
elif assessment.overall_score >= 4:
recommendation = "🔧 Model requires significant security fixes"
else:
recommendation = "🚨 Model should not be deployed - critical security issues"
print(f"Recommendation: {recommendation}")
# Detailed breakdown
if assessment.categories:
print(f"\n📈 Category Breakdown:")
for category, score in assessment.categories.items():
status = "🟢" if score >= 7 else "🟡" if score >= 5 else "🔴"
print(f" {status} {category.replace('_', ' ').title()}: {score}/10")
# Action items
if assessment.recommendations:
print(f"\n💡 Priority Actions ({len(assessment.recommendations)}):")
for i, rec in enumerate(assessment.recommendations[:5], 1):
print(f" {i}. {rec}")
if len(assessment.recommendations) > 5:
print(f" ... and {len(assessment.recommendations) - 5} more recommendations")
await comprehensive_report_management()
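To share these summaries outside the terminal, the same fields can be written to a Markdown file. A minimal sketch reusing the attributes accessed above; the file name pattern is an arbitrary choice:
def export_summary_markdown(assessment, path=None):
    """Write a short Markdown summary of a completed assessment to disk."""
    path = path or f"report_{assessment.assessment_id}.md"
    lines = [
        f"# Security Assessment: {assessment.model_id}",
        f"- Overall Score: {assessment.overall_score}/10",
        f"- Risk Level: {assessment.risk_level.value}",
        f"- Completed: {assessment.completed_at}",
        f"- Full web report: {assessment.report_url}",
    ]
    with open(path, "w") as f:
        f.write("\n".join(lines) + "\n")
    print(f"📄 Summary written to {path}")

# Usage:
# export_summary_markdown(await client.get_assessment_results("your-assessment-id"))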
Best Practices
💡 Assessment Management Best Practices