2.3: Traceability Automation
What You'll Learn
- Understand AI-powered traceability automation techniques
- Learn to implement automated trace link suggestion
- Master suspect link detection algorithms
- Apply machine learning to traceability management
Overview
Traceability automation uses AI and machine learning to reduce the manual effort required to establish and maintain trace links between artifacts. This chapter covers automated link suggestion, suspect link detection, coverage and gap analysis, and integration with requirements tools (via OSLC) and CI/CD pipelines — the building blocks of intelligent traceability management.
Automated Trace Link Suggestion
ML-Based Link Recommendation
#!/usr/bin/env python3
"""
AI-Powered Trace Link Suggestion Engine.
Uses NLP and ML to recommend trace links between artifacts.
"""
import re
from dataclasses import dataclass
from enum import Enum
from typing import Any, Dict, List, Optional, Tuple

import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
class ArtifactType(Enum):
    """Kinds of artifact that can appear in the traceability model.

    Each member's value is a short string code; the sample data in this
    module uses the same codes as ID prefixes (e.g. ``SYS-001``).
    """

    # Requirement levels
    STAKEHOLDER_REQ = "STKH"
    SYSTEM_REQ = "SYS"
    SOFTWARE_REQ = "SWE"
    HARDWARE_REQ = "HWE"

    # Solution artifacts
    ARCHITECTURE = "ARCH"
    DESIGN = "DES"
    CODE = "CODE"

    # Verification artifacts
    TEST_CASE = "TC"
    TEST_RESULT = "TR"
class LinkType(Enum):
    """Relationship kinds a trace link can express.

    Member values are the lowercase names of the relationships.
    """

    DERIVES = "derives"
    SATISFIES = "satisfies"
    VERIFIES = "verifies"
    ALLOCATES = "allocates"
    REFINES = "refines"
    DEPENDS = "depends"
@dataclass
class Artifact:
    """One traceable work product (requirement, design element, test, ...)."""

    # Identifier of the artifact; used as the lookup key by the engine.
    id: str
    # Which kind of artifact this is.
    artifact_type: ArtifactType
    # Short title; combined with ``description`` for text matching.
    title: str
    # Full descriptive text of the artifact.
    description: str
    # Free-form string metadata (the sample data uses keys such as "status").
    attributes: Dict[str, str]
    # IDs of linked artifacts
    existing_links: List[str]
@dataclass
class SuggestedLink:
    """A candidate trace link proposed by the suggestion engine."""

    # ID of the artifact the suggestion was requested for.
    source_id: str
    # ID of the proposed link target.
    target_id: str
    # Relationship kind proposed for the link.
    link_type: LinkType
    # Similarity score backing the suggestion.
    confidence: float
    # Human-readable explanation of why the link was proposed.
    reasoning: str
class TraceLinkSuggestionEngine:
    """
    Engine that recommends trace links between artifacts.

    Each artifact's title and description are vectorised with TF-IDF and
    candidate targets are ranked by cosine similarity to the source artifact.
    This is lexical matching: two artifacts only score well when they share
    vocabulary, so paraphrased requirements may be missed.

    Note: TfidfVectorizer parameters (max_features, ngram_range) are reasonable
    defaults but may need tuning for specific corpora and domain vocabulary.
    """

    # Words of four or more characters, used for the "common terms" hint.
    _TERM_PATTERN = re.compile(r'\b\w{4,}\b')

    # Maximum number of shared terms quoted in a suggestion's reasoning.
    _MAX_COMMON_TERMS = 3

    # (source type, target type) -> link type proposed when the caller does
    # not specify one. Pairs that are absent produce no suggestion.
    _LINK_RULES = {
        (ArtifactType.STAKEHOLDER_REQ, ArtifactType.SYSTEM_REQ): LinkType.DERIVES,
        (ArtifactType.SYSTEM_REQ, ArtifactType.SOFTWARE_REQ): LinkType.DERIVES,
        (ArtifactType.SYSTEM_REQ, ArtifactType.HARDWARE_REQ): LinkType.DERIVES,
        (ArtifactType.SOFTWARE_REQ, ArtifactType.ARCHITECTURE): LinkType.ALLOCATES,
        (ArtifactType.ARCHITECTURE, ArtifactType.DESIGN): LinkType.REFINES,
        (ArtifactType.DESIGN, ArtifactType.CODE): LinkType.SATISFIES,
        (ArtifactType.TEST_CASE, ArtifactType.SOFTWARE_REQ): LinkType.VERIFIES,
        (ArtifactType.TEST_CASE, ArtifactType.CODE): LinkType.VERIFIES,
    }

    def __init__(self):
        self.vectorizer = TfidfVectorizer(
            max_features=1000,
            stop_words='english',
            ngram_range=(1, 2)
        )
        self.artifacts: Dict[str, Artifact] = {}
        self.tfidf_matrix = None
        self.artifact_ids = []
        # artifact ID -> row of tfidf_matrix; rebuilt with the matrix.
        self._row_index: Dict[str, int] = {}

    def add_artifacts(self, artifacts: List[Artifact]) -> None:
        """
        Register artifacts and rebuild the TF-IDF matrix.

        An artifact whose ID is already registered replaces the earlier one.
        The whole matrix is refitted on every call, so prefer one call with
        many artifacts over many calls with one artifact each.
        """
        for artifact in artifacts:
            self.artifacts[artifact.id] = artifact

        self._build_tfidf_matrix()

    def _build_tfidf_matrix(self) -> None:
        """Fit the vectorizer on all registered artifacts and store the matrix."""
        self.artifact_ids = list(self.artifacts.keys())
        self._row_index = {
            artifact_id: row for row, artifact_id in enumerate(self.artifact_ids)
        }

        # Fitting on an empty corpus raises inside scikit-learn; with nothing
        # registered there is simply no matrix yet.
        if not self.artifact_ids:
            self.tfidf_matrix = None
            return

        # Title and description together form the document for each artifact.
        texts = [
            f"{self.artifacts[aid].title} {self.artifacts[aid].description}"
            for aid in self.artifact_ids
        ]
        self.tfidf_matrix = self.vectorizer.fit_transform(texts)

    def suggest_links(
        self,
        source_id: str,
        target_type: Optional[ArtifactType] = None,
        link_type: Optional[LinkType] = None,
        top_k: int = 5,
        min_confidence: float = 0.3
    ) -> List[SuggestedLink]:
        """
        Suggest trace links for a source artifact.

        Args:
            source_id: ID of the source artifact
            target_type: Optional filter for target artifact type
            link_type: Type of link to suggest. When omitted, the type is
                inferred from the artifact types and candidates whose type
                pair has no rule are skipped.
            top_k: Number of suggestions to return
            min_confidence: Minimum confidence threshold

        Returns:
            List of suggested links sorted by confidence (highest first)

        Raises:
            ValueError: If source_id is not a registered artifact.
        """
        if source_id not in self.artifacts:
            raise ValueError(f"Artifact {source_id} not found")

        source_artifact = self.artifacts[source_id]
        source_idx = self._row_index[source_id]

        # One row of similarities: source against every registered artifact.
        similarities = cosine_similarity(
            self.tfidf_matrix[source_idx:source_idx + 1],
            self.tfidf_matrix
        )[0]

        suggestions = []
        for target_id, similarity in zip(self.artifact_ids, similarities):
            # Skip self-links
            if target_id == source_id:
                continue

            # Cheapest filter first: most candidates fall below the threshold.
            if similarity < min_confidence:
                continue

            target_artifact = self.artifacts[target_id]

            # Skip pairs that are already linked. A link may be recorded on
            # either artifact, so both directions are checked.
            if (
                target_id in source_artifact.existing_links
                or source_id in target_artifact.existing_links
            ):
                continue

            # Apply type filter
            if target_type and target_artifact.artifact_type != target_type:
                continue

            # Determine link type if not specified
            suggested_link_type = link_type or self._infer_link_type(
                source_artifact.artifact_type,
                target_artifact.artifact_type
            )
            if suggested_link_type is None:
                continue

            suggestions.append(SuggestedLink(
                source_id=source_id,
                target_id=target_id,
                link_type=suggested_link_type,
                confidence=float(similarity),
                reasoning=self._generate_reasoning(
                    source_artifact,
                    target_artifact,
                    similarity
                )
            ))

        suggestions.sort(key=lambda x: x.confidence, reverse=True)
        return suggestions[:top_k]

    def _infer_link_type(
        self,
        source_type: ArtifactType,
        target_type: ArtifactType
    ) -> Optional[LinkType]:
        """Return the link type for a (source, target) type pair, or None if no rule exists."""
        return self._LINK_RULES.get((source_type, target_type))

    def _generate_reasoning(
        self,
        source: Artifact,
        target: Artifact,
        similarity: float
    ) -> str:
        """
        Generate human-readable reasoning for the suggestion.

        The text contains the similarity score and up to three words (four
        or more characters) that occur in both artifacts.
        """
        source_text = f"{source.title} {source.description}".lower()
        target_text = f"{target.title} {target.description}".lower()

        shared_terms = (
            set(self._TERM_PATTERN.findall(source_text))
            & set(self._TERM_PATTERN.findall(target_text))
        )

        # Set iteration order varies between runs, so pick the terms with a
        # stable ordering: longest first (a rough proxy for specificity),
        # ties broken alphabetically.
        common_words = sorted(
            shared_terms, key=lambda word: (-len(word), word)
        )[:self._MAX_COMMON_TERMS]

        reasoning = f"Similarity: {similarity:.2f}. "
        if common_words:
            reasoning += f"Common terms: {', '.join(common_words)}. "
        return reasoning
class SuspectLinkDetector:
    """
    Detects potentially broken or inconsistent trace links.

    Note: The min_similarity_drop threshold should be calibrated based on
    your project's historical data. Start with 0.3 and adjust based on
    false positive/negative rates observed during validation.
    """

    def __init__(self, engine: "TraceLinkSuggestionEngine"):
        self.engine = engine

    def detect_suspects(
        self,
        min_similarity_drop: float = 0.3,
        low_similarity_threshold: float = 0.2
    ) -> List[Dict[str, Any]]:
        """
        Detect suspect trace links.

        Every entry in every artifact's ``existing_links`` is checked, and a
        record is emitted when:
        1. The link target is not registered with the engine (severity high)
        2. The current similarity between the two artifacts is below
           ``low_similarity_threshold`` (severity medium)
        3. The engine suggests another target whose confidence exceeds the
           current similarity by at least ``min_similarity_drop``
           (severity low)

        A single link can produce both a medium and a low record.

        Args:
            min_similarity_drop: How much better an alternative target must
                score than the current link to be reported
            low_similarity_threshold: Similarity below which an existing
                link is reported as suspect

        Returns:
            List of suspect link records. Every record has ``source_id``,
            ``target_id``, ``reason`` and ``severity``; medium records add
            ``current_similarity``; low records add ``alternative`` and
            ``alternative_confidence``.
        """
        artifacts = self.engine.artifacts
        matrix = self.engine.tfidf_matrix
        # Build the ID -> row map once instead of scanning the ID list for
        # every link.
        row_of = {
            artifact_id: row
            for row, artifact_id in enumerate(self.engine.artifact_ids)
        }

        suspects = []
        for artifact_id, artifact in artifacts.items():
            for linked_id in artifact.existing_links:
                if linked_id not in artifacts:
                    suspects.append({
                        'source_id': artifact_id,
                        'target_id': linked_id,
                        'reason': 'Target artifact no longer exists',
                        'severity': 'high'
                    })
                    continue

                source_idx = row_of[artifact_id]
                target_idx = row_of[linked_id]
                # Plain float so the record does not carry a NumPy scalar.
                current_similarity = float(cosine_similarity(
                    matrix[source_idx:source_idx + 1],
                    matrix[target_idx:target_idx + 1]
                )[0][0])

                if current_similarity < low_similarity_threshold:
                    suspects.append({
                        'source_id': artifact_id,
                        'target_id': linked_id,
                        'reason': f'Low similarity: {current_similarity:.2f}',
                        'severity': 'medium',
                        'current_similarity': current_similarity
                    })

                # Ask the engine for its single best candidate that beats the
                # current link by the required margin.
                suggestions = self.engine.suggest_links(
                    artifact_id,
                    top_k=1,
                    min_confidence=current_similarity + min_similarity_drop
                )
                if suggestions and suggestions[0].target_id != linked_id:
                    best = suggestions[0]
                    suspects.append({
                        'source_id': artifact_id,
                        'target_id': linked_id,
                        'reason': f'Better alternative found: {best.target_id} '
                                  f'(confidence: {best.confidence:.2f})',
                        'severity': 'low',
                        'alternative': best.target_id,
                        'alternative_confidence': best.confidence
                    })

        return suspects
class TraceabilityDashboard:
    """
    Generates traceability metrics and dashboards.

    A link is stored only in one artifact's ``existing_links``, so every
    metric here treats a link as connecting both of its ends: an artifact
    counts as traced when it lists a link itself or when another artifact
    lists it.
    """

    def __init__(self, engine: "TraceLinkSuggestionEngine"):
        self.engine = engine

    def _linked_artifact_ids(self) -> set:
        """Return the IDs that take part in at least one link, at either end."""
        linked = set()
        for artifact in self.engine.artifacts.values():
            if artifact.existing_links:
                linked.add(artifact.id)
                linked.update(artifact.existing_links)
        return linked

    def calculate_coverage(self) -> Dict[str, float]:
        """
        Calculate trace coverage by artifact type.

        Returns:
            Mapping from artifact type code to the percentage (0-100) of
            artifacts of that type that take part in at least one link.
            Types with no registered artifacts are omitted.
        """
        linked_ids = self._linked_artifact_ids()

        coverage = {}
        for artifact_type in ArtifactType:
            artifacts_of_type = [
                a for a in self.engine.artifacts.values()
                if a.artifact_type == artifact_type
            ]
            if not artifacts_of_type:
                continue

            traced_count = sum(
                1 for a in artifacts_of_type if a.id in linked_ids
            )
            coverage[artifact_type.value] = (
                traced_count / len(artifacts_of_type) * 100
            )
        return coverage

    def generate_trace_matrix(
        self,
        source_type: "ArtifactType",
        target_type: "ArtifactType"
    ) -> List[List[str]]:
        """
        Generate a traceability matrix.

        Rows are the artifacts of ``source_type`` and columns the artifacts
        of ``target_type``, both in registration order.

        Returns:
            2D matrix where matrix[i][j] is 'X' if linked, '' otherwise
        """
        source_artifacts = [
            a for a in self.engine.artifacts.values()
            if a.artifact_type == source_type
        ]
        target_artifacts = [
            a for a in self.engine.artifacts.values()
            if a.artifact_type == target_type
        ]

        matrix = [
            ['' for _ in target_artifacts]
            for _ in source_artifacts
        ]

        row_of = {a.id: idx for idx, a in enumerate(source_artifacts)}
        col_of = {a.id: idx for idx, a in enumerate(target_artifacts)}

        # Links recorded on the row artifact.
        for row, source in enumerate(source_artifacts):
            for linked_id in source.existing_links:
                col = col_of.get(linked_id)
                if col is not None:
                    matrix[row][col] = 'X'

        # Links recorded on the column artifact and pointing back at a row.
        for col, target in enumerate(target_artifacts):
            for linked_id in target.existing_links:
                row = row_of.get(linked_id)
                if row is not None:
                    matrix[row][col] = 'X'

        return matrix

    def identify_gaps(self) -> List[Dict[str, Any]]:
        """
        Identify traceability gaps (artifacts that take part in no link).

        Returns:
            One record per unlinked artifact with ``artifact_id``,
            ``artifact_type``, ``title`` and ``severity``. Severity is
            'high' for system and software requirements, 'medium' otherwise.
        """
        linked_ids = self._linked_artifact_ids()
        high_severity_types = (
            ArtifactType.SOFTWARE_REQ,
            ArtifactType.SYSTEM_REQ,
        )

        gaps = []
        for artifact in self.engine.artifacts.values():
            if artifact.id in linked_ids:
                continue
            gaps.append({
                'artifact_id': artifact.id,
                'artifact_type': artifact.artifact_type.value,
                'title': artifact.title,
                'severity': (
                    'high'
                    if artifact.artifact_type in high_severity_types
                    else 'medium'
                )
            })
        return gaps
# Example usage and demonstration
if __name__ == "__main__":
    # Small door-lock data set: one system requirement, two software
    # requirements and two test cases. Only SWE-001 starts with a link.
    sample_artifacts = [
        Artifact(
            id="SYS-001",
            artifact_type=ArtifactType.SYSTEM_REQ,
            title="Door lock response time",
            description="The system shall respond to lock/unlock commands within 100ms",
            attributes={"priority": "High", "status": "Approved"},
            existing_links=[],
        ),
        Artifact(
            id="SWE-001",
            artifact_type=ArtifactType.SOFTWARE_REQ,
            title="Lock controller response",
            description="The door lock controller software shall process lock and unlock "
                        "commands with a response time not exceeding 100 milliseconds",
            attributes={"ASIL": "B", "status": "Draft"},
            existing_links=["SYS-001"],
        ),
        Artifact(
            id="SWE-002",
            artifact_type=ArtifactType.SOFTWARE_REQ,
            title="CAN message handling",
            description="The software shall receive and parse CAN messages for door control",
            attributes={"ASIL": "B", "status": "Draft"},
            existing_links=[],
        ),
        Artifact(
            id="TC-001",
            artifact_type=ArtifactType.TEST_CASE,
            title="Test lock response time",
            description="Verify that lock command response is within 100ms under normal conditions",
            attributes={"test_type": "Performance", "status": "Ready"},
            existing_links=[],
        ),
        Artifact(
            id="TC-002",
            artifact_type=ArtifactType.TEST_CASE,
            title="Test CAN communication",
            description="Verify CAN message reception and parsing for door lock control commands",
            attributes={"test_type": "Integration", "status": "Ready"},
            existing_links=[],
        ),
    ]

    # Load the sample set into a fresh engine.
    engine = TraceLinkSuggestionEngine()
    engine.add_artifacts(sample_artifacts)

    # 1. Open-ended suggestions for a single software requirement.
    print("=== Trace Link Suggestions for SWE-002 ===")
    swe_suggestions = engine.suggest_links(
        source_id="SWE-002",
        top_k=3,
        min_confidence=0.1,
    )
    for item in swe_suggestions:
        print(f"\nTarget: {item.target_id}")
        print(f"Link Type: {item.link_type.value}")
        print(f"Confidence: {item.confidence:.3f}")
        print(f"Reasoning: {item.reasoning}")

    # 2. Which software requirements should each test case verify?
    print("\n=== Suggested Test Coverage ===")
    for tc_id in ("TC-001", "TC-002"):
        tc_suggestions = engine.suggest_links(
            source_id=tc_id,
            target_type=ArtifactType.SOFTWARE_REQ,
            link_type=LinkType.VERIFIES,
            top_k=2,
        )
        print(f"\n{tc_id} should verify:")
        for item in tc_suggestions:
            print(f" - {item.target_id} (confidence: {item.confidence:.3f})")

    # 3. Review the links that already exist.
    print("\n=== Suspect Link Detection ===")
    suspect_records = SuspectLinkDetector(engine).detect_suspects()
    if not suspect_records:
        print("No suspect links detected")
    else:
        for record in suspect_records:
            print(f"\nSuspect Link: {record['source_id']} → {record['target_id']}")
            print(f"Reason: {record['reason']}")
            print(f"Severity: {record['severity']}")

    # 4. Coverage per artifact type.
    print("\n=== Traceability Coverage ===")
    dashboard = TraceabilityDashboard(engine)
    for type_code, percent in dashboard.calculate_coverage().items():
        print(f"{type_code}: {percent:.1f}%")

    # 5. Artifacts without any trace link.
    print("\n=== Traceability Gaps ===")
    for gap in dashboard.identify_gaps():
        print(f"\n{gap['artifact_id']} ({gap['artifact_type']}): {gap['title']}")
        print(f"Severity: {gap['severity']}")
Integration with Requirements Tools
OSLC-Based Integration
"""
OSLC integration for automated traceability.
"""
import requests
from datetime import datetime
from typing import List, Dict
from xml.etree import ElementTree as ET
class OSLCTraceabilityClient:
    """
    Client for OSLC-based traceability operations.

    Sends RDF/XML over HTTP with bearer-token authentication. Every request
    uses the timeout given at construction so a stalled server cannot block
    the caller indefinitely.
    """

    def __init__(self, base_url: str, auth_token: str, timeout: float = 30.0):
        """
        Args:
            base_url: Root URL of the OSLC server (no trailing slash)
            auth_token: Bearer token sent in the Authorization header
            timeout: Per-request timeout in seconds
        """
        self.base_url = base_url
        self.timeout = timeout
        self.headers = {
            'Authorization': f'Bearer {auth_token}',
            'Accept': 'application/rdf+xml',
            'Content-Type': 'application/rdf+xml',
            'OSLC-Core-Version': '2.0'
        }

    def _build_link_payload(
        self,
        source_id: str,
        target_id: str,
        link_type: str,
        confidence: float,
        auto_created: bool
    ) -> str:
        """Return the RDF/XML document describing one trace link."""
        from datetime import timezone
        from xml.sax.saxutils import escape

        # IDs and link type come from outside this class; escape them so
        # characters such as & < " cannot break or alter the XML.
        attr_entities = {'"': '&quot;'}
        source_uri = escape(
            f"{self.base_url}/rm/requirements/{source_id}", attr_entities
        )
        target_uri = escape(
            f"{self.base_url}/rm/requirements/{target_id}", attr_entities
        )
        link_type_text = escape(str(link_type))

        # Timezone-aware timestamp; a naive local time is ambiguous to the
        # receiving server.
        created = datetime.now(timezone.utc).isoformat()
        auto_created_text = str(bool(auto_created)).lower()

        # Typed literal values are written without surrounding whitespace so
        # their lexical form is exactly the value.
        return (
            '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"\n'
            '         xmlns:oslc_rm="http://open-services.net/ns/rm#"\n'
            '         xmlns:dcterms="http://purl.org/dc/terms/">\n'
            '  <oslc_rm:RequirementLink>\n'
            f'    <oslc_rm:source rdf:resource="{source_uri}"/>\n'
            f'    <oslc_rm:target rdf:resource="{target_uri}"/>\n'
            f'    <oslc_rm:linkType>{link_type_text}</oslc_rm:linkType>\n'
            '    <dcterms:creator>AI Trace Assistant</dcterms:creator>\n'
            '    <dcterms:created rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">'
            f'{created}</dcterms:created>\n'
            '    <oslc_rm:confidence rdf:datatype="http://www.w3.org/2001/XMLSchema#float">'
            f'{confidence}</oslc_rm:confidence>\n'
            '    <oslc_rm:autoCreated rdf:datatype="http://www.w3.org/2001/XMLSchema#boolean">'
            f'{auto_created_text}</oslc_rm:autoCreated>\n'
            '  </oslc_rm:RequirementLink>\n'
            '</rdf:RDF>\n'
        )

    def create_trace_link(
        self,
        source_id: str,
        target_id: str,
        link_type: str,
        confidence: float,
        auto_created: bool = True
    ) -> bool:
        """
        Create a trace link via OSLC.

        Args:
            source_id: Source artifact ID
            target_id: Target artifact ID
            link_type: Type of trace link
            confidence: AI confidence score
            auto_created: Whether link was auto-created

        Returns:
            True if the server answered 201 Created, False for any other
            status code. Connection errors and timeouts propagate as
            ``requests`` exceptions.
        """
        url = f"{self.base_url}/rm/links"
        payload = self._build_link_payload(
            source_id, target_id, link_type, confidence, auto_created
        )

        # Send bytes: a str body would be encoded as Latin-1 by the HTTP
        # layer and fail on non-Latin characters.
        response = requests.post(
            url,
            headers=self.headers,
            data=payload.encode('utf-8'),
            timeout=self.timeout
        )
        return response.status_code == 201

    def get_suggested_links_for_artifact(
        self,
        artifact_id: str,
        threshold: float = 0.5
    ) -> List[Dict]:
        """
        Get AI-suggested links for an artifact.

        Args:
            artifact_id: ID of the artifact to fetch suggestions for
            threshold: Minimum confidence, sent as ``confidence_threshold``

        Returns:
            Parsed suggestions (see ``_parse_suggestions``)

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        url = f"{self.base_url}/rm/requirements/{artifact_id}/suggested-links"
        params = {'confidence_threshold': threshold}

        response = requests.get(
            url,
            headers=self.headers,
            params=params,
            timeout=self.timeout
        )
        response.raise_for_status()

        return self._parse_suggestions(response.content)

    def _parse_suggestions(self, content: bytes) -> List[Dict]:
        """
        Parse RDF suggestion response.

        Not implemented: a real implementation would use rdflib for proper
        RDF parsing. Raising makes the gap visible instead of handing
        callers ``None`` where they expect a list.
        """
        raise NotImplementedError(
            "RDF suggestion parsing is not implemented; "
            "use rdflib to parse the response body"
        )
CI/CD Integration
Automated Traceability Validation
# .github/workflows/traceability-check.yml
#
# Validates traceability on pull requests that touch requirements, design or
# tests: exports requirements, suggests links, detects suspect links, gates on
# coverage, then publishes a report and a summary comment on the PR.
name: Traceability Validation

on:
  pull_request:
    paths:
      - 'requirements/**'
      - 'design/**'
      - 'tests/**'

# Least privilege: read the repository, write the PR comment.
permissions:
  contents: read
  pull-requests: write

jobs:
  check-traceability:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: |
          pip install -r requirements.txt
          pip install scikit-learn numpy

      - name: Export requirements from DOORS
        run: |
          python scripts/export_from_doors.py \
            --output data/requirements.json

      - name: Run trace suggestion engine
        run: |
          python scripts/suggest_traces.py \
            --requirements data/requirements.json \
            --output suggested_links.json \
            --min-confidence 0.5

      - name: Detect suspect links
        run: |
          python scripts/detect_suspects.py \
            --requirements data/requirements.json \
            --output suspect_links.json

      # Gate: this step fails the job when coverage is below 90%.
      - name: Calculate coverage
        id: coverage
        run: |
          python scripts/calculate_coverage.py \
            --requirements data/requirements.json \
            --output coverage_report.json \
            --fail-below 90

      # The remaining steps also run when only the coverage gate failed, so
      # the report and PR comment are available exactly when they are needed.
      # They stay skipped if an earlier step failed (coverage is then
      # "skipped", not "failure").
      - name: Generate traceability report
        if: ${{ success() || steps.coverage.outcome == 'failure' }}
        run: |
          python scripts/generate_trace_report.py \
            --suggestions suggested_links.json \
            --suspects suspect_links.json \
            --coverage coverage_report.json \
            --output traceability_report.html

      - name: Upload report
        if: ${{ success() || steps.coverage.outcome == 'failure' }}
        uses: actions/upload-artifact@v4
        with:
          name: traceability-report
          path: traceability_report.html

      - name: Comment on PR
        if: ${{ github.event_name == 'pull_request' && (success() || steps.coverage.outcome == 'failure') }}
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');

            // Either file may be missing if its producing step failed before
            // writing; degrade to "n/a" instead of crashing the comment step.
            const readJson = (path) =>
              fs.existsSync(path) ? JSON.parse(fs.readFileSync(path, 'utf8')) : null;

            const coverage = readJson('coverage_report.json');
            const suspects = readJson('suspect_links.json');

            const coverageText = coverage ? `${coverage.overall}%` : 'n/a';
            const suspectCount = suspects ? suspects.length : 'n/a';
            const verdict = suspects && suspects.length > 0
              ? '[WARN] Please review suspect links'
              : '[PASS] No suspect links detected';

            // Artifacts are downloaded from the workflow run page.
            const runUrl = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`;

            // Joined from an array so no line carries leading indentation,
            // which Markdown would render as a code block.
            const comment = [
              '## Traceability Check Results',
              '',
              `**Coverage**: ${coverageText}`,
              `**Suspect Links**: ${suspectCount}`,
              '',
              verdict,
              '',
              `[Full Report](${runUrl}) (artifact: traceability-report)`,
            ].join('\n');

            await github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: comment
            });
Summary
Traceability automation significantly reduces manual effort:
- Automated Link Suggestion: ML-based recommendations using semantic similarity
- Suspect Detection: Identifies potentially broken or outdated links
- Coverage Analysis: Automated metrics and gap identification
- CI/CD Integration: Continuous validation in development pipeline
- HITL Pattern: Human-in-the-loop review — suggested links are recommendations that an engineer approves or rejects before they become part of the trace baseline
Key Benefits:
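- Up to 60-80% reduction in manual tracing effort (indicative; actual savings depend on artifact text quality and the review effort spent on suggestions)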
- Improved trace link quality and completeness
- Early detection of traceability gaps
- Continuous validation during development