Optimization Recipes¶
Ready-to-use scripts for code optimization analysis and reporting.
Quick Audit¶
Full Codebase Scan¶
from rejig import Rejig, DRYAnalyzer, LoopOptimizer
def audit_codebase(path: str):
    """Print a DRY and loop optimization audit for the project at ``path``.

    Builds a ``Rejig`` project for ``path``, runs ``DRYAnalyzer`` and
    ``LoopOptimizer`` over it, then prints each result's ``summary()``
    followed by the combined number of findings.

    Args:
        path: Location handed to ``Rejig`` (for example ``"src/"``).

    Returns:
        A ``(dry_issues, loop_issues)`` tuple holding the two results exactly
        as returned by the analyzers' ``find_all_issues()`` calls.
    """
    project = Rejig(path)

    print(f"Auditing: {path}")
    print("=" * 50)

    # DRY pass: analyze first, then report.
    dry_findings = DRYAnalyzer(project).find_all_issues()
    print("\nDRY Violations:")
    print(dry_findings.summary())

    # Loop pass.
    loop_findings = LoopOptimizer(project).find_all_issues()
    print("\nLoop Optimizations:")
    print(loop_findings.summary())

    # Combined count across both analyzers.
    combined = len(dry_findings) + len(loop_findings)
    print(f"\nTotal: {combined} optimization opportunities")

    return dry_findings, loop_findings
# Example: audit everything under src/ (the returned result tuple is discarded here).
audit_codebase("src/")
Single File Analysis¶
def analyze_file(rj: Rejig, file_path: str):
    """Print the DRY and loop findings that belong to a single file.

    Both analyzers are run via ``find_all_issues()`` and each result is
    narrowed with ``in_file()``. Loop findings additionally show their
    suggested replacement code.

    Args:
        rj: Project the analyzers run against.
        file_path: File to report on; converted to a ``pathlib.Path`` before
            being handed to ``in_file()``.
    """
    from pathlib import Path

    dry_analyzer = DRYAnalyzer(rj)
    loop_analyzer = LoopOptimizer(rj)

    target = Path(file_path)
    dry_hits = dry_analyzer.find_all_issues().in_file(target)
    loop_hits = loop_analyzer.find_all_issues().in_file(target)

    print(f"Analysis for {file_path}:")

    if dry_hits:
        print("\nDRY Issues:")
        for hit in dry_hits:
            print(f" Line {hit.line_number}: {hit.message}")

    if loop_hits:
        print("\nLoop Issues:")
        for hit in loop_hits:
            print(f" Line {hit.line_number}: {hit.message}")
            print(f" Suggested: {hit.suggested_code}")

    # Neither analyzer reported anything for this file.
    if not (dry_hits or loop_hits):
        print(" No optimization opportunities found")
# Example: build the project for src/, then report on one file inside it.
rj = Rejig("src/")
analyze_file(rj, "src/utils.py")
DRY Recipes¶
Extract Duplicate Code Report¶
from rejig import Rejig, DRYAnalyzer
from collections import defaultdict
def find_extraction_candidates(rj: Rejig):
    """Report duplicated code blocks that are candidates for extraction.

    Asks ``DRYAnalyzer`` for duplicate blocks of at least 5 lines that occur at
    least twice, buckets them by the first 100 characters of their code, and
    prints each bucket's occurrence count, line count, locations and a
    200-character code sample.

    Args:
        rj: Project to analyze.
    """
    analyzer = DRYAnalyzer(rj)
    found = analyzer.find_duplicate_code_blocks(min_lines=5, min_occurrences=2)

    # Rough bucketing: blocks sharing the same leading 100 characters are
    # treated as occurrences of the same duplicate.
    buckets = defaultdict(list)
    for block in found:
        buckets[block.original_code[:100]].append(block)

    print("Extraction Candidates:")
    print("=" * 50)

    for number, members in enumerate(buckets.values(), start=1):
        representative = members[0]
        print(f"\nCandidate {number}: {len(members)} occurrences")
        print(f"Lines: {representative.finding.context.get('line_count', '?')}")

        print("Locations:")
        for member in members:
            print(f" - {member.location}")

        # Show the start of the first occurrence, indenting continuation lines.
        print("Sample code:")
        sample = representative.original_code[:200].replace("\n", "\n ")
        print(" " + sample)
# Example: list extraction candidates for the code under src/.
rj = Rejig("src/")
find_extraction_candidates(rj)
Magic Number Cleanup¶
def identify_magic_numbers(rj: Rejig):
    """Report repeated integer literals and propose a constant name for each.

    Takes the literals ``DRYAnalyzer`` reports as occurring at least twice,
    keeps those whose ``literal_type`` context entry is ``"integer"``, groups
    them by their source text and prints the groups from most to least used.
    At most five locations are listed per value.

    Args:
        rj: Project to analyze.
    """
    analyzer = DRYAnalyzer(rj)
    repeated = analyzer.find_duplicate_literals(min_occurrences=2)

    # Filter to just numbers
    integer_hits = [
        literal
        for literal in repeated
        if literal.finding.context.get("literal_type") == "integer"
    ]

    print("Magic Numbers Found:")
    print("=" * 50)

    # Group by value (insertion order of first appearance is preserved).
    by_value = {}
    for hit in integer_hits:
        by_value.setdefault(hit.original_code, []).append(hit)

    # Most frequently used values first; ties keep their first-seen order.
    ranked = sorted(by_value.items(), key=lambda pair: -len(pair[1]))

    for literal_text, hits in ranked:
        usage_count = len(hits)
        print(f"\n{literal_text} (used {usage_count} times)")

        # A name is only suggested when at least one finding carries a name.
        usage_names = [hit.finding.name for hit in hits if hit.finding.name]
        if usage_names:
            suggestion = suggest_constant_name(literal_text, usage_names)
            print(f" Suggested: {suggestion} = {literal_text}")

        print(" Locations:")
        for hit in hits[:5]:
            print(f" - {hit.location}")
        if usage_count > 5:
            print(f" ... and {usage_count - 5} more")
def suggest_constant_name(value: str, contexts: list[str]) -> str:
    """Suggest an UPPER_SNAKE_CASE constant name for a literal.

    A handful of well-known values map to descriptive names. Anything else
    falls back to ``CONSTANT_<value>``, with the value rewritten so that the
    result is always a valid Python identifier (``"-1"`` becomes
    ``CONSTANT_NEG_1`` rather than the unusable ``CONSTANT_-1``).

    Args:
        value: Source text of the literal, e.g. ``"60"`` or ``"-1"``.
        contexts: Names of the places where the literal is used. Accepted for
            interface compatibility; the current heuristic does not use it.

    Returns:
        The suggested constant name.
    """
    import re

    # Simple heuristic - in practice, use more sophisticated analysis
    common_patterns = {
        "60": "SECONDS_PER_MINUTE",
        "3600": "SECONDS_PER_HOUR",
        "86400": "SECONDS_PER_DAY",
        "1000": "MILLISECONDS_PER_SECOND",
        "1024": "BYTES_PER_KB",
        "100": "PERCENTAGE_MAX",
    }
    known_name = common_patterns.get(value)
    if known_name is not None:
        return known_name

    literal = value.strip()
    # Keep the sign information in the name instead of an illegal "-".
    sign = "NEG_" if literal.startswith("-") else ""
    literal = literal.lstrip("+-")
    # Collapse every run of non-identifier characters (".", "-", spaces, ...).
    slug = re.sub(r"\W+", "_", literal).strip("_").upper()
    return f"CONSTANT_{sign}{slug}"
# Example: report magic numbers for the code under src/.
rj = Rejig("src/")
identify_magic_numbers(rj)
String Constant Extraction¶
def find_repeated_strings(rj: Rejig, min_length: int = 10):
    """Report repeated string literals that could become named constants.

    Takes the literals ``DRYAnalyzer`` reports as occurring at least twice and
    keeps those whose ``literal_type`` context entry is ``"string"`` and whose
    source text is long enough. Values are printed from most to least used,
    truncated to 50 characters, with at most three locations each.

    Args:
        rj: Project to analyze.
        min_length: Minimum ``len(original_code)`` a literal needs to be listed.
    """
    analyzer = DRYAnalyzer(rj)
    repeated = analyzer.find_duplicate_literals(min_occurrences=2)

    def is_long_string(literal) -> bool:
        """Return True for string literals of at least ``min_length`` characters."""
        if literal.finding.context.get("literal_type") != "string":
            return False
        return len(literal.original_code) >= min_length

    long_strings = [literal for literal in repeated if is_long_string(literal)]

    print(f"Repeated Strings (>= {min_length} chars):")
    print("=" * 50)

    # Group by value (insertion order of first appearance is preserved).
    by_value = {}
    for literal in long_strings:
        by_value.setdefault(literal.original_code, []).append(literal)

    # Most frequently used values first; ties keep their first-seen order.
    ranked = sorted(by_value.items(), key=lambda pair: len(pair[1]), reverse=True)

    for text, hits in ranked:
        suffix = "..." if len(text) > 50 else ""
        print(f"\n{text[:50]}{suffix}")
        print(f" Used {len(hits)} times")
        print(" Locations:")
        for hit in hits[:3]:
            print(f" - {hit.location}")
# Example: report repeated strings under src/ using the default minimum length of 10.
rj = Rejig("src/")
find_repeated_strings(rj)
Similar Function Consolidation¶
def find_consolidation_candidates(rj: Rejig):
    """Report groups of similar functions that could be merged.

    Each finding from ``DRYAnalyzer.find_similar_functions()`` starts a group
    made of its own name plus the names in its ``similar_functions`` context
    entry. A finding whose name already appeared in an earlier group is
    skipped, so each function is reported once.

    Args:
        rj: Project to analyze.
    """
    analyzer = DRYAnalyzer(rj)
    similar = analyzer.find_similar_functions()

    if not similar:
        print("No similar functions found")
        return

    print("Similar Function Groups:")
    print("=" * 50)

    # Collect the groups first, then print them.
    already_grouped = set()
    groups = []
    for finding in similar:
        if finding.name in already_grouped:
            continue

        context = finding.finding.context
        members = [finding.name] + context.get("similar_functions", [])
        already_grouped.update(members)

        groups.append({
            "functions": members,
            "location": finding.location,
            "params": context.get("param_count"),
            "statements": context.get("statement_count"),
        })

    for number, entry in enumerate(groups, start=1):
        print(f"\nGroup {number}:")
        print(f" Functions: {', '.join(entry['functions'])}")
        print(f" Parameters: {entry['params']}")
        print(f" Statements: {entry['statements']}")
        print(" Suggestion: Consider merging into a single parameterized function")
# Example: look for mergeable functions in the code under src/.
rj = Rejig("src/")
find_consolidation_candidates(rj)
Loop Recipes¶
Comprehension Conversion Guide¶
def generate_comprehension_fixes(rj: Rejig):
    """Print before/after code for every loop-to-comprehension opportunity.

    Opportunities come from ``LoopOptimizer.find_comprehension_opportunities()``
    and are shown in ``sorted_by_location()`` order, each with its location,
    the original code, the suggested code and the estimated improvement.

    Args:
        rj: Project to analyze.
    """
    optimizer = LoopOptimizer(rj)
    conversions = optimizer.find_comprehension_opportunities()

    if not conversions:
        print("No loop-to-comprehension conversions found")
        return

    print("Loop to Comprehension Conversions:")
    print("=" * 50)

    rule = "-" * 40
    for conversion in conversions.sorted_by_location():
        print(f"\n{conversion.location}")
        print(rule)

        print("Before:")
        print(indent(conversion.original_code, " "))

        print("\nAfter:")
        print(indent(conversion.suggested_code, " "))

        print(f"\nBenefit: {conversion.finding.estimated_improvement}")
def indent(text: str, prefix: str) -> str:
    """Indent each non-blank line of ``text`` with ``prefix``.

    Delegates to ``textwrap.indent``: lines consisting solely of whitespace
    are left unprefixed, so the result carries no trailing whitespace, and
    empty input yields an empty string instead of a lone prefix.

    Args:
        text: Possibly multi-line text to indent.
        prefix: String placed in front of every non-blank line.

    Returns:
        The indented text, with the original line endings preserved.
    """
    import textwrap

    return textwrap.indent(text, prefix)
# Example: print comprehension conversions for the code under src/.
rj = Rejig("src/")
generate_comprehension_fixes(rj)
Builtin Replacement Guide¶
def generate_builtin_fixes(rj: Rejig):
    """Print the suggested builtin replacements, grouped by optimization type.

    Opportunities come from ``LoopOptimizer.find_builtin_opportunities()`` and
    are grouped with ``group_by_type()``. Each group prints its type name, the
    number of occurrences, and every occurrence's location and suggested code.

    Args:
        rj: Project to analyze.
    """
    optimizer = LoopOptimizer(rj)
    opportunities = optimizer.find_builtin_opportunities()

    # Group by builtin type
    grouped = opportunities.group_by_type()

    print("Builtin Function Replacements:")
    print("=" * 50)

    for kind, members in grouped.items():
        print(f"\n## {kind.name}")
        print(f"Found: {len(members)} occurrences\n")
        for member in members:
            print(f" {member.location}")
            print(f" {member.suggested_code}")
# Example: print builtin replacement suggestions for the code under src/.
rj = Rejig("src/")
generate_builtin_fixes(rj)
Performance Hotspot Finder¶
def find_performance_hotspots(rj: Rejig):
    """Print the loop findings whose type is on the high-impact list.

    Only findings returned by ``find_all_issues(min_confidence=0.8)`` are
    considered, and of those only the ones whose ``type.name`` is one of the
    three names listed below.

    Args:
        rj: Project to analyze.
    """
    optimizer = LoopOptimizer(rj)
    candidates = optimizer.find_all_issues(min_confidence=0.8)

    # Prioritize by type (some optimizations have bigger impact)
    high_impact_types = {
        "SLOW_LOOP_TO_JOIN",     # O(n) vs O(n²)
        "SLOW_LOOP_TO_SUM",      # C implementation
        "SLOW_LOOP_TO_ANY_ALL",  # Short-circuit
    }
    hotspots = [
        candidate
        for candidate in candidates
        if candidate.type.name in high_impact_types
    ]

    # The header is printed even when nothing qualifies.
    print("Performance Hotspots:")
    print("=" * 50)

    if hotspots:
        for hotspot in hotspots:
            print(f"\n{hotspot.location}")
            print(f" Type: {hotspot.type.name}")
            print(f" Impact: {hotspot.finding.estimated_improvement}")
            print(f" Fix: {hotspot.suggested_code}")
    else:
        print("No high-impact optimizations found")
# Example: list high-impact loop optimizations for the code under src/.
rj = Rejig("src/")
find_performance_hotspots(rj)
Report Generation¶
Markdown Report¶
def generate_markdown_report(rj: Rejig) -> str:
    """Build a Markdown report of all DRY and loop findings.

    The report has a summary with the finding counts, a "DRY Violations"
    section listing location and message per finding, and a "Loop
    Optimizations" section showing before/after code in fenced blocks.
    Within each section findings are grouped by ``group_by_type()``.

    Args:
        rj: Project to analyze; ``rj.root`` is shown in the report header.

    Returns:
        The complete report as one newline-joined string.
    """
    dry_analyzer = DRYAnalyzer(rj)
    loop_analyzer = LoopOptimizer(rj)

    dry_found = dry_analyzer.find_all_issues()
    loop_found = loop_analyzer.find_all_issues()

    out = [
        "# Code Optimization Report",
        "",
        f"Generated for: `{rj.root}`",
        "",
        "## Summary",
        "",
        f"- **DRY violations:** {len(dry_found)}",
        f"- **Loop optimizations:** {len(loop_found)}",
        f"- **Total opportunities:** {len(dry_found) + len(loop_found)}",
        "",
        # DRY Section
        "## DRY Violations",
        "",
    ]

    if dry_found:
        for kind, members in dry_found.group_by_type().items():
            heading = kind.name.replace("_", " ").title()
            out += [f"### {heading}", ""]
            out += [f"- `{member.location}`: {member.message}" for member in members]
            out.append("")
    else:
        out += ["No DRY violations found.", ""]

    # Loop Section
    out += ["## Loop Optimizations", ""]

    if loop_found:
        for kind, members in loop_found.group_by_type().items():
            heading = kind.name.replace("_", " ").title()
            out += [f"### {heading}", ""]
            for member in members:
                out += [
                    f"**{member.location}**",
                    "",
                    "```python",
                    "# Before:",
                    member.original_code,
                    "",
                    "# After:",
                    member.suggested_code,
                    "```",
                    "",
                ]
    else:
        out += ["No loop optimizations found.", ""]

    return "\n".join(out)
# Example: build the report for src/ once and print it.
rj = Rejig("src/")
report = generate_markdown_report(rj)
print(report)

# Or save to file. The encoding is given explicitly so that writing reported
# source code containing non-ASCII characters does not depend on the
# platform's default text encoding.
with open("optimization-report.md", "w", encoding="utf-8") as f:
    f.write(report)
JSON Export¶
import json
from pathlib import Path
def export_to_json(rj: Rejig, output_path: str):
    """Write all DRY and loop findings to a JSON file.

    The document holds the project root, a summary with the finding counts,
    and the two finding lists produced by ``to_list_of_dicts()``. Values that
    ``json`` cannot serialize natively are written via ``str``.

    Args:
        rj: Project to analyze; ``rj.root`` is stored under ``"root"``.
        output_path: Destination file, overwritten if it exists.
    """
    dry_analyzer = DRYAnalyzer(rj)
    loop_analyzer = LoopOptimizer(rj)

    dry_found = dry_analyzer.find_all_issues()
    loop_found = loop_analyzer.find_all_issues()

    summary = {
        "dry_violations": len(dry_found),
        "loop_optimizations": len(loop_found),
        "total": len(dry_found) + len(loop_found),
    }
    payload = {
        "root": str(rj.root),
        "summary": summary,
        "dry_issues": dry_found.to_list_of_dicts(),
        "loop_issues": loop_found.to_list_of_dicts(),
    }

    with open(output_path, "w") as handle:
        json.dump(payload, handle, indent=2, default=str)

    print(f"Exported to {output_path}")
# Example: export the findings for src/ to optimization-findings.json.
rj = Rejig("src/")
export_to_json(rj, "optimization-findings.json")
HTML Report¶
def generate_html_report(rj: Rejig) -> str:
    """Generate a standalone HTML report of the loop optimizations.

    The page shows the project root, the DRY and loop finding counts, and one
    card per loop finding with its location, message and before/after code.
    DRY findings are only counted, not listed. No syntax highlighting is
    applied; code is shown in plain ``<pre><code>`` blocks.

    Every interpolated value is HTML-escaped, so source code containing
    ``<``, ``>`` or ``&`` is displayed literally instead of being parsed as
    markup. The page declares UTF-8, so it should be written with
    ``encoding="utf-8"``.

    Args:
        rj: Project to analyze; ``rj.root`` is shown in the page header.

    Returns:
        The complete HTML document as a string.
    """
    # Imported locally under its own name: callers commonly bind the result of
    # this function to a variable called ``html``.
    from html import escape

    def as_text(value) -> str:
        """Render any value as HTML-safe text."""
        return escape(str(value))

    dry = DRYAnalyzer(rj)
    loops = LoopOptimizer(rj)
    dry_issues = dry.find_all_issues()
    loop_issues = loops.find_all_issues()

    # Collect fragments and join once instead of growing a string with "+=".
    parts = [f"""<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Optimization Report</title>
<style>
body {{ font-family: -apple-system, BlinkMacSystemFont, sans-serif; margin: 2em; }}
.issue {{ border: 1px solid #ddd; padding: 1em; margin: 1em 0; border-radius: 4px; }}
.location {{ color: #666; font-family: monospace; }}
.code {{ background: #f5f5f5; padding: 1em; border-radius: 4px; overflow-x: auto; }}
pre {{ margin: 0; }}
h2 {{ color: #333; }}
.count {{ color: #888; font-weight: normal; }}
</style>
</head>
<body>
<h1>Code Optimization Report</h1>
<p>Path: <code>{as_text(rj.root)}</code></p>
<h2>Summary</h2>
<ul>
<li>DRY violations: {len(dry_issues)}</li>
<li>Loop optimizations: {len(loop_issues)}</li>
</ul>
<h2>Loop Optimizations <span class="count">({len(loop_issues)})</span></h2>
"""]

    for issue in loop_issues:
        # The text inside <pre> is kept flush-left: whitespace there is rendered.
        parts.append(f"""
<div class="issue">
<div class="location">{as_text(issue.location)}</div>
<p>{as_text(issue.message)}</p>
<div class="code">
<pre><code># Before:
{as_text(issue.original_code)}
# After:
{as_text(issue.suggested_code)}</code></pre>
</div>
</div>
""")

    parts.append("""
</body>
</html>
""")
    return "".join(parts)


# Example: write the report for src/ to report.html. The file encoding matches
# the charset declared in the page's <meta> tag.
rj = Rejig("src/")
html = generate_html_report(rj)
with open("report.html", "w", encoding="utf-8") as f:
    f.write(html)
CI Integration¶
Pre-commit Hook¶
#!/usr/bin/env python3
"""Pre-commit hook for optimization checks."""
import subprocess
import sys
from pathlib import Path
from rejig import Rejig, DRYAnalyzer, LoopOptimizer
def get_staged_files():
    """Return the staged Python files reported by ``git diff --cached``.

    Only added, copied or modified entries are requested
    (``--diff-filter=ACM``). Git is asked for NUL-separated output (``-z``)
    so that paths containing spaces, quotes or non-ASCII characters arrive
    verbatim; in the default newline-separated format git wraps such paths in
    double quotes, which made them fail the ``.py`` suffix test and silently
    drop out of the check.

    Returns:
        A list of path strings ending in ``.py``.

    Raises:
        subprocess.CalledProcessError: If ``git diff`` exits with a non-zero
            status (for example outside a repository), instead of silently
            reporting "no staged files".
    """
    result = subprocess.run(
        ["git", "diff", "--cached", "--name-only", "--diff-filter=ACM", "-z"],
        capture_output=True,
        text=True,
        check=True,
    )
    # The output ends with a NUL, which leaves one empty entry after the
    # split; the suffix filter discards it.
    return [name for name in result.stdout.split("\0") if name.endswith(".py")]
def check_files(files: list[str]) -> int:
    """Check staged files for optimization issues.

    Runs the loop analysis over the project rooted at ``"."`` with
    ``min_confidence=0.9`` and prints the findings that fall inside each of
    the given files.

    Args:
        files: Paths of the files to report on.

    Returns:
        The total number of findings printed; ``0`` when ``files`` is empty,
        in which case no analysis is run at all.
    """
    if not files:
        return 0

    rj = Rejig(".")
    loops = LoopOptimizer(rj)

    # The analysis covers the whole project and does not depend on the file
    # being reported, so run it once instead of once per staged file.
    # NOTE(review): assumes in_file() returns a filtered result without
    # modifying the collection it is called on - confirm against the rejig API.
    all_issues = loops.find_all_issues(min_confidence=0.9)

    issues_found = 0
    for file_path in files:
        file_issues = all_issues.in_file(Path(file_path))
        if file_issues:
            print(f"\n{file_path}:")
            for issue in file_issues:
                print(f" Line {issue.line_number}: {issue.message}")
                issues_found += 1
    return issues_found
if __name__ == "__main__":
    # The exit status is the hook's verdict: 0 when nothing was reported,
    # 1 (after printing the total) otherwise.
    files = get_staged_files()
    issues = check_files(files)
    if not issues:
        sys.exit(0)
    print(f"\nFound {issues} optimization issues. Consider fixing before commit.")
    sys.exit(1)
GitHub Action¶
# .github/workflows/optimize.yml
name: Code Optimization Check

# Run only for pull requests that touch paths matching '**.py'.
on:
  pull_request:
    paths:
      - '**.py'

jobs:
  optimize:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: pip install rejig

      # Inline Python fed to the interpreter through a quoted heredoc. Each
      # finding is printed in the `::warning file=...,line=...::message`
      # format; the script itself never exits with a failure status.
      - name: Run optimization check
        run: |
          python - << 'EOF'
          from rejig import Rejig, LoopOptimizer
          rj = Rejig("src/")
          loops = LoopOptimizer(rj)
          issues = loops.find_all_issues(min_confidence=0.9)
          if issues:
              print("::warning::Found optimization opportunities:")
              for issue in issues:
                  print(f"::warning file={issue.file_path},line={issue.line_number}::{issue.message}")
          EOF
Interactive Analysis¶
REPL Exploration¶
# Start Python REPL and explore interactively
from rejig import Rejig, DRYAnalyzer, LoopOptimizer
rj = Rejig("src/")
dry = DRYAnalyzer(rj)
loops = LoopOptimizer(rj)

# Run every DRY check and print the overall summary.
dry_issues = dry.find_all_issues()
print(dry_issues.summary())

# Narrow the result with dry_issues() and look at the first five entries.
duplicates = dry_issues.dry_issues()
for dup in duplicates[:5]:
    print(f"{dup.location}: {dup.message}")

# Run every loop check.
loop_issues = loops.find_all_issues()

# Keep only the findings located in one file.
file_issues = loop_issues.in_file("src/utils.py")

# Show the current code next to the suggested replacement.
for finding in file_issues:
    print(f"Line {finding.line_number}:")
    print(f" Current: {finding.original_code}")
    print(f" Better: {finding.suggested_code}")