37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785 | class CodeMapGenerator:
"""Generates code documentation based on LOD (Level of Detail)."""
def __init__(self, config: GenConfig, output_path: Path) -> None:
"""
Initialize the code map generator.
Args:
config: Generation configuration settings
output_path: Path to write the output
"""
self.config = config
self.output_path = output_path
def _generate_mermaid_diagram(self, entities: list[LODEntity]) -> str:
"""Generate a Mermaid diagram string for entity relationships using subgraphs."""
# Convert config strings to lower case for case-insensitive comparison
allowed_entities = {e.lower() for e in self.config.mermaid_entities} if self.config.mermaid_entities else None
allowed_relationships = (
{r.lower() for r in self.config.mermaid_relationships} if self.config.mermaid_relationships else None
)
# Helper to check if an entity type should be included
def should_include_entity(entity_type: EntityType) -> bool:
"""Determines if an entity type should be included in the diagram based on allowed entities.
If no allowed entities are specified in the config, all entities will be included. Otherwise,
only entities whose type matches one of the allowed entity types will be included.
Args:
entity_type: The type of entity to check for inclusion.
Returns:
bool: True if the entity should be included, False otherwise.
"""
if not allowed_entities:
return True # Include all if not specified
return entity_type.name.lower() in allowed_entities
def should_include_relationship(relationship_type: str) -> bool:
"""Determines if a relationship type should be included in the diagram based on allowed relationships.
If no allowed relationships are specified in the config, all relationships will be included. Otherwise,
only relationships whose type matches one of the allowed relationship types will be included.
Args:
relationship_type: The type of relationship to check for inclusion.
Returns:
bool: True if the relationship should be included, False otherwise.
"""
if not allowed_relationships:
return True # Include all if not specified
return relationship_type.lower() in allowed_relationships
# --- Data Structures --- #
# node_id -> (definition_line, class_name) for regular nodes
node_definitions: dict[str, tuple[str, str]] = {}
# subgraph_id -> (label, type, list_of_contained_node_ids)
subgraph_definitions: dict[str, tuple[str, str, list[str]]] = {}
# subgraph_id -> parent_subgraph_id (for nesting)
subgraph_hierarchy: dict[str, str] = {}
# Edges (parent_id, child_id, label, type)
edges: list[tuple[str, str, str, str]] = []
# Track processed entities/subgraphs to avoid duplicates
processed_ids = set()
# Map entity ID to entity object
entity_map: dict[str, LODEntity] = {}
# Track which nodes/subgraphs are connected by edges
connected_ids = set()
# Map simple function/method names to their full node IDs
name_to_node_ids: dict[str, list[str]] = {}
# Keep track of nodes defined outside any subgraph (like external imports)
global_nodes: set[str] = set()
internal_paths = {str(e.metadata.get("file_path")) for e in entities if e.metadata.get("file_path")}
def get_node_id(entity: LODEntity) -> str:
"""Generates a unique node ID for an entity in Mermaid diagram format.
The ID is constructed from the entity's file path, start line, and name/type,
and is sanitized to be Mermaid-compatible (alphanumeric + underscore only).
Args:
entity: The entity to generate an ID for.
Returns:
str: A Mermaid-compatible node ID string.
"""
file_path_str = entity.metadata.get("file_path", "unknown_file")
base_id = f"{file_path_str}_{entity.start_line}_{entity.name or entity.entity_type.name}"
# Ensure Mermaid compatibility (alphanumeric + underscore)
return "".join(c if c.isalnum() else "_" for c in base_id)
def process_entity_recursive(entity: LODEntity, current_subgraph_id: str | None = None) -> None:
"""Recursively processes an entity to build Mermaid diagram components.
This function handles:
- Creating subgraphs for modules and classes
- Creating nodes for functions, methods, variables and constants
- Processing dependencies (imports)
- Establishing relationships between entities
- Recursively processing child entities
Args:
entity: The entity to process
current_subgraph_id: The ID of the current subgraph context (if any)
Returns:
None: Modifies various data structures in the closure:
- node_definitions: Dictionary of node definitions
- subgraph_definitions: Dictionary of subgraph definitions
- subgraph_hierarchy: Dictionary of subgraph parent-child relationships
- edges: List of edges between nodes/subgraphs
- processed_ids: Set of processed entity IDs
- entity_map: Dictionary mapping node IDs to entities
- connected_ids: Set of connected node/subgraph IDs
- name_to_node_ids: Dictionary mapping names to node IDs
- global_nodes: Set of nodes defined outside subgraphs
"""
nonlocal processed_ids, connected_ids, global_nodes
entity_node_id = get_node_id(entity)
if entity.entity_type == EntityType.UNKNOWN or entity_node_id in processed_ids:
return
processed_ids.add(entity_node_id)
entity_map[entity_node_id] = entity
include_this_entity = should_include_entity(entity.entity_type)
next_subgraph_id = current_subgraph_id
# --- Handle Subgraphs (Module, Class) --- #
if entity.entity_type in (EntityType.MODULE, EntityType.CLASS) and include_this_entity:
subgraph_label = _escape_mermaid_label(
entity.name or Path(entity.metadata.get("file_path", "unknown")).name
)
subgraph_type = "moduleSubgraph" if entity.entity_type == EntityType.MODULE else "classSubgraph"
if entity.entity_type == EntityType.MODULE and current_subgraph_id: # Nested module
subgraph_type = "submoduleSubgraph"
subgraph_definitions[entity_node_id] = (subgraph_label, subgraph_type, [])
if current_subgraph_id:
subgraph_hierarchy[entity_node_id] = current_subgraph_id
# Mark the container as potentially connected if it has children or dependencies
connected_ids.add(entity_node_id)
next_subgraph_id = entity_node_id # Children belong to this new subgraph
# --- Handle Nodes (Functions, Methods, Vars, Consts, Imports) --- #
elif include_this_entity and entity.entity_type != EntityType.IMPORT: # Imports handled separately
node_definition = ""
node_class = ""
label = _escape_mermaid_label(entity.name or f"({entity.entity_type.name.lower()})")
if entity.entity_type in (EntityType.FUNCTION, EntityType.METHOD):
node_definition = f'{entity_node_id}("{label}")'
node_class = "funcNode"
elif entity.entity_type == EntityType.CONSTANT:
node_definition = f'{entity_node_id}["{label}"]'
node_class = "constNode"
elif entity.entity_type == EntityType.VARIABLE:
node_definition = f'{entity_node_id}["{label}"]'
node_class = "varNode"
# Add other types if needed
if node_definition and entity_node_id not in node_definitions:
node_definitions[entity_node_id] = (node_definition, node_class)
if current_subgraph_id:
subgraph_definitions[current_subgraph_id][2].append(entity_node_id)
else:
# Should not happen often if root is module, but handle just in case
global_nodes.add(entity_node_id)
# --- Add to Name Map (for call edges) --- #
if entity.entity_type in (EntityType.FUNCTION, EntityType.METHOD):
name = entity.name
if name:
if name not in name_to_node_ids:
name_to_node_ids[name] = []
name_to_node_ids[name].append(entity_node_id)
# --- Process Dependencies (Imports) --- #
dependencies = entity.metadata.get("dependencies", [])
# Imports are associated with the module/class they are in, or globally if nowhere else
if dependencies and should_include_relationship("imports"):
for dep in dependencies:
is_external = not dep.startswith(".") and not any(
dep.startswith(str(p).replace("\\", "/"))
for p in internal_paths
if p # Handle path separators
)
dep_id = "dep_" + "".join(c if c.isalnum() else "_" for c in dep)
dep_label = _escape_mermaid_label(dep)
if dep_id not in node_definitions and dep_id not in processed_ids:
processed_ids.add(dep_id) # Mark as processed to avoid duplicate definitions
dep_class = "externalImportNode" if is_external else "internalImportNode"
node_shape = f'(("{dep_label}"))' if is_external else f'["{dep_label}"]'
node_definitions[dep_id] = (f"{dep_id}{node_shape}", dep_class)
global_nodes.add(dep_id) # Imports are defined globally
# Add edge from the importing *container* (subgraph) to the dependency
# Use the entity_node_id of the *module* or *class* for the source of the import edge
source_node_id = (
entity_node_id
if entity.entity_type in (EntityType.MODULE, EntityType.CLASS)
else current_subgraph_id
)
if source_node_id and source_node_id != dep_id: # Check source_node_id validity
edge_tuple = (source_node_id, dep_id, "imports", "import")
if edge_tuple not in edges:
edges.append(edge_tuple)
connected_ids.add(source_node_id)
connected_ids.add(dep_id)
# --- Process Children Recursively --- #
for child in sorted(entity.children, key=lambda e: e.start_line):
process_entity_recursive(child, next_subgraph_id)
# --- Define Parent Edge (Declares) --- #
# Edge from container (subgraph) to child node/subgraph
child_node_id = get_node_id(child)
if (
next_subgraph_id # Ensure there is a parent subgraph
and child.entity_type != EntityType.UNKNOWN
and child_node_id in processed_ids # Ensure child was processed (not filtered out)
and should_include_relationship("declares")
):
# Only add edge if child is *directly* contained (node or subgraph)
is_child_node = child_node_id in node_definitions
is_child_subgraph = child_node_id in subgraph_definitions
if is_child_node or is_child_subgraph:
edge_tuple = (next_subgraph_id, child_node_id, "declares", "declare")
if edge_tuple not in edges:
edges.append(edge_tuple)
connected_ids.add(next_subgraph_id)
connected_ids.add(child_node_id)
# --- Main Processing Loop --- #
for entity in entities:
# Start processing from top-level modules
if entity.entity_type == EntityType.MODULE and entity.metadata.get("file_path"):
process_entity_recursive(entity, current_subgraph_id=None)
# --- Define Call Edges --- #
if should_include_relationship("calls"):
for caller_node_id, caller_entity in entity_map.items():
# Check if the caller is a function/method node that was actually defined
if caller_node_id in node_definitions and caller_entity.entity_type in (
EntityType.FUNCTION,
EntityType.METHOD,
):
calls = caller_entity.metadata.get("calls", [])
for called_name in calls:
# Try matching full name first, then simple name
possible_target_ids = []
if (
called_name in name_to_node_ids
): # Full name match? (e.g., class.method) - Less likely with simple parsing
possible_target_ids.extend(name_to_node_ids[called_name])
else:
simple_called_name = called_name.split(".")[-1]
if simple_called_name in name_to_node_ids:
possible_target_ids.extend(name_to_node_ids[simple_called_name])
for target_node_id in possible_target_ids:
# Ensure target is also a defined node and not the caller itself
if target_node_id in node_definitions and caller_node_id != target_node_id:
edge_tuple = (caller_node_id, target_node_id, "calls", "call")
if edge_tuple not in edges:
edges.append(edge_tuple)
connected_ids.add(caller_node_id)
connected_ids.add(target_node_id)
# --- Assemble Final Mermaid String --- #
mermaid_lines = ["graph LR"] # Or TD for Top-Down if preferred
# --- Define Style Strings (Instead of classDef) ---
style_map = {
# Node Styles
"funcNode": "fill:#007bff,stroke:#FFF,stroke-width:1px,color:white", # Blue
"constNode": "fill:#6f42c1,stroke:#FFF,stroke-width:1px,color:white", # Purple
"varNode": "fill:#fd7e14,stroke:#FFF,stroke-width:1px,color:white", # Orange
"internalImportNode": "fill:#20c997,stroke:#FFF,stroke-width:1px,color:white", # Teal
"externalImportNode": "fill:#ffc107,stroke:#333,stroke-width:1px,color:#333", # Yellow
# Subgraph Styles
"moduleSubgraph": "fill:#121630,color:#FFF", # Dark Grey BG
"submoduleSubgraph": "fill:#2a122e,color:#FFF", # Lighter Grey BG
"classSubgraph": "fill:#100f5e,color:#FFF", # Light Green BG
}
# --- Render Logic --- #
rendered_elements = set() # Track IDs of things actually rendered
output_lines = []
style_lines = [] # Collect style commands separately
used_style_keys = set() # Track which styles (funcNode, classSubgraph etc.) are used
# Function to recursively render subgraphs and their nodes
def render_subgraph(subgraph_id: str, indent: str = "") -> None:
"""Recursively renders a Mermaid subgraph and its contents.
Args:
subgraph_id: The ID of the subgraph to render
indent: String used for indentation in the output (default: "")
Returns:
None: Output is written to output_lines and style_lines lists
Side Effects:
- Adds to rendered_elements set to track rendered items
- Appends lines to output_lines for Mermaid graph definition
- Appends style commands to style_lines
- Updates used_style_keys with any styles actually used
"""
if subgraph_id in rendered_elements:
return
rendered_elements.add(subgraph_id)
label, sg_type, contained_node_ids = subgraph_definitions[subgraph_id]
output_lines.append(f'{indent}subgraph {subgraph_id}["{label}"]')
output_lines.append(f"{indent} direction LR") # Or TD
# Render nodes inside this subgraph
for node_id in contained_node_ids:
if node_id in node_definitions:
# Apply filtering if enabled
if self.config.mermaid_remove_unconnected and node_id not in connected_ids:
continue
if node_id in rendered_elements:
continue # Should not happen, but safeguard
rendered_elements.add(node_id)
definition, node_class = node_definitions[node_id]
output_lines.append(f"{indent} {definition}")
if node_class in style_map:
style_lines.append(f"{indent} style {node_id} {style_map[node_class]}")
used_style_keys.add(node_class) # Track used style
# Render nested subgraphs
nested_subgraphs = [sid for sid, parent_id in subgraph_hierarchy.items() if parent_id == subgraph_id]
for nested_id in sorted(nested_subgraphs): # Sort for consistent output
# Apply filtering if enabled - check if the subgraph itself or any node inside it is connected
is_nested_connected = subgraph_id in connected_ids or any(
nid in connected_ids for nid in subgraph_definitions[nested_id][2]
)
if self.config.mermaid_remove_unconnected and not is_nested_connected:
continue
render_subgraph(nested_id, indent + " ")
output_lines.append(f"{indent}end")
# Apply style definition to subgraph *after* end
if sg_type in style_map:
style_lines.append(f"{indent}style {subgraph_id} {style_map[sg_type]}")
used_style_keys.add(sg_type) # Track used style
# --- Define Global Nodes (Imports primarily) ---
output_lines.append("\n %% Global Nodes")
for node_id in sorted(global_nodes):
if node_id in node_definitions:
# Apply filtering if enabled
if self.config.mermaid_remove_unconnected and node_id not in connected_ids:
continue
if node_id in rendered_elements:
continue
rendered_elements.add(node_id)
definition, node_class = node_definitions[node_id]
output_lines.append(f" {definition}")
if node_class in style_map:
style_lines.append(f" style {node_id} {style_map[node_class]}")
used_style_keys.add(node_class) # Track used style
# --- Render Top-Level Subgraphs ---
output_lines.append("\n %% Subgraphs")
top_level_subgraphs = [sg_id for sg_id in subgraph_definitions if sg_id not in subgraph_hierarchy]
for sg_id in sorted(top_level_subgraphs):
# Apply filtering if enabled - check if the subgraph itself or any node inside it is connected
is_sg_connected = sg_id in connected_ids or any(
nid in connected_ids for nid in subgraph_definitions[sg_id][2]
)
if self.config.mermaid_remove_unconnected and not is_sg_connected:
continue
render_subgraph(sg_id)
# --- Render Edges --- #
output_lines.append("\n %% Edges")
link_styles = []
call_edge_indices = []
import_edge_indices = []
declare_edge_indices = []
filtered_edges = []
for _i, (source_id, target_id, label, edge_type) in enumerate(edges):
# Ensure both source and target were actually rendered (or are subgraphs that contain rendered nodes)
source_exists = source_id in rendered_elements or source_id in subgraph_definitions
target_exists = target_id in rendered_elements or target_id in subgraph_definitions
if source_exists and target_exists:
edge_str = ""
if edge_type == "import":
edge_str = f" {source_id} -.->|{label}| {target_id}"
import_edge_indices.append(len(filtered_edges)) # Index in the filtered list
elif edge_type == "call":
edge_str = f" {source_id} -->|{label}| {target_id}"
call_edge_indices.append(len(filtered_edges))
elif edge_type == "declare":
# Make declare edges less prominent
edge_str = f" {source_id} --- {target_id}" # Simple line, no label needed visually
declare_edge_indices.append(len(filtered_edges))
else: # Default or unknown edge type
edge_str = f" {source_id} --> {target_id}"
if edge_str:
filtered_edges.append(edge_str)
output_lines.extend(sorted(filtered_edges)) # Sort for consistency
# --- Apply Link Styles --- #
if call_edge_indices or import_edge_indices or declare_edge_indices:
output_lines.append("\n %% Link Styles")
link_styles.extend(
[f" linkStyle {idx} stroke:#28a745,stroke-width:2px;" for idx in call_edge_indices]
) # Green
link_styles.extend(
[
f" linkStyle {idx} stroke:#ffc107,stroke-width:1px,stroke-dasharray: 5 5;"
for idx in import_edge_indices
]
) # Yellow dashed
link_styles.extend(
[f" linkStyle {idx} stroke:#adb5bd,stroke-width:1px;" for idx in declare_edge_indices]
) # Gray thin
output_lines.extend(link_styles)
# --- Generate Dynamic Legend (if enabled) ---
legend_lines = []
legend_style_lines = []
if self.config.mermaid_show_legend and used_style_keys:
legend_lines.append("\n %% Legend")
legend_lines.append(" subgraph Legend")
legend_lines.append(" direction LR")
# Define all possible legend items and their corresponding style keys
legend_item_definitions = {
"legend_module": ("moduleSubgraph", '["Module/File"]'),
"legend_submodule": ("submoduleSubgraph", '["Sub-Module"]'),
"legend_class": ("classSubgraph", '["Class"]'),
"legend_func": ("funcNode", '("Function/Method")'),
"legend_const": ("constNode", '["Constant"]'),
"legend_var": ("varNode", '["Variable"]'),
"legend_import_int": ("internalImportNode", '["Internal Import"]'),
"legend_import_ext": ("externalImportNode", '(("External Import"))'),
}
for legend_id, (style_key, definition) in legend_item_definitions.items():
# Only add legend item if its corresponding style was actually used in the graph
if style_key in used_style_keys:
legend_lines.append(f" {legend_id}{definition}")
# Also add its style command to the list of styles
if style_key in style_map:
legend_style_lines.append(f" style {legend_id} {style_map[style_key]}")
legend_lines.append(" end")
legend_lines.append("") # Add a blank line after legend
# --- Assemble Final Output --- #
mermaid_lines.extend(legend_lines) # Add legend definitions (if any)
mermaid_lines.extend(output_lines) # Add main graph structure and edges
# Append all collected style commands at the end
all_style_lines = style_lines + legend_style_lines
if all_style_lines:
mermaid_lines.append("\n %% Styles")
mermaid_lines.extend(sorted(all_style_lines))
return "\n".join(mermaid_lines)
def generate_documentation(self, entities: list[LODEntity], metadata: dict) -> str:
"""
Generate markdown documentation from the processed LOD entities.
Args:
entities: List of LOD entities
metadata: Repository metadata
Returns:
Generated documentation as string
"""
content = []
# Add header with repository information
target_path_str = metadata.get("target_path", "")
original_path = metadata.get("original_path", "")
command_arg = metadata.get("command_arg", "")
# Debug logging to see what values we're receiving
logger.debug(
f"Metadata values for heading: "
f"command_arg='{command_arg}', "
f"original_path='{original_path}', "
f"target_path='{target_path_str}'"
)
# Use the exact command argument if available
if command_arg:
repo_name = command_arg
# Fall back to original path if available
elif original_path:
repo_name = original_path
# Further fallback to just the directory name
elif target_path_str:
repo_name = Path(target_path_str).name
else:
repo_name = metadata.get("name", "Repository")
content.append(f"# `{repo_name}` Documentation")
content.append(f"\nGenerated on: {datetime.now(UTC).strftime('%Y-%m-%d %H:%M:%S')}")
if "description" in metadata:
content.append("\n" + metadata.get("description", ""))
# Add repository statistics
if "stats" in metadata:
stats = metadata["stats"]
content.append("\n## Document Statistics")
content.append(f"- Total files scanned: {stats.get('total_files_scanned', 0)}")
content.append(f"- Total lines of code: {stats.get('total_lines', 0)}")
content.append(f"- Languages: {', '.join(stats.get('languages', []))}")
# Add directory structure if requested
if self.config.include_tree and "tree" in metadata:
content.append("\n## Directory Structure")
content.append("```")
content.append(metadata["tree"])
content.append("```")
# Add Mermaid diagram if entities exist and config enables it
if entities and self.config.include_entity_graph:
content.append("\n## Entity Relationships")
content.append("```mermaid")
mermaid_diagram = self._generate_mermaid_diagram(entities)
content.append(mermaid_diagram)
content.append("```")
# Add table of contents for the scanned files
content.append("\n## Scanned Files")
# Group entities by file
files: dict[Path, list[LODEntity]] = {}
# Get the target path from metadata
target_path_str = metadata.get("target_path", "")
target_path = Path(target_path_str) if target_path_str else None
for entity in entities:
file_path = Path(entity.metadata.get("file_path", ""))
if not file_path.name:
continue
if file_path not in files:
files[file_path] = []
files[file_path].append(entity)
# Create TOC entries with properly formatted relative paths
for i, file_path in enumerate(sorted(files.keys()), 1):
# Get path relative to the target directory
try:
if target_path and target_path.exists():
# Get the relative path from the target directory
rel_path = file_path.relative_to(target_path)
# Format the path with a leading slash for files directly in the target directory
rel_path_str = f"/{rel_path}"
# Create a clean anchor ID by converting to lowercase and removing all special characters
# including underscores, to create standard anchor IDs
filename = file_path.name
clean_filename = "".join(c.lower() if c.isalnum() else "" for c in filename)
# Handle paths with subdirectories
if len(rel_path.parts) > 1:
# Get directory name and filename
directory = rel_path.parts[-2] # Last directory before the file
anchor = f"{directory}{clean_filename}" # e.g., "raginitpy"
else:
# Just use the clean filename for files at root
anchor = clean_filename
else:
# Fall back to just the filename if target path is not available
rel_path_str = f"/{file_path.name}"
clean_filename = "".join(c.lower() if c.isalnum() else "" for c in file_path.name)
anchor = clean_filename
content.append(f"{i}. [{rel_path_str}](#{anchor})")
except ValueError:
# If relative_to fails, just use the filename
rel_path_str = f"/{file_path.name}"
clean_filename = "".join(c.lower() if c.isalnum() else "" for c in file_path.name)
content.append(f"{i}. [{rel_path_str}](#{clean_filename})")
# Add code documentation grouped by file
content.append("\n## Code Documentation")
# Helper function to format a single entity recursively
def format_entity_recursive(entity: LODEntity, level: int) -> list[str]:
"""Recursively formats an entity and its children into markdown documentation.
Args:
entity: The entity to format
level: The current indentation level in the hierarchy
Returns:
A list of markdown-formatted strings representing the entity and its children
"""
entity_content = []
indent = " " * level
list_prefix = f"{indent}- "
# Basic entry: Type and Name/Signature
entry_line = f"{list_prefix}**{entity.entity_type.name.capitalize()}**: `{entity.name}`"
if self.config.lod_level.value >= LODLevel.STRUCTURE.value and entity.signature:
entry_line = f"{list_prefix}**{entity.entity_type.name.capitalize()}**: `{entity.signature}`"
# Special handling for comments
elif entity.entity_type == EntityType.COMMENT and entity.content:
comment_lines = entity.content.strip().split("\n")
# Format as italicized blockquote
entity_content.extend([f"{indent}> *{line.strip()}*" for line in comment_lines])
entry_line = None # Don't print the default entry line
elif not entity.name and entity.entity_type == EntityType.MODULE:
# Skip module node if it has no name (handled by file heading)
# Don't add the list item itself
entry_line = None # Don't print the default entry line
# Add the generated entry line if it wasn't skipped
if entry_line:
entity_content.append(entry_line)
# Add Docstring if level is DOCS or FULL (and not a comment)
if (
entity.entity_type != EntityType.COMMENT
and self.config.lod_level.value >= LODLevel.DOCS.value
and entity.docstring
):
docstring_lines = entity.docstring.strip().split("\n")
# Format docstring lines with proper indentation
entity_content.append(f"{indent} >")
entity_content.extend([f"{indent} > {line}" for line in docstring_lines])
# Add Content if level is FULL
if self.config.lod_level.value >= LODLevel.FULL.value and entity.content:
content_lang = entity.language or ""
entity_content.append(f"{indent} ```{content_lang}")
# Indent content lines as well
content_lines = entity.content.strip().split("\n")
entity_content.extend([f"{indent} {line}" for line in content_lines])
entity_content.append(f"{indent} ```")
# Recursively format children
for child in sorted(entity.children, key=lambda e: e.start_line):
# Skip unknown children
if child.entity_type != EntityType.UNKNOWN:
entity_content.extend(format_entity_recursive(child, level + 1))
return entity_content
first_file = True
for i, (file_path, file_entities) in enumerate(sorted(files.items()), 1):
# Add a divider before each file section except the first one
if not first_file:
content.append("\n---") # Horizontal rule
first_file = False
# Get path relative to the target directory
try:
if target_path and target_path.exists():
# Get the relative path from the target directory
rel_path = file_path.relative_to(target_path)
# Format the path with a leading slash for files directly in the target directory
rel_path_str = f"/{rel_path}"
# Create a clean anchor ID by converting to lowercase and removing all special characters
# including underscores, to create standard anchor IDs
filename = file_path.name
clean_filename = "".join(c.lower() if c.isalnum() else "" for c in filename)
# Handle paths with subdirectories
if len(rel_path.parts) > 1:
# Get directory name and filename
directory = rel_path.parts[-2] # Last directory before the file
anchor = f"{directory}{clean_filename}" # e.g., "raginitpy"
else:
# Just use the clean filename for files at root
anchor = clean_filename
else:
# Fall back to just the filename if target path is not available
rel_path_str = f"/{file_path.name}"
clean_filename = "".join(c.lower() if c.isalnum() else "" for c in file_path.name)
anchor = clean_filename
# Add a custom ID to the heading to match our anchor
content.append(f"\n### {i}. {rel_path_str}")
except ValueError:
# If relative_to fails, just use the filename
rel_path_str = f"/{file_path.name}"
clean_filename = "".join(c.lower() if c.isalnum() else "" for c in file_path.name)
content.append(f"\n### {i}. {rel_path_str}")
# Sort top-level entities by line number
sorted_entities = sorted(file_entities, key=lambda e: e.start_line)
if self.config.lod_level == LODLevel.SIGNATURES:
# Level 1: Only top-level signatures
for entity in sorted_entities:
if entity.entity_type in (
EntityType.CLASS,
EntityType.FUNCTION,
EntityType.METHOD,
EntityType.INTERFACE,
EntityType.MODULE,
):
content.append(f"\n#### {entity.name or '(Module Level)'}")
if entity.signature:
sig_lang = entity.language or ""
content.append(f"\n```{sig_lang}")
content.append(entity.signature)
content.append("```")
else:
# Levels 2, 3, 4: Use recursive formatting
for entity in sorted_entities:
# Process top-level entities (usually MODULE, but could be others if file has only one class/func)
if entity.entity_type == EntityType.MODULE:
# If it's the module, start recursion from its children
for child in sorted(entity.children, key=lambda e: e.start_line):
# Skip unknown children
if child.entity_type != EntityType.UNKNOWN:
content.extend(format_entity_recursive(child, level=0))
# Handle cases where the top-level entity isn't MODULE (e.g., a file with just one class)
# Skip if unknown
elif entity.entity_type != EntityType.UNKNOWN:
content.extend(format_entity_recursive(entity, level=0))
return "\n".join(content)
|