Skip to content

Lod

Level of Detail (LOD) implementation for code analysis.

This module provides functionality for generating different levels of detail from source code using tree-sitter analysis. The LOD approach provides a hierarchical view of code, from high-level entity names to detailed implementations.

LOD levels: - SIGNATURES (1): top-level entity names and signatures - STRUCTURE (2): signatures for all entities, shown as an indented structure - DOCS (3): structure plus docstrings for all entities - FULL (4): complete entity implementations

logger module-attribute

logger = getLogger(__name__)

LODLevel

Bases: Enum

Enumeration of Level of Detail levels.

Source code in src/codemap/processor/lod.py
32
33
34
35
36
37
38
class LODLevel(Enum):
	"""Levels of detail, from terse signatures up to full source."""

	SIGNATURES = 1  # Level 1: top-level entity signatures only
	STRUCTURE = 2  # Level 2: signatures for every entity, nested/indented
	DOCS = 3  # Level 3: structure plus docstrings for all entities
	FULL = 4  # Level 4: everything, including implementation bodies

SIGNATURES class-attribute instance-attribute

SIGNATURES = 1

STRUCTURE class-attribute instance-attribute

STRUCTURE = 2

DOCS class-attribute instance-attribute

DOCS = 3

FULL class-attribute instance-attribute

FULL = 4

LODEntity dataclass

Represents a code entity at a specific level of detail.

Source code in src/codemap/processor/lod.py
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
@dataclass
class LODEntity:
	"""A single code entity (class, function, ...) captured at some level of detail."""

	name: str
	"""Identifier of the entity."""

	entity_type: EntityType
	"""Kind of entity, e.g. class or function."""

	start_line: int
	"""First line of the entity in its source file (1-indexed)."""

	end_line: int
	"""Last line of the entity in its source file (1-indexed)."""

	docstring: str = ""
	"""Docstring attached to the entity, or empty when absent."""

	signature: str = ""
	"""Declaration signature (e.g. function parameters), or empty when absent."""

	content: str = ""
	"""Full source text of the entity's implementation."""

	children: list[LODEntity] = field(default_factory=list)
	"""Entities nested inside this one."""

	language: str = ""
	"""Programming language the entity is written in."""

	metadata: dict[str, Any] = field(default_factory=dict)
	"""Extra per-entity metadata (file path, dependencies, calls, ...)."""

__init__

__init__(
	name: str,
	entity_type: EntityType,
	start_line: int,
	end_line: int,
	docstring: str = "",
	signature: str = "",
	content: str = "",
	children: list[LODEntity] = list(),
	language: str = "",
	metadata: dict[str, Any] = dict(),
) -> None

name instance-attribute

name: str

Name of the entity.

entity_type instance-attribute

entity_type: EntityType

Type of entity (class, function, etc.).

start_line instance-attribute

start_line: int

Starting line number (1-indexed).

end_line instance-attribute

end_line: int

Ending line number (1-indexed).

docstring class-attribute instance-attribute

docstring: str = ''

Entity docstring, if available.

signature class-attribute instance-attribute

signature: str = ''

Entity signature (e.g., function parameters), if available.

content class-attribute instance-attribute

content: str = ''

Complete entity content/implementation.

children class-attribute instance-attribute

children: list[LODEntity] = field(default_factory=list)

Child entities contained within this entity.

language class-attribute instance-attribute

language: str = ''

Programming language of the entity.

metadata class-attribute instance-attribute

metadata: dict[str, Any] = field(default_factory=dict)

Additional metadata about the entity.

LODGenerator

Generates different levels of detail from source code.

Source code in src/codemap/processor/lod.py
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
class LODGenerator:
	"""Generates different levels of detail from source code."""

	def __init__(self, analyzer: TreeSitterAnalyzer | None = None) -> None:
		"""
		Initialize the LOD generator.

		Args:
			analyzer: Optional shared TreeSitterAnalyzer instance. If None, a new one is created.
		"""
		self.analyzer = analyzer or TreeSitterAnalyzer()

	def generate_lod(self, file_path: Path, level: LODLevel = LODLevel.STRUCTURE) -> LODEntity | None:
		"""
		Generate LOD representation for a file.

		Args:
		    file_path: Path to the file to analyze
		    level: Level of detail to generate. Defaults to LODLevel.STRUCTURE.

		Returns:
		    LODEntity representing the file, or None if analysis failed

		"""
		# The analyzer handles content reading and caching internally.
		analysis_result = self.analyzer.analyze_file(file_path)
		if not analysis_result:
			# Lazy %-style args: the message is only built if the record is emitted.
			logger.warning("Failed to analyze %s", file_path)
			return None

		# file_path is forwarded so the root entity can record it in metadata.
		return self._convert_to_lod(analysis_result, level, file_path)

	def _convert_to_lod(
		self, analysis_result: dict[str, Any], level: LODLevel, file_path: Path | None = None
	) -> LODEntity:
		"""
		Convert tree-sitter analysis to LOD format.

		Args:
		    analysis_result: Tree-sitter analysis result
		    level: Level of detail to generate
		    file_path: Path to the file being analyzed (present only for the root entity)

		Returns:
		    LODEntity representation

		"""
		entity_type_str = analysis_result.get("type", "UNKNOWN")
		try:
			entity_type = EntityType[entity_type_str]
		except KeyError:
			# Unrecognized node types degrade gracefully instead of raising.
			entity_type = EntityType.UNKNOWN

		location = analysis_result.get("location", {})

		entity = LODEntity(
			name=analysis_result.get("name", ""),
			entity_type=entity_type,
			start_line=location.get("start_line", 1),
			end_line=location.get("end_line", 1),
			language=analysis_result.get("language", ""),
		)

		if file_path:  # Only the root entity carries file-level metadata.
			entity.metadata["file_path"] = str(file_path)
			# If the analyzer supplied the full file content, keep it on the root.
			if "full_content_str" in analysis_result:
				entity.metadata["full_content_str"] = analysis_result["full_content_str"]

		# Hoisted: the same content string is used for signature and FULL levels.
		content = analysis_result.get("content", "")

		if level.value >= LODLevel.DOCS.value:
			entity.docstring = analysis_result.get("docstring", "")

		if level.value >= LODLevel.SIGNATURES.value:
			entity.signature = self._extract_signature(content, entity_type, entity.language)

		# Comments are always captured verbatim, regardless of the requested level.
		if level.value >= LODLevel.FULL.value or entity_type == EntityType.COMMENT:
			entity.content = content

		# Recurse into children; file_path is deliberately not passed down,
		# as it marks the root entity only.
		for child in analysis_result.get("children", []):
			entity.children.append(self._convert_to_lod(child, level))

		# Propagate optional relationship metadata when present.
		for key in ("dependencies", "calls"):
			if key in analysis_result:
				entity.metadata[key] = analysis_result[key]

		return entity

	def _extract_signature(self, content: str, entity_type: EntityType, _language: str) -> str:
		"""
		Extract function/method signature from content.

		This is a simple first-line heuristic; ideally, the language-specific
		handlers should provide this functionality.

		Args:
		    content: Full entity content
		    entity_type: Type of entity
		    _language: Programming language (unused currently)

		Returns:
		    Signature string, or "" when the entity type has no declaration line

		"""
		if not content:
			return ""

		# Only declaration-style entities have a meaningful first-line signature.
		if entity_type in (EntityType.FUNCTION, EntityType.METHOD, EntityType.CLASS, EntityType.INTERFACE):
			# First line only, with trailing ':' / '{' delimiters stripped.
			return content.split("\n", 1)[0].rstrip(":{")

		return ""

__init__

__init__(
	analyzer: TreeSitterAnalyzer | None = None,
) -> None

Initialize the LOD generator.

Parameters:

Name Type Description Default
analyzer TreeSitterAnalyzer | None

Optional shared TreeSitterAnalyzer instance. If None, a new one is created.

None
Source code in src/codemap/processor/lod.py
79
80
81
82
83
84
85
86
def __init__(self, analyzer: TreeSitterAnalyzer | None = None) -> None:
	"""
	Set up the generator with a tree-sitter analyzer.

	Args:
		analyzer: Optional shared TreeSitterAnalyzer instance. If None, a new one is created.
	"""
	# Fall back to a fresh analyzer when no (truthy) instance was supplied.
	self.analyzer = analyzer if analyzer else TreeSitterAnalyzer()

analyzer instance-attribute

analyzer = analyzer or TreeSitterAnalyzer()

generate_lod

generate_lod(
	file_path: Path, level: LODLevel = STRUCTURE
) -> LODEntity | None

Generate LOD representation for a file.

Parameters:

Name Type Description Default
file_path Path

Path to the file to analyze

required
level LODLevel

Level of detail to generate. Defaults to STRUCTURE.

STRUCTURE

Returns:

Type Description
LODEntity | None

LODEntity representing the file, or None if analysis failed

Source code in src/codemap/processor/lod.py
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
def generate_lod(self, file_path: Path, level: LODLevel = LODLevel.STRUCTURE) -> LODEntity | None:
	"""
	Build the LOD representation for a single file.

	Args:
	    file_path: Path to the file to analyze
	    level: Level of detail to generate (default changed to STRUCTURE)

	Returns:
	    LODEntity representing the file, or None if analysis failed

	"""
	# The analyzer takes care of reading and caching file contents itself.
	result = self.analyzer.analyze_file(file_path)
	if result:
		# Pass file_path along so the root entity can record it in metadata.
		return self._convert_to_lod(result, level, file_path)

	logger.warning(f"Failed to analyze {file_path}")
	return None