Chunking

Module for chunking source code files using LODGenerator.

logger `module-attribute`

logger = getLogger(__name__)

TreeSitterChunker

Chunks code files based on LODEntity structure generated by LODGenerator.

Source code in src/codemap/processor/vector/chunking.py

class TreeSitterChunker:
	"""Chunks code files based on LODEntity structure generated by LODGenerator."""

	def __init__(
		self,
		lod_generator: LODGenerator | None = None,
		config_loader: "ConfigLoader | None" = None,
		git_context: GitRepoContext | None = None,
		repo_checksum_calculator: "RepoChecksumCalculator | None" = None,
	) -> None:
		"""
		Set up the chunker and read its limits from the embedding configuration.

		Args:
		    lod_generator: Generator used to build LODEntity trees. A new
		        LODGenerator is created when the argument is None (or falsy).
		    config_loader: Configuration source. ``ConfigLoader.get_instance()``
		        is used when the argument is None (or falsy).
		    git_context: Git repository context; stored unchanged, may be None.
		    repo_checksum_calculator: Stored unchanged; not used by the constructor.

		"""
		self.lod_generator = lod_generator if lod_generator else LODGenerator()

		if not config_loader:
			# Imported here rather than at module level; only needed when no
			# loader was supplied by the caller.
			from codemap.config import ConfigLoader

			config_loader = ConfigLoader.get_instance()
		self.config_loader = config_loader

		# The chunking limits are read straight from embedding.chunking.
		chunking_settings = self.config_loader.get.embedding.chunking
		self.max_hierarchy_depth = chunking_settings.max_hierarchy_depth
		self.max_file_lines = chunking_settings.max_file_lines

		self.git_context = git_context
		self.repo_checksum_calculator = repo_checksum_calculator

	def _get_entity_code_content(self, entity: LODEntity, file_lines: list[str]) -> str | None:
		"""
		Return the source text spanned by an entity, or None if it cannot be sliced.

		The entity's line range is treated as 1-based and inclusive: it is mapped
		onto ``file_lines`` and the selected lines are joined with newlines. A
		missing start or end line yields None silently; a range that does not fit
		inside ``file_lines`` yields None after a warning is logged.
		"""
		first_line, last_line = entity.start_line, entity.end_line
		if first_line is None or last_line is None:
			return None

		total_lines = len(file_lines)
		lo = first_line - 1  # 1-based start -> 0-based slice start
		hi = last_line  # inclusive 1-based end doubles as the exclusive slice end
		range_fits = 0 <= lo < hi <= total_lines
		if not range_fits:
			logger.warning(
				"Invalid line numbers for entity %s in %s: start=%s, end=%s, total_lines=%d",
				entity.name,
				entity.metadata.get("file_path"),
				first_line,
				last_line,
				total_lines,
			)
			return None

		return "\n".join(file_lines[lo:hi])

	def _build_hierarchy_path(self, entity: LODEntity, parent_path: str = "") -> str:
		"""
		Compose the dotted hierarchy path for an entity.

		Args:
		        entity: Entity whose own segment is appended to the path.
		        parent_path: Dotted path of the enclosing entities; empty for a root.

		Returns:
		        ``parent_path`` and the entity's segment joined by ".", or only the
		        segment when ``parent_path`` is empty. An entity without a name
		        uses its lower-cased entity type wrapped in angle brackets.

		"""
		segment = entity.name if entity.name else f"<{entity.entity_type.name.lower()}>"
		return f"{parent_path}.{segment}" if parent_path else segment

	def _extract_nested_entities(self, entity: LODEntity) -> list[dict[str, Any]]:
		"""
		Summarise the descendants of an entity for use as chunk context.

		Descendants are visited depth-first in document order (a parent is listed
		before its children). UNKNOWN entities are left out together with their
		subtrees, and children are only followed while the current depth is below
		``self.max_hierarchy_depth``.

		Args:
		        entity: Entity whose children are summarised; the entity itself
		                is not included in the result.

		Returns:
		        One dict per visited descendant with the keys ``type``, ``name``,
		        ``signature``, ``depth`` (1 for direct children) and ``line_range``
		        (``"start-end"``, or an empty string when either bound is falsy).

		"""
		summaries: list[dict[str, Any]] = []

		# Explicit stack instead of recursion; children are pushed in reverse so
		# that popping visits them in their original order.
		pending = [(child, 1) for child in reversed(list(entity.children))]
		while pending:
			node, level = pending.pop()

			if node.entity_type == EntityType.UNKNOWN:
				continue

			if node.start_line and node.end_line:
				span = f"{node.start_line}-{node.end_line}"
			else:
				span = ""

			summaries.append(
				{
					"type": node.entity_type.name,
					"name": node.name or f"<{node.entity_type.name.lower()}>",
					"signature": node.signature or "",
					"depth": level,
					"line_range": span,
				}
			)

			if level < self.max_hierarchy_depth:
				pending.extend((grandchild, level + 1) for grandchild in reversed(list(node.children)))

		return summaries

	def _chunk_entity_recursive(
		self,
		entity: LODEntity,
		absolute_file_path: Path,
		file_lines: list[str],
		git_hash: str | None,
		file_content_hash: str,
		language: str,
		last_modified_time: float,
		parent_hierarchy: str = "",
		file_entity: LODEntity | None = None,
	) -> Generator[ChunkSchema, None, None]:
		"""
		Walk the LODEntity tree and yield one chunk per chunkable entity.

		MODULE, CLASS, INTERFACE and STRUCT entities produce a chunk that also
		lists their nested entities; FUNCTION and METHOD entities produce a chunk
		prefixed with the file name when ``file_entity`` is a MODULE. Entities of
		any other type produce no chunk of their own but their children are still
		visited. UNKNOWN entities are skipped together with their subtree.

		Args:
			entity: Entity to process.
			absolute_file_path: Path of the file the entity belongs to.
			file_lines: Lines of the file, used to slice out entity code.
			git_hash: Only forwarded to the recursive calls; not otherwise used here.
			file_content_hash: Hash of the whole file, stored in chunk metadata.
			language: Language label used for the fenced code block and metadata.
			last_modified_time: File modification timestamp stored in metadata.
			parent_hierarchy: Dotted hierarchy path of the enclosing entities.
			file_entity: Root entity of the file, forwarded unchanged to children.

		Yields:
			ChunkSchema objects for this entity (if chunkable) and its descendants.
		"""
		# UNKNOWN entities are dropped entirely, including everything below them.
		if entity.entity_type == EntityType.UNKNOWN:
			return

		entity_hierarchy = self._build_hierarchy_path(entity, parent_hierarchy)

		# Containers get a structure overview; callables get the file-name prefix.
		container_types = (
			EntityType.MODULE,
			EntityType.CLASS,
			EntityType.INTERFACE,
			EntityType.STRUCT,
		)
		callable_types = (
			EntityType.FUNCTION,
			EntityType.METHOD,
		)
		is_container = entity.entity_type in container_types
		is_callable = entity.entity_type in callable_types

		if (is_container or is_callable) and entity.start_line is not None and entity.end_line is not None:
			try:
				chunk = self._build_entity_chunk(
					entity,
					absolute_file_path,
					file_lines,
					file_content_hash,
					language,
					last_modified_time,
					entity_hierarchy,
					include_structure=is_container,
					file_entity=file_entity,
				)
			except (ValueError, TypeError, KeyError, AttributeError):
				# A failure on one entity must not stop the rest of the tree.
				logger.exception("Error processing LOD entity %s in %s", entity.name, absolute_file_path)
			else:
				if chunk is not None:
					yield chunk

		# Children are always visited, whether or not this entity produced a chunk.
		for child in entity.children:
			yield from self._chunk_entity_recursive(
				child,
				absolute_file_path,
				file_lines,
				git_hash,
				file_content_hash,
				language,
				last_modified_time,
				entity_hierarchy,
				file_entity=file_entity,
			)

	def _build_entity_chunk(
		self,
		entity: LODEntity,
		absolute_file_path: Path,
		file_lines: list[str],
		file_content_hash: str,
		language: str,
		last_modified_time: float,
		entity_hierarchy: str,
		include_structure: bool,
		file_entity: LODEntity | None,
	) -> ChunkSchema | None:
		"""
		Render a single entity into a ChunkSchema, or return None if it has no code.

		Args:
			entity: Entity to render; its start and end lines must be set.
			absolute_file_path: Path of the file the entity belongs to.
			file_lines: Lines of the file, used to slice out the entity code.
			file_content_hash: Hash of the whole file, stored in chunk metadata.
			language: Language label used for the fenced code block and metadata.
			last_modified_time: File modification timestamp stored in metadata.
			entity_hierarchy: Dotted hierarchy path of the entity.
			include_structure: When True, add a "Contains:" overview of nested
				entities. When False, prefix the chunk with a "File:" line if
				``file_entity`` is a MODULE.
			file_entity: Root entity of the file, or None.

		Returns:
			The chunk, or None when no (non-empty) code could be extracted.
		"""
		code_content = self._get_entity_code_content(entity, file_lines)
		if not code_content:
			return None

		type_line = f"Type: {entity.entity_type.name}"
		if not include_structure and file_entity and file_entity.entity_type == EntityType.MODULE:
			# The file line shares a section with the type line (single newline).
			type_line = f"File: {file_entity.name or absolute_file_path.name}\n{type_line}"

		content_parts = [type_line, f"Path: {entity_hierarchy}"]
		if entity.name:
			content_parts.append(f"Name: {entity.name}")
		if entity.signature:
			content_parts.append(f"Signature: {entity.signature}")
		if entity.docstring:
			content_parts.append(f"Docstring:\n{entity.docstring}")

		if include_structure:
			nested_entities = self._extract_nested_entities(entity)
			if nested_entities:
				content_parts.append("Contains:")
				for ne in nested_entities:
					indent = "  " * ne["depth"]
					content_parts.append(
						f"{indent}- {ne['type']}: {ne['name']} {ne['signature']} (lines {ne['line_range']})"
					)

		# The code appears twice: once fenced with the language, once raw.
		content_parts.append(f"Code:\n```{language}\n{code_content}\n```")
		content_parts.append(f"Raw:\n{code_content}")

		chunk_content = "\n\n".join(content_parts)

		# The content hash covers the fully rendered chunk text, not just the code.
		content_hasher = xxhash.xxh3_64()
		content_hasher.update(chunk_content.encode("utf-8"))
		chunk_content_hash = content_hasher.hexdigest()

		metadata = self._make_chunk_metadata(
			chunk_content_hash,
			file_content_hash,
			absolute_file_path,
			entity.start_line,
			entity.end_line,
			entity.entity_type.name,
			entity.name or "",
			language,
			entity_hierarchy,
			last_modified_time,
		)
		return ChunkSchema(content=chunk_content, metadata=metadata)

	def chunk_file(
		self,
		absolute_file_path: Path,
		git_hash: str | None = None,
		lod_level: LODLevel = LODLevel.FULL,  # Use FULL for max info, not DETAIL
	) -> Generator[ChunkSchema, None, None]:
		"""
		Generate code chunks for a file from its LODGenerator entity tree.

		A whole-file chunk is yielded first when the file has fewer than
		``self.max_file_lines`` lines; chunks for the individual entities follow.
		OSError, ValueError, TypeError, KeyError and AttributeError raised while
		chunking are logged at debug level and end the generator instead of
		propagating to the caller.

		Args:
		    absolute_file_path: Path of the file to chunk. A relative path is
		        resolved (with a warning) before use.
		    git_hash: Optional Git hash of the file content (blob hash). It is
		        only forwarded to the recursive entity chunker.
		    lod_level: The level of detail to request from LODGenerator.

		Yields:
		    ChunkSchema objects, each pairing the rendered chunk text with its
		    metadata.

		"""
		if not absolute_file_path.is_absolute():
			logger.warning("chunk_file received relative path: %s. Resolving.", absolute_file_path)
			absolute_file_path = absolute_file_path.resolve()

		try:
			last_modified_time = absolute_file_path.stat().st_mtime

			# Generate the LODEntity tree for the file using the specified level of detail
			root_entity = self.lod_generator.generate_lod(absolute_file_path, lod_level)

			if not root_entity:
				logger.debug("LODGenerator returned no entity for %s, skipping chunking", absolute_file_path)
				return

			# Prefer the content carried in the root entity's metadata, if present
			content = root_entity.metadata.get("full_content_str")

			if content is None:  # Fall back to reading the file directly
				logger.debug(
					"full_content_str not in root_entity metadata for %s. Reading file directly.", absolute_file_path
				)
				content = read_file_content(absolute_file_path)
				if content is None:
					logger.debug(
						"Skipping file %s - could not obtain content via LOD or direct read", absolute_file_path
					)
					return

			# "unknown" is used when the root entity metadata carries no language
			resolved_language = root_entity.metadata.get("language", "unknown")
			file_lines = content.splitlines()

			# The file hash covers the entire file content
			file_content_hasher = xxhash.xxh3_128()
			file_content_hasher.update(content.encode("utf-8"))
			entire_file_content_hash = file_content_hasher.hexdigest()

			# First, create a chunk for the entire file if it's small enough
			if len(file_lines) < self.max_file_lines:
				try:
					whole_file_content = "\n".join(file_lines)

					# Information about the file as a whole
					file_name = absolute_file_path.name
					content_parts = ["Type: FILE", f"Path: {file_name}", f"Name: {file_name}"]

					# Add the root entity's docstring if it has one
					if root_entity.docstring:
						content_parts.append(f"Docstring:\n{root_entity.docstring}")

					# Structure overview of everything nested under the root entity
					nested_entities = self._extract_nested_entities(root_entity)
					if nested_entities:
						content_parts.append("Contains:")
						for ne in nested_entities:
							indent = "  " * ne["depth"]
							content_parts.append(
								f"{indent}- {ne['type']}: {ne['name']} {ne['signature']} (lines {ne['line_range']})"
							)

					# The code appears twice: once fenced with the language, once raw
					content_parts.append(f"Code:\n```{resolved_language}\n{whole_file_content}\n```")
					content_parts.append(f"Raw:\n{whole_file_content}")

					chunk_content = "\n\n".join(content_parts)

					# The content hash covers the fully rendered chunk text
					content_hasher = xxhash.xxh3_64()
					content_hasher.update(chunk_content.encode("utf-8"))
					chunk_content_hash = content_hasher.hexdigest()

					metadata = self._make_chunk_metadata(
						chunk_content_hash,
						entire_file_content_hash,
						absolute_file_path,
						1,
						len(file_lines),
						"FILE",
						file_name,
						resolved_language,
						file_name,
						last_modified_time,
					)
					yield ChunkSchema(content=chunk_content, metadata=metadata)
				except (ValueError, TypeError, KeyError, AttributeError) as e:
					# A failed whole-file chunk does not prevent the entity chunks below
					logger.warning("Error creating whole-file chunk for %s: %s", absolute_file_path, e)

			# Then create more specific chunks for the individual entities
			yield from self._chunk_entity_recursive(
				root_entity,
				absolute_file_path,
				file_lines,
				git_hash,
				entire_file_content_hash,
				resolved_language,
				last_modified_time,
				file_entity=root_entity,
			)

		except (OSError, ValueError, TypeError, KeyError, AttributeError) as e:
			# Best effort: an unchunkable file is reported at debug level only
			logger.debug("Failed to chunk file %s: %s", absolute_file_path, e)

	def _make_git_metadata(self, relative_file_path_for_git: str, start_line: int, end_line: int) -> GitMetadataSchema:
		"""Get git metadata for a line range of a file.

		When only a bare filename is given and it is not itself a tracked path,
		the tracked files are searched for a single nested path ending in that
		filename, and that path is used instead.

		Args:
			relative_file_path_for_git (str): Path relative to repo root or filename
			start_line (int): Start line of the chunk
			end_line (int): End line of the chunk

		Returns:
			GitMetadataSchema: Metadata from the git context, or an empty,
			untracked default when no git context is available.
		"""
		if not self.git_context:
			# No git context: return empty/default metadata
			return GitMetadataSchema(
				git_hash="",
				tracked=False,
				branch="",
				blame=[],
			)

		tracked_files = self.git_context.tracked_files
		is_bare_filename = "/" not in relative_file_path_for_git and "\\" not in relative_file_path_for_git

		# A bare filename that is itself tracked (a file at the repo root) is
		# already a valid path; rewriting it to a nested file of the same name
		# would attach another file's metadata.
		# NOTE(review): assumes tracked_files supports `in` on path strings — confirm.
		if is_bare_filename and tracked_files and relative_file_path_for_git not in tracked_files:
			nested_suffix = "/" + relative_file_path_for_git
			matching_paths = [p for p in tracked_files if p.endswith(nested_suffix)]
			# Only an unambiguous match is trusted
			if len(matching_paths) == 1:
				relative_file_path_for_git = matching_paths[0]
				logger.debug("Updated path to use tracked file path: %s", relative_file_path_for_git)

		# Get metadata from git context
		return self.git_context.get_metadata_schema(relative_file_path_for_git, start_line, end_line)

	def _make_chunk_metadata(
		self,
		chunk_content_hash: str,
		file_content_hash: str,
		absolute_file_path: Path,
		start_line: int,
		end_line: int,
		entity_type: str,
		entity_name: str,
		language: str,
		hierarchy_path: str,
		last_modified_time: float,
	) -> ChunkMetadataSchema:
		"""
		Create a ChunkMetadataSchema for the chunk.

		The file path stored in the metadata is chosen in this order: the path
		relative to the git repo root; otherwise a tracked file with the same
		file name (the one sharing the longest trailing path with
		``absolute_file_path`` when several exist); otherwise the bare file name.
		Every call generates a fresh random ``chunk_id``.

		Args:
			chunk_content_hash (str): The content hash.
			file_content_hash (str): The hash of the entire file content.
			absolute_file_path (Path): The absolute path to the file.
			start_line (int): Start line.
			end_line (int): End line.
			entity_type (str): Entity type.
			entity_name (str): Entity name.
			language (str): Language.
			hierarchy_path (str): Hierarchy path.
			last_modified_time (float): File's last modification timestamp.

		Returns:
			ChunkMetadataSchema: The chunk metadata.
		"""
		file_name = absolute_file_path.name
		# Default to the file name only; replaced below when a better path is found
		relative_path_for_git_and_schema = file_name
		git_context = self.git_context

		if git_context and git_context.repo_root:
			try:
				relative_path_for_git_and_schema = absolute_file_path.relative_to(git_context.repo_root).as_posix()
			except ValueError:
				# The file is not under the repo root: look for tracked files with the same name
				matching_paths = [p for p in (git_context.tracked_files or ()) if Path(p).name == file_name]

				if len(matching_paths) == 1:
					relative_path_for_git_and_schema = matching_paths[0]
					logger.debug(
						"Found tracked file path for %s: %s",
						file_name,
						relative_path_for_git_and_schema,
					)
				elif matching_paths:
					# Tracked paths are relative and the target is absolute, so they
					# can only be compared meaningfully from the file-name end.
					target_parts = absolute_file_path.parts

					def shared_trailing_parts(candidate: str) -> int:
						"""Count path components shared with the target, from the end."""
						shared = 0
						for ours, theirs in zip(reversed(Path(candidate).parts), reversed(target_parts), strict=False):
							if ours != theirs:
								break
							shared += 1
						return shared

					# max() keeps the first candidate on ties
					relative_path_for_git_and_schema = max(matching_paths, key=shared_trailing_parts)
					logger.debug(
						"Multiple tracked paths for %s, using best match: %s",
						file_name,
						relative_path_for_git_and_schema,
					)
				else:
					logger.warning(
						"File path %s could not be made relative to git repo root %s. Using filename as fallback.",
						absolute_file_path,
						git_context.repo_root,
					)
		else:
			# Without a git context or repo root no relative path can be derived
			logger.debug(
				"No Git context or repo root available. Using file name '%s' as file_path in metadata.",
				file_name,
			)

		generated_chunk_id = str(uuid.uuid4())

		return ChunkMetadataSchema(
			chunk_id=generated_chunk_id,
			content_hash=chunk_content_hash,
			start_line=start_line,
			end_line=end_line,
			entity_type=entity_type,
			entity_name=entity_name or "",
			hierarchy_path=hierarchy_path,
			git_metadata=self._make_git_metadata(relative_path_for_git_and_schema, start_line, end_line),
			file_metadata=FileMetadataSchema(
				file_path=relative_path_for_git_and_schema,
				language=language,
				last_modified_time=last_modified_time,
				file_content_hash=file_content_hash,
			),
		)

__init__

__init__(
	lod_generator: LODGenerator | None = None,
	config_loader: ConfigLoader | None = None,
	git_context: GitRepoContext | None = None,
	repo_checksum_calculator: RepoChecksumCalculator
	| None = None,
) -> None

Initialize the chunker.

Parameters:

Name	Type	Description	Default
`lod_generator`	`LODGenerator \| None`	An instance of LODGenerator. If None, creates a new one.	`None`
`config_loader`	`ConfigLoader \| None`	Configuration loader instance.	`None`
`git_context`	`GitRepoContext \| None`	Git repository context instance.	`None`
`repo_checksum_calculator`	`RepoChecksumCalculator \| None`	Optional RepoChecksumCalculator instance.	`None`

Source code in src/codemap/processor/vector/chunking.py

def __init__(
	self,
	lod_generator: LODGenerator | None = None,
	config_loader: "ConfigLoader | None" = None,
	git_context: GitRepoContext | None = None,
	repo_checksum_calculator: "RepoChecksumCalculator | None" = None,
) -> None:
	"""
	Set up the chunker and read its limits from the embedding configuration.

	Args:
	    lod_generator: Generator used to build LODEntity trees. A new
	        LODGenerator is created when the argument is None (or falsy).
	    config_loader: Configuration source. ``ConfigLoader.get_instance()``
	        is used when the argument is None (or falsy).
	    git_context: Git repository context; stored unchanged, may be None.
	    repo_checksum_calculator: Stored unchanged; not used by the constructor.

	"""
	self.lod_generator = lod_generator if lod_generator else LODGenerator()

	if not config_loader:
		# Imported here rather than at module level; only needed when no
		# loader was supplied by the caller.
		from codemap.config import ConfigLoader

		config_loader = ConfigLoader.get_instance()
	self.config_loader = config_loader

	# The chunking limits are read straight from embedding.chunking.
	chunking_settings = self.config_loader.get.embedding.chunking
	self.max_hierarchy_depth = chunking_settings.max_hierarchy_depth
	self.max_file_lines = chunking_settings.max_file_lines

	self.git_context = git_context
	self.repo_checksum_calculator = repo_checksum_calculator

lod_generator `instance-attribute`

lod_generator = lod_generator or LODGenerator()

config_loader `instance-attribute`

config_loader = config_loader

max_hierarchy_depth `instance-attribute`

max_hierarchy_depth = max_hierarchy_depth

max_file_lines `instance-attribute`

max_file_lines = max_file_lines

git_context `instance-attribute`

git_context = git_context

repo_checksum_calculator `instance-attribute`

repo_checksum_calculator = repo_checksum_calculator

chunk_file

chunk_file(
	absolute_file_path: Path,
	git_hash: str | None = None,
	lod_level: LODLevel = FULL,
) -> Generator[ChunkSchema, None, None]

Generates code chunks for a given file using LODGenerator.

Parameters:

Name	Type	Description	Default
`absolute_file_path`	`Path`	The absolute path to the file to chunk.	required
`git_hash`	`str \| None`	Optional Git hash of the file content (blob hash).	`None`
`lod_level`	`LODLevel`	The level of detail to request from LODGenerator.	`FULL`

Yields:

Type	Description
`ChunkSchema`	ChunkSchema instances, each representing a semantically rich code chunk.

Source code in src/codemap/processor/vector/chunking.py

def chunk_file(
	self,
	absolute_file_path: Path,
	git_hash: str | None = None,
	lod_level: LODLevel = LODLevel.FULL,  # Use FULL for max info, not DETAIL
) -> Generator[ChunkSchema, None, None]:
	"""
	Generate code chunks for a file from its LODGenerator entity tree.

	A whole-file chunk is yielded first when the file has fewer than
	``self.max_file_lines`` lines; chunks for the individual entities follow.
	OSError, ValueError, TypeError, KeyError and AttributeError raised while
	chunking are logged at debug level and end the generator instead of
	propagating to the caller.

	Args:
	    absolute_file_path: Path of the file to chunk. A relative path is
	        resolved (with a warning) before use.
	    git_hash: Optional Git hash of the file content (blob hash). It is
	        only forwarded to the recursive entity chunker.
	    lod_level: The level of detail to request from LODGenerator.

	Yields:
	    ChunkSchema objects, each pairing the rendered chunk text with its
	    metadata.

	"""
	if not absolute_file_path.is_absolute():
		logger.warning("chunk_file received relative path: %s. Resolving.", absolute_file_path)
		absolute_file_path = absolute_file_path.resolve()

	try:
		last_modified_time = absolute_file_path.stat().st_mtime

		# Generate the LODEntity tree for the file using the specified level of detail
		root_entity = self.lod_generator.generate_lod(absolute_file_path, lod_level)

		if not root_entity:
			logger.debug("LODGenerator returned no entity for %s, skipping chunking", absolute_file_path)
			return

		# Prefer the content carried in the root entity's metadata, if present
		content = root_entity.metadata.get("full_content_str")

		if content is None:  # Fall back to reading the file directly
			logger.debug(
				"full_content_str not in root_entity metadata for %s. Reading file directly.", absolute_file_path
			)
			content = read_file_content(absolute_file_path)
			if content is None:
				logger.debug(
					"Skipping file %s - could not obtain content via LOD or direct read", absolute_file_path
				)
				return

		# "unknown" is used when the root entity metadata carries no language
		resolved_language = root_entity.metadata.get("language", "unknown")
		file_lines = content.splitlines()

		# The file hash covers the entire file content
		file_content_hasher = xxhash.xxh3_128()
		file_content_hasher.update(content.encode("utf-8"))
		entire_file_content_hash = file_content_hasher.hexdigest()

		# First, create a chunk for the entire file if it's small enough
		if len(file_lines) < self.max_file_lines:
			try:
				whole_file_content = "\n".join(file_lines)

				# Information about the file as a whole
				file_name = absolute_file_path.name
				content_parts = ["Type: FILE", f"Path: {file_name}", f"Name: {file_name}"]

				# Add the root entity's docstring if it has one
				if root_entity.docstring:
					content_parts.append(f"Docstring:\n{root_entity.docstring}")

				# Structure overview of everything nested under the root entity
				nested_entities = self._extract_nested_entities(root_entity)
				if nested_entities:
					content_parts.append("Contains:")
					for ne in nested_entities:
						indent = "  " * ne["depth"]
						content_parts.append(
							f"{indent}- {ne['type']}: {ne['name']} {ne['signature']} (lines {ne['line_range']})"
						)

				# The code appears twice: once fenced with the language, once raw
				content_parts.append(f"Code:\n```{resolved_language}\n{whole_file_content}\n```")
				content_parts.append(f"Raw:\n{whole_file_content}")

				chunk_content = "\n\n".join(content_parts)

				# The content hash covers the fully rendered chunk text
				content_hasher = xxhash.xxh3_64()
				content_hasher.update(chunk_content.encode("utf-8"))
				chunk_content_hash = content_hasher.hexdigest()

				metadata = self._make_chunk_metadata(
					chunk_content_hash,
					entire_file_content_hash,
					absolute_file_path,
					1,
					len(file_lines),
					"FILE",
					file_name,
					resolved_language,
					file_name,
					last_modified_time,
				)
				yield ChunkSchema(content=chunk_content, metadata=metadata)
			except (ValueError, TypeError, KeyError, AttributeError) as e:
				# A failed whole-file chunk does not prevent the entity chunks below
				logger.warning("Error creating whole-file chunk for %s: %s", absolute_file_path, e)

		# Then create more specific chunks for the individual entities
		yield from self._chunk_entity_recursive(
			root_entity,
			absolute_file_path,
			file_lines,
			git_hash,
			entire_file_content_hash,
			resolved_language,
			last_modified_time,
			file_entity=root_entity,
		)

	except (OSError, ValueError, TypeError, KeyError, AttributeError) as e:
		# Best effort: an unchunkable file is reported at debug level only
		logger.debug("Failed to chunk file %s: %s", absolute_file_path, e)

Chunking

logger module-attribute

TreeSitterChunker

__init__

lod_generator instance-attribute

config_loader instance-attribute

max_hierarchy_depth instance-attribute

max_file_lines instance-attribute

git_context instance-attribute

repo_checksum_calculator instance-attribute

chunk_file

logger `module-attribute`

__init__

lod_generator `instance-attribute`

config_loader `instance-attribute`

max_hierarchy_depth `instance-attribute`

max_file_lines `instance-attribute`

git_context `instance-attribute`

repo_checksum_calculator `instance-attribute`