Skip to content

File Utils

Utility functions for file operations in CodeMap.

logger module-attribute

logger = getLogger(__name__)

count_tokens

count_tokens(file_path: Path) -> int

Rough estimate of the number of tokens in a file, obtained by counting whitespace-separated words.

Parameters:

Name Type Description Default
file_path Path

Path to the file to count tokens in.

required

Returns:

Type Description
int

Estimated number of tokens in the file.

Source code in src/codemap/utils/file_utils.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
def count_tokens(file_path: Path) -> int:
	"""
	Estimate how many tokens a file contains.

	The estimate is simply the number of whitespace-separated words in the
	file's UTF-8 text.

	Args:
	    file_path: Path to the file to count tokens in.

	Returns:
	    Estimated number of tokens in the file, or 0 when the file cannot be
	    opened or is not valid UTF-8.

	"""
	try:
		text = file_path.read_text(encoding="utf-8")
	except (OSError, UnicodeDecodeError):
		# Unreadable or non-UTF-8 files contribute no tokens.
		return 0
	else:
		# str.split() with no arguments splits on runs of any whitespace.
		words = text.split()
		return len(words)

read_file_content

read_file_content(file_path: Path | str) -> str | None

Read content from a file with proper error handling.

Parameters:

Name Type Description Default
file_path Path | str

Path to the file to read

required

Returns:

Type Description
str | None

Content of the file as string, or None if the file cannot be read

Source code in src/codemap/utils/file_utils.py
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
def read_file_content(file_path: Path | str) -> str | None:
	"""
	Read content from a file with proper error handling.

	Args:
	    file_path: Path to the file to read

	Returns:
	    Content of the file as string, or None if the file cannot be read

	"""
	path_obj = Path(file_path)
	try:
		with path_obj.open("r", encoding="utf-8") as f:
			return f.read()
	except FileNotFoundError:
		# Handle case where file was tracked but has been deleted
		logger.debug(f"File not found: {path_obj} - possibly deleted since last tracked")
		return None
	except UnicodeDecodeError:
		# Try to read as binary and then decode with error handling
		logger.warning("File %s contains non-UTF-8 characters, attempting to decode with errors='replace'", path_obj)
		try:
			with path_obj.open("rb") as f:
				content = f.read()
				return content.decode("utf-8", errors="replace")
		except (OSError, FileNotFoundError):
			logger.debug(f"Unable to read file as binary: {path_obj}")
			return None
	except OSError as e:
		# Handle other file access errors
		logger.debug(f"Error reading file {path_obj}: {e}")
		return None

ensure_directory_exists

ensure_directory_exists(dir_path: Path) -> None

Ensure that a directory exists, creating it if necessary.

Parameters:

Name Type Description Default
dir_path Path

The path to the directory.

required
Source code in src/codemap/utils/file_utils.py
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
def ensure_directory_exists(dir_path: Path) -> None:
	"""
	Make sure a directory is present at the given path.

	Missing directories (including missing parents) are created. An existing
	directory is left untouched.

	Args:
	    dir_path (Path): The path to the directory.

	Raises:
	    NotADirectoryError: If the path exists but is not a directory.
	    OSError: If the directory cannot be created; the failure is logged
	        and then re-raised.

	"""
	if dir_path.exists():
		if dir_path.is_dir():
			return
		# Something other than a directory already occupies the path.
		msg = f"Path exists but is not a directory: {dir_path}"
		logger.error(msg)
		raise NotADirectoryError(msg)

	logger.info(f"Creating directory: {dir_path}")
	try:
		dir_path.mkdir(parents=True, exist_ok=True)
	except OSError:
		logger.exception(f"Failed to create directory {dir_path}")
		raise

is_binary_file

is_binary_file(file_path: Path) -> bool

Check if a file is binary.

Parameters:

Name Type Description Default
file_path Path

Path to the file

required

Returns:

Type Description
bool

True if the file is binary, is larger than 10 MB, or cannot be read; False otherwise

Source code in src/codemap/utils/file_utils.py
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
def is_binary_file(file_path: Path) -> bool:
	"""
	Decide whether a file should be treated as binary.

	A file counts as binary when it is larger than 10 MB, cannot be read,
	fails to decode as UTF-8, or has a NUL character within the first 1024
	characters.

	Args:
	        file_path: Path to the file

	Returns:
	        True if the file is binary, False otherwise

	"""
	size_limit = 10 * 1024 * 1024  # 10 MB; anything bigger is reported as binary

	try:
		if file_path.stat().st_size > size_limit:
			return True

		# Decode only the start of the file as text.
		with file_path.open(encoding="utf-8") as handle:
			head = handle.read(1024)
	except (UnicodeDecodeError, OSError):
		# Undecodable or inaccessible files are reported as binary.
		return True

	return "\0" in head