Skip to content

Path Utils

Utilities for handling paths and file system operations.

logger module-attribute

logger = getLogger(__name__)

filter_paths_by_gitignore

filter_paths_by_gitignore(
	paths: Sequence[Path], repo_root: Path
) -> list[Path]

Filter paths based on .gitignore patterns.

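This function filters a list of paths to exclude those that match patterns in the repository's .gitignore file or a built-in list of default ignore patterns (for example `__pycache__/`, `.git/`, and `node_modules/`), while preserving the directory structure.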

Parameters:

Name Type Description Default
paths Sequence[Path]

Sequence of paths to filter

required
repo_root Path

Root directory of the repository

required

Returns:

Type Description
list[Path]

List of paths that don't match any gitignore patterns or built-in default ignore patterns

Source code in src/codemap/utils/path_utils.py
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
def filter_paths_by_gitignore(paths: Sequence[Path], repo_root: Path) -> list[Path]:
	"""
	Filter paths based on .gitignore patterns.

	Paths are matched against the patterns found in ``repo_root/.gitignore``
	(when that file can be read) combined with a built-in list of default
	ignore patterns. Surviving files are returned first, followed by the
	surviving directories, each group in input order. Paths that are not
	located under ``repo_root`` are always kept. Paths that are neither an
	existing file nor an existing directory are dropped.

	If the optional ``pathspec`` package is not installed, filtering is
	disabled and every input path is returned.

	Args:
	    paths: Sequence of paths to filter
	    repo_root: Root directory of the repository

	Returns:
	    List of paths that don't match any gitignore patterns

	"""
	try:
		import pathspec
		from pathspec.patterns.gitwildmatch import GitWildMatchPattern
	except ImportError:
		logger.warning("pathspec package not installed, gitignore filtering disabled")
		return list(paths)

	# Read .gitignore if it is a readable regular file
	gitignore_path = repo_root / ".gitignore"
	gitignore_patterns: list[str] = []

	# is_file() rather than exists(): a directory named ".gitignore" cannot be opened
	if gitignore_path.is_file():
		try:
			gitignore_patterns = gitignore_path.read_text(encoding="utf-8").splitlines()
		except (OSError, UnicodeDecodeError) as exc:
			# Stay best-effort: an unreadable .gitignore should not abort filtering
			logger.warning(f"Could not read {gitignore_path}: {exc}; using default ignore patterns only")

	# Add default patterns for common directories that should be ignored
	default_ignore_patterns = [
		"__pycache__/",
		"*.py[cod]",
		"*$py.class",
		".git/",
		".pytest_cache/",
		".coverage",
		"htmlcov/",
		".tox/",
		".nox/",
		".hypothesis/",
		".mypy_cache/",
		".ruff_cache/",
		"dist/",
		"build/",
		"*.so",
		"*.egg",
		"*.egg-info/",
		".env/",
		"venv/",
		".venv/",
		"env/",
		"ENV/",
		"node_modules/",
	]

	# Combine patterns with existing ones, avoiding duplicates (set for O(1) lookups)
	known_patterns = set(gitignore_patterns)
	all_patterns = gitignore_patterns + [p for p in default_ignore_patterns if p not in known_patterns]

	# Create path spec with direct import
	spec = pathspec.PathSpec.from_lines(GitWildMatchPattern, all_patterns)

	filtered_paths: list[Path] = []

	# Process files first
	for path in paths:
		if not path.is_file():
			continue
		try:
			rel_path = path.relative_to(repo_root)
		except ValueError:
			# Path is not relative to repo_root, so the patterns do not apply to it
			filtered_paths.append(path)
			continue
		if not spec.match_file(rel_path.as_posix()):
			filtered_paths.append(path)

	# Process directories; kept ones are appended after all files.
	# Directories with no content might still be needed for the tree visualization.
	for dir_path in paths:
		if not dir_path.is_dir():
			continue
		try:
			rel_path = dir_path.relative_to(repo_root)
		except ValueError:
			# Path is not relative to repo_root, so the patterns do not apply to it
			filtered_paths.append(dir_path)
			continue

		# Trailing slash so that directory-only patterns (e.g. "build/") can match
		if spec.match_file(rel_path.as_posix() + "/"):
			logger.debug(f"Skipping ignored directory: {rel_path}")
			continue

		# Check if any parent directory is already ignored
		ignored_parent = next(
			(parent for parent in rel_path.parents if spec.match_file(parent.as_posix() + "/")),
			None,
		)
		if ignored_parent is not None:
			logger.debug(f"Skipping directory with ignored parent: {ignored_parent}")
			continue

		filtered_paths.append(dir_path)

	logger.debug(f"Filtered {len(paths)} paths down to {len(filtered_paths)} after applying gitignore patterns")
	return filtered_paths

normalize_path

normalize_path(path: str | Path) -> Path

Normalize a path to an absolute Path object.

Parameters:

Name Type Description Default
path str | Path

Path string or object

required

Returns:

Type Description
Path

Normalized absolute Path

Source code in src/codemap/utils/path_utils.py
133
134
135
136
137
138
139
140
141
142
143
144
145
146
def normalize_path(path: str | Path) -> Path:
	"""
	Normalize a path to an absolute Path object.

	Args:
	    path: Path string or object

	Returns:
	    Normalized absolute Path

	"""
	if isinstance(path, str):
		path = Path(path)
	return path.expanduser().resolve()

get_relative_path

get_relative_path(path: Path, base_path: Path) -> Path

Get path relative to base_path if possible, otherwise return absolute path.

Parameters:

Name Type Description Default
path Path

The path to make relative

required
base_path Path

The base path to make it relative to

required

Returns:

Type Description
Path

Relative path if possible, otherwise absolute path

Source code in src/codemap/utils/path_utils.py
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
def get_relative_path(path: Path, base_path: Path) -> Path:
	"""
	Express path relative to base_path, falling back to its absolute form.

	Args:
	    path: The path to make relative
	    base_path: The base path to make it relative to

	Returns:
	    Relative path if possible, otherwise absolute path

	"""
	try:
		relative = path.relative_to(base_path)
	except ValueError:
		# relative_to() could not express path under base_path
		return path.absolute()
	else:
		return relative