Skip to content

Javascript

JavaScript-specific configuration for syntax chunking.

logger module-attribute

logger = getLogger(__name__)

JavaScriptConfig dataclass

Bases: LanguageConfig

JavaScript-specific syntax chunking configuration.

Source code in src/codemap/processor/tree_sitter/languages/javascript.py
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
class JavaScriptConfig(LanguageConfig):
	"""JavaScript-specific syntax chunking configuration."""

	# File-level entities
	module: ClassVar[list[str]] = ["program"]
	namespace: ClassVar[list[str]] = ["export_statement"]  # Using export as namespace indicator

	# Type definitions
	class_: ClassVar[list[str]] = ["class_declaration", "class"]
	interface: ClassVar[list[str]] = []  # Pure JS doesn't have interfaces
	protocol: ClassVar[list[str]] = []  # Pure JS doesn't have protocols
	struct: ClassVar[list[str]] = []  # Pure JS doesn't have structs
	enum: ClassVar[list[str]] = []  # Pure JS doesn't have enums
	type_alias: ClassVar[list[str]] = []  # Pure JS doesn't have type aliases

	# Functions and methods
	function: ClassVar[list[str]] = [
		"function_declaration",
		"function",
		"arrow_function",
		"generator_function_declaration",
	]
	method: ClassVar[list[str]] = ["method_definition"]
	property_def: ClassVar[list[str]] = ["property_identifier", "public_field_definition"]
	test_case: ClassVar[list[str]] = ["call_expression"]  # Special detection for test frameworks
	test_suite: ClassVar[list[str]] = ["call_expression"]  # Special detection for test frameworks

	# Variables and constants
	variable: ClassVar[list[str]] = ["variable_declaration", "lexical_declaration"]
	constant: ClassVar[list[str]] = ["variable_declaration", "lexical_declaration"]  # const declarations
	class_field: ClassVar[list[str]] = ["public_field_definition"]

	# Code organization
	import_: ClassVar[list[str]] = ["import_statement"]
	decorator: ClassVar[list[str]] = ["decorator"]

	# Documentation
	comment: ClassVar[list[str]] = ["comment"]
	docstring: ClassVar[list[str]] = ["comment"]  # JS uses comments for documentation

	file_extensions: ClassVar[list[str]] = [".js", ".jsx", ".mjs", ".cjs"]
	tree_sitter_name: ClassVar[str] = "javascript"

module class-attribute

module: list[str] = ['program']

namespace class-attribute

namespace: list[str] = ['export_statement']

class_ class-attribute

class_: list[str] = ['class_declaration', 'class']

interface class-attribute

interface: list[str] = []

protocol class-attribute

protocol: list[str] = []

struct class-attribute

struct: list[str] = []

enum class-attribute

enum: list[str] = []

type_alias class-attribute

type_alias: list[str] = []

function class-attribute

function: list[str] = [
	"function_declaration",
	"function",
	"arrow_function",
	"generator_function_declaration",
]

method class-attribute

method: list[str] = ['method_definition']

property_def class-attribute

property_def: list[str] = [
	"property_identifier",
	"public_field_definition",
]

test_case class-attribute

test_case: list[str] = ['call_expression']

test_suite class-attribute

test_suite: list[str] = ['call_expression']

variable class-attribute

variable: list[str] = [
	"variable_declaration",
	"lexical_declaration",
]

constant class-attribute

constant: list[str] = [
	"variable_declaration",
	"lexical_declaration",
]

class_field class-attribute

class_field: list[str] = ['public_field_definition']

import_ class-attribute

import_: list[str] = ['import_statement']

decorator class-attribute

decorator: list[str] = ['decorator']

comment class-attribute

comment: list[str] = ['comment']

docstring class-attribute

docstring: list[str] = ['comment']

file_extensions class-attribute

file_extensions: list[str] = [".js", ".jsx", ".mjs", ".cjs"]

tree_sitter_name class-attribute

tree_sitter_name: str = 'javascript'

JAVASCRIPT_CONFIG module-attribute

JAVASCRIPT_CONFIG = JavaScriptConfig()

JavaScriptSyntaxHandler

Bases: LanguageSyntaxHandler

JavaScript-specific syntax handling logic.

Source code in src/codemap/processor/tree_sitter/languages/javascript.py
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
class JavaScriptSyntaxHandler(LanguageSyntaxHandler):
	"""JavaScript-specific syntax handling logic."""

	@staticmethod
	def _get_node_text(node: Node, content_bytes: bytes) -> str:
		"""Helper to get node text safely."""
		try:
			return content_bytes[node.start_byte : node.end_byte].decode("utf-8", errors="ignore")
		except IndexError:
			return ""

	def __init__(self) -> None:
		"""Initialize with JavaScript configuration."""
		super().__init__(JAVASCRIPT_CONFIG)

	def get_entity_type(self, node: Node, parent: Node | None, content_bytes: bytes) -> EntityType:
		"""
		Determine the EntityType for a JavaScript node.

		Args:
		    node: The tree-sitter node
		    parent: The parent node (if any)
		    content_bytes: Source code content as bytes

		Returns:
		    The entity type

		"""
		node_type = node.type
		logger.debug(
			"Getting entity type for JavaScript node: type=%s, parent_type=%s",
			node_type,
			parent.type if parent else None,
		)

		# Module-level
		if node_type in self.config.module:
			return EntityType.MODULE
		if node_type in self.config.namespace:
			return EntityType.NAMESPACE

		# Documentation
		if node_type in self.config.comment:
			# Check if this is a JSDoc comment (starts with /**)
			if self._is_jsdoc_comment(node, content_bytes):
				return EntityType.DOCSTRING
			return EntityType.COMMENT

		# Type definitions
		if node_type in self.config.class_:
			return EntityType.CLASS

		# Functions and methods
		if node_type in self.config.function:
			# Check if this is a test function (for frameworks like Jest, Mocha)
			if self._is_test_function(node, content_bytes):
				return EntityType.TEST_CASE
			return EntityType.FUNCTION

		if node_type in self.config.method:
			return EntityType.METHOD

		# Check for test suite declarations (describe blocks in Jest/Mocha)
		if node_type in self.config.test_suite and self._is_test_suite(node, content_bytes):
			return EntityType.TEST_SUITE

		# Property definitions
		if node_type in self.config.property_def:
			return EntityType.PROPERTY

		# Variables and constants
		if node_type in self.config.variable:
			# Check if it's a const declaration
			if self._is_constant(node, content_bytes):
				return EntityType.CONSTANT
			return EntityType.VARIABLE

		# Class fields
		if node_type in self.config.class_field:
			return EntityType.CLASS_FIELD

		# Code organization
		if node_type in self.config.import_:
			return EntityType.IMPORT

		return EntityType.UNKNOWN

	def _is_jsdoc_comment(self, node: Node, content_bytes: bytes) -> bool:
		"""
		Check if a comment node is a JSDoc comment.

		Args:
		    node: The comment node
		    content_bytes: Source code content as bytes

		Returns:
		    True if the node is a JSDoc comment

		"""
		if node.type != "comment":
			return False

		try:
			comment_text = content_bytes[node.start_byte : node.end_byte].decode("utf-8", errors="ignore")
			return comment_text.startswith("/**") and comment_text.endswith("*/")
		except (UnicodeDecodeError, IndexError):
			return False

	def _is_constant(self, node: Node, content_bytes: bytes) -> bool:
		"""
		Check if a variable declaration is a constant.

		Args:
		    node: The variable declaration node
		    content_bytes: Source code content as bytes

		Returns:
		    True if the node is a constant declaration

		"""
		if node.type not in ["variable_declaration", "lexical_declaration"]:
			return False

		try:
			decl_text = content_bytes[node.start_byte : node.start_byte + 5].decode("utf-8", errors="ignore")
			return decl_text.startswith("const")
		except (UnicodeDecodeError, IndexError):
			return False

	def _is_test_function(self, node: Node, content_bytes: bytes) -> bool:
		"""
		Check if a function is a test function.

		Args:
		    node: The function node
		    content_bytes: Source code content as bytes

		Returns:
		    True if the node is a test function

		"""
		if node.type == "call_expression":
			callee = node.child_by_field_name("function")
			if callee:
				try:
					callee_text = content_bytes[callee.start_byte : callee.end_byte].decode("utf-8", errors="ignore")
					return callee_text in ["it", "test"]
				except (UnicodeDecodeError, IndexError):
					pass
		return False

	def _is_test_suite(self, node: Node, content_bytes: bytes) -> bool:
		"""
		Check if a node is a test suite declaration.

		Args:
		    node: The node
		    content_bytes: Source code content as bytes

		Returns:
		    True if the node is a test suite declaration

		"""
		if node.type == "call_expression":
			callee = node.child_by_field_name("function")
			if callee:
				try:
					callee_text = content_bytes[callee.start_byte : callee.end_byte].decode("utf-8", errors="ignore")
					return callee_text == "describe"
				except (UnicodeDecodeError, IndexError):
					pass
		return False

	def find_docstring(self, node: Node, content_bytes: bytes) -> tuple[str | None, Node | None]:
		"""
		Find the docstring associated with a definition node.

		Args:
		    node: The tree-sitter node
		    content_bytes: Source code content as bytes

		Returns:
		    A tuple containing:
		    - The extracted docstring text (or None).
		    - The specific AST node representing the docstring (or None).

		"""
		# For functions, classes, and other definition nodes
		parent_node = node.parent

		# Look for JSDoc comments immediately preceding the node
		if parent_node:
			index = None
			for i, child in enumerate(parent_node.children):
				if child == node:
					index = i
					break

			if index is not None and index > 0:
				prev_node = parent_node.children[index - 1]
				if prev_node.type == "comment" and self._is_jsdoc_comment(prev_node, content_bytes):
					try:
						comment_text = content_bytes[prev_node.start_byte : prev_node.end_byte].decode(
							"utf-8", errors="ignore"
						)
						# Clean JSDoc format: remove /** */ and trim
						comment_text = comment_text.strip()
						comment_text = comment_text.removeprefix("/**")
						comment_text = comment_text.removesuffix("*/")
						return comment_text.strip(), prev_node
					except (UnicodeDecodeError, IndexError) as e:
						logger.warning("Failed to decode JavaScript comment: %s", e)

		return None, None

	def extract_name(self, node: Node, content_bytes: bytes) -> str:
		"""
		Extract the name identifier from a definition node.

		Args:
		    node: The tree-sitter node
		    content_bytes: Source code content as bytes

		Returns:
		    The extracted name

		"""
		# Try to find the name field based on node type
		name_node = None

		if node.type in ["function_declaration", "class_declaration", "method_definition"]:
			name_node = node.child_by_field_name("name")
		elif node.type == "property_identifier":
			name_node = node
		elif node.type in ["variable_declaration", "lexical_declaration"]:
			# Get the first declarator and its name
			declarator = node.child_by_field_name("declarations")
			if declarator and declarator.named_child_count > 0:
				first_declarator = declarator.named_children[0]
				name_node = first_declarator.child_by_field_name("name")
		elif node.type == "public_field_definition":
			name_node = node.child_by_field_name("name")

		if name_node:
			try:
				return content_bytes[name_node.start_byte : name_node.end_byte].decode("utf-8", errors="ignore")
			except (UnicodeDecodeError, IndexError, AttributeError) as e:
				logger.warning("Failed to decode JavaScript name: %s", e)
				return f"<decoding-error-{node.type}>"

		# For call expressions that represent tests or suites
		if node.type == "call_expression":
			callee = node.child_by_field_name("function")
			arguments = node.child_by_field_name("arguments")

			if callee and arguments and arguments.named_child_count > 0:
				# First argument is typically the test/suite name
				first_arg = arguments.named_children[0]
				if first_arg.type == "string":
					try:
						name = content_bytes[first_arg.start_byte : first_arg.end_byte].decode("utf-8", errors="ignore")
						# Remove quotes
						return name.strip("\"'")
					except (UnicodeDecodeError, IndexError):
						pass

		return f"<anonymous-{node.type}>"

	def get_body_node(self, node: Node) -> Node | None:
		"""Get the statement block node for JS/TS function/class/method body."""
		# Common body node type in JS/TS grammar
		body_field_names = ["body", "statement_block"]
		for field_name in body_field_names:
			body_node = node.child_by_field_name(field_name)
			if body_node:
				# Sometimes the direct body is an expression (arrow functions)
				# Check if the found node is a block type
				if body_node.type == "statement_block":
					return body_node
				if body_node.type == "expression_statement":  # Arrow function returning object literal
					if body_node.child_count > 0 and body_node.children[0].type == "object":
						return body_node  # Treat expression as body
				elif node.type == "arrow_function":  # Direct expression in arrow function
					return body_node
		# Fallback for classes where body might be direct children within curly braces
		if node.type in ("class_declaration", "class"):
			for child in node.children:
				if child.type == "class_body":
					return child
		return None

	def extract_signature(self, node: Node, content_bytes: bytes) -> str | None:
		"""Extract the signature up to the opening curly brace '{' for JS/TS."""
		# Find the body node first
		body_node = self.get_body_node(node)

		if body_node:
			# Signature is everything from the start of the node up to the start of the body
			start_byte = node.start_byte
			end_byte = body_node.start_byte
			# Adjust end_byte to exclude trailing whitespace before the body
			while end_byte > start_byte and content_bytes[end_byte - 1 : end_byte].isspace():
				end_byte -= 1
			try:
				return content_bytes[start_byte:end_byte].decode("utf-8", errors="ignore").strip()
			except IndexError:
				return None
		else:
			# Fallback: if no body found (e.g., abstract method, interface?), return the first line
			return self._get_node_text(node, content_bytes).splitlines()[0]

	def get_enclosing_node_of_type(self, node: Node, target_type: EntityType) -> Node | None:
		"""Find the first ancestor node matching the target JS/TS entity type."""
		target_node_types = []
		if target_type == EntityType.CLASS:
			target_node_types = ["class_declaration", "class", "class_expression"]
		elif target_type == EntityType.FUNCTION:
			# Includes function declarations, arrow functions, function expressions
			target_node_types = ["function_declaration", "arrow_function", "function"]
		elif target_type == EntityType.METHOD:
			target_node_types = ["method_definition"]
		elif target_type == EntityType.MODULE:
			# Module is typically the root 'program' node
			current = node
			while current.parent:
				current = current.parent
			return current if current.type == "program" else None
		# Add other types if needed (e.g., INTERFACE for TS)

		if not target_node_types:
			return None

		current = node.parent
		while current:
			if current.type in target_node_types:
				return current
			current = current.parent
		return None

	def get_children_to_process(self, node: Node, body_node: Node | None) -> list[Node]:
		"""
		Get the list of child nodes that should be recursively processed.

		Args:
		    node: The tree-sitter node
		    body_node: The body node if available

		Returns:
		    List of child nodes to process

		"""
		# Process children of the body node if it exists, otherwise process direct children
		if body_node:
			return list(body_node.children)

		# Special handling for certain nodes
		if node.type in ["variable_declaration", "lexical_declaration"]:
			# Process the declarations field
			declarations = node.child_by_field_name("declarations")
			return [declarations] if declarations else []

		return list(node.children)

	def should_skip_node(self, node: Node) -> bool:
		"""
		Determine if a node should be skipped entirely during processing.

		Args:
		    node: The tree-sitter node

		Returns:
		    True if the node should be skipped

		"""
		# Skip non-named nodes (like punctuation, operators)
		if not node.is_named:
			return True

		# Skip syntax nodes that don't contribute to code structure
		return node.type in ["(", ")", "{", "}", "[", "]", ";", ".", ",", ":", "=>"]

	def extract_imports(self, node: Node, content_bytes: bytes) -> list[str]:
		"""
		Extract imported module names from a JavaScript import statement.

		Args:
		    node: The tree-sitter node representing an import statement
		    content_bytes: Source code content as bytes

		Returns:
		    List of imported module names as strings

		"""
		if node.type not in self.config.import_:
			return []

		imported_names = []

		try:
			# Find the source (module path) of the import
			source_node = node.child_by_field_name("source")
			if not source_node:
				return []

			# Extract the module path from the string literal
			module_path = content_bytes[source_node.start_byte : source_node.end_byte].decode("utf-8", errors="ignore")
			# Remove quotes
			module_path = module_path.strip("\"'")

			# Check for different import patterns:

			# 1. Default import: "import Name from 'module'"
			default_import = node.child_by_field_name("default")
			if default_import:
				name = content_bytes[default_import.start_byte : default_import.end_byte].decode(
					"utf-8", errors="ignore"
				)
				imported_names.append(f"{module_path}.default")

			# 2. Named imports: "import { foo, bar as baz } from 'module'"
			named_imports = node.child_by_field_name("named_imports")
			if named_imports:
				for child in named_imports.children:
					if child.type == "import_specifier":
						imported_name = child.child_by_field_name("name")
						if imported_name:
							name = content_bytes[imported_name.start_byte : imported_name.end_byte].decode(
								"utf-8", errors="ignore"
							)
							imported_names.append(f"{module_path}.{name}")

			# 3. Namespace import: "import * as Name from 'module'"
			namespace_import = node.child_by_field_name("namespace_import")
			if namespace_import:
				imported_names.append(f"{module_path}.*")

			# If no specific imports found but we have a module, add the whole module
			if not imported_names and module_path:
				imported_names.append(module_path)

		except (UnicodeDecodeError, IndexError, AttributeError) as e:
			logger.warning("Failed to decode JavaScript imports: %s", e)

		return imported_names

	def extract_calls(self, node: Node, content_bytes: bytes) -> list[str]:
		"""
		Extract names of functions/methods called within a JS node's scope.

		Recursively searches for 'call_expression' nodes and extracts the function identifier.

		Args:
		    node: The tree-sitter node (e.g., function/method body)
		    content_bytes: Source code content as bytes

		Returns:
		    List of called function/method names

		"""
		calls = []
		for child in node.children:
			if child.type == "call_expression":
				function_node = child.child_by_field_name("function")
				if function_node:
					# Extract the identifier (e.g., funcName, obj.methodName)
					try:
						call_name = content_bytes[function_node.start_byte : function_node.end_byte].decode(
							"utf-8", errors="ignore"
						)
						calls.append(call_name)
					except UnicodeDecodeError:
						pass  # Ignore decoding errors
			# Recursively search deeper within non-call children
			else:
				calls.extend(self.extract_calls(child, content_bytes))
		return list(set(calls))  # Return unique calls

__init__

__init__() -> None

Initialize with JavaScript configuration.

Source code in src/codemap/processor/tree_sitter/languages/javascript.py
75
76
77
def __init__(self) -> None:
	"""Initialize with JavaScript configuration."""
	super().__init__(JAVASCRIPT_CONFIG)

get_entity_type

get_entity_type(
	node: Node, parent: Node | None, content_bytes: bytes
) -> EntityType

Determine the EntityType for a JavaScript node.

Parameters:

Name Type Description Default
node Node

The tree-sitter node

required
parent Node | None

The parent node (if any)

required
content_bytes bytes

Source code content as bytes

required

Returns:

Type Description
EntityType

The entity type

Source code in src/codemap/processor/tree_sitter/languages/javascript.py
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
def get_entity_type(self, node: Node, parent: Node | None, content_bytes: bytes) -> EntityType:
	"""
	Determine the EntityType for a JavaScript node.

	Args:
	    node: The tree-sitter node
	    parent: The parent node (if any)
	    content_bytes: Source code content as bytes

	Returns:
	    The entity type

	"""
	node_type = node.type
	logger.debug(
		"Getting entity type for JavaScript node: type=%s, parent_type=%s",
		node_type,
		parent.type if parent else None,
	)

	# Module-level
	if node_type in self.config.module:
		return EntityType.MODULE
	if node_type in self.config.namespace:
		return EntityType.NAMESPACE

	# Documentation
	if node_type in self.config.comment:
		# Check if this is a JSDoc comment (starts with /**)
		if self._is_jsdoc_comment(node, content_bytes):
			return EntityType.DOCSTRING
		return EntityType.COMMENT

	# Type definitions
	if node_type in self.config.class_:
		return EntityType.CLASS

	# Functions and methods
	if node_type in self.config.function:
		# Check if this is a test function (for frameworks like Jest, Mocha)
		if self._is_test_function(node, content_bytes):
			return EntityType.TEST_CASE
		return EntityType.FUNCTION

	if node_type in self.config.method:
		return EntityType.METHOD

	# Check for test suite declarations (describe blocks in Jest/Mocha)
	if node_type in self.config.test_suite and self._is_test_suite(node, content_bytes):
		return EntityType.TEST_SUITE

	# Property definitions
	if node_type in self.config.property_def:
		return EntityType.PROPERTY

	# Variables and constants
	if node_type in self.config.variable:
		# Check if it's a const declaration
		if self._is_constant(node, content_bytes):
			return EntityType.CONSTANT
		return EntityType.VARIABLE

	# Class fields
	if node_type in self.config.class_field:
		return EntityType.CLASS_FIELD

	# Code organization
	if node_type in self.config.import_:
		return EntityType.IMPORT

	return EntityType.UNKNOWN

find_docstring

find_docstring(
	node: Node, content_bytes: bytes
) -> tuple[str | None, Node | None]

Find the docstring associated with a definition node.

Parameters:

Name Type Description Default
node Node

The tree-sitter node

required
content_bytes bytes

Source code content as bytes

required

Returns:

Type Description
str | None

A tuple containing:

Node | None
  • The extracted docstring text (or None).
tuple[str | None, Node | None]
  • The specific AST node representing the docstring (or None).
Source code in src/codemap/processor/tree_sitter/languages/javascript.py
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
def find_docstring(self, node: Node, content_bytes: bytes) -> tuple[str | None, Node | None]:
	"""
	Find the docstring associated with a definition node.

	Args:
	    node: The tree-sitter node
	    content_bytes: Source code content as bytes

	Returns:
	    A tuple containing:
	    - The extracted docstring text (or None).
	    - The specific AST node representing the docstring (or None).

	"""
	# For functions, classes, and other definition nodes
	parent_node = node.parent

	# Look for JSDoc comments immediately preceding the node
	if parent_node:
		index = None
		for i, child in enumerate(parent_node.children):
			if child == node:
				index = i
				break

		if index is not None and index > 0:
			prev_node = parent_node.children[index - 1]
			if prev_node.type == "comment" and self._is_jsdoc_comment(prev_node, content_bytes):
				try:
					comment_text = content_bytes[prev_node.start_byte : prev_node.end_byte].decode(
						"utf-8", errors="ignore"
					)
					# Clean JSDoc format: remove /** */ and trim
					comment_text = comment_text.strip()
					comment_text = comment_text.removeprefix("/**")
					comment_text = comment_text.removesuffix("*/")
					return comment_text.strip(), prev_node
				except (UnicodeDecodeError, IndexError) as e:
					logger.warning("Failed to decode JavaScript comment: %s", e)

	return None, None

extract_name

extract_name(node: Node, content_bytes: bytes) -> str

Extract the name identifier from a definition node.

Parameters:

Name Type Description Default
node Node

The tree-sitter node

required
content_bytes bytes

Source code content as bytes

required

Returns:

Type Description
str

The extracted name

Source code in src/codemap/processor/tree_sitter/languages/javascript.py
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
def extract_name(self, node: Node, content_bytes: bytes) -> str:
	"""
	Extract the name identifier from a definition node.

	Args:
	    node: The tree-sitter node
	    content_bytes: Source code content as bytes

	Returns:
	    The extracted name

	"""
	# Try to find the name field based on node type
	name_node = None

	if node.type in ["function_declaration", "class_declaration", "method_definition"]:
		name_node = node.child_by_field_name("name")
	elif node.type == "property_identifier":
		name_node = node
	elif node.type in ["variable_declaration", "lexical_declaration"]:
		# Get the first declarator and its name
		declarator = node.child_by_field_name("declarations")
		if declarator and declarator.named_child_count > 0:
			first_declarator = declarator.named_children[0]
			name_node = first_declarator.child_by_field_name("name")
	elif node.type == "public_field_definition":
		name_node = node.child_by_field_name("name")

	if name_node:
		try:
			return content_bytes[name_node.start_byte : name_node.end_byte].decode("utf-8", errors="ignore")
		except (UnicodeDecodeError, IndexError, AttributeError) as e:
			logger.warning("Failed to decode JavaScript name: %s", e)
			return f"<decoding-error-{node.type}>"

	# For call expressions that represent tests or suites
	if node.type == "call_expression":
		callee = node.child_by_field_name("function")
		arguments = node.child_by_field_name("arguments")

		if callee and arguments and arguments.named_child_count > 0:
			# First argument is typically the test/suite name
			first_arg = arguments.named_children[0]
			if first_arg.type == "string":
				try:
					name = content_bytes[first_arg.start_byte : first_arg.end_byte].decode("utf-8", errors="ignore")
					# Remove quotes
					return name.strip("\"'")
				except (UnicodeDecodeError, IndexError):
					pass

	return f"<anonymous-{node.type}>"

get_body_node

get_body_node(node: Node) -> Node | None

Get the statement block node for JS/TS function/class/method body.

Source code in src/codemap/processor/tree_sitter/languages/javascript.py
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
def get_body_node(self, node: Node) -> Node | None:
	"""Get the statement block node for JS/TS function/class/method body."""
	# Common body node type in JS/TS grammar
	body_field_names = ["body", "statement_block"]
	for field_name in body_field_names:
		body_node = node.child_by_field_name(field_name)
		if body_node:
			# Sometimes the direct body is an expression (arrow functions)
			# Check if the found node is a block type
			if body_node.type == "statement_block":
				return body_node
			if body_node.type == "expression_statement":  # Arrow function returning object literal
				if body_node.child_count > 0 and body_node.children[0].type == "object":
					return body_node  # Treat expression as body
			elif node.type == "arrow_function":  # Direct expression in arrow function
				return body_node
	# Fallback for classes where body might be direct children within curly braces
	if node.type in ("class_declaration", "class"):
		for child in node.children:
			if child.type == "class_body":
				return child
	return None

extract_signature

extract_signature(
	node: Node, content_bytes: bytes
) -> str | None

Extract the signature up to the opening curly brace '{' for JS/TS.

Source code in src/codemap/processor/tree_sitter/languages/javascript.py
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
def extract_signature(self, node: Node, content_bytes: bytes) -> str | None:
	"""Extract the signature up to the opening curly brace '{' for JS/TS."""
	# Find the body node first
	body_node = self.get_body_node(node)

	if body_node:
		# Signature is everything from the start of the node up to the start of the body
		start_byte = node.start_byte
		end_byte = body_node.start_byte
		# Adjust end_byte to exclude trailing whitespace before the body
		while end_byte > start_byte and content_bytes[end_byte - 1 : end_byte].isspace():
			end_byte -= 1
		try:
			return content_bytes[start_byte:end_byte].decode("utf-8", errors="ignore").strip()
		except IndexError:
			return None
	else:
		# Fallback: if no body found (e.g., abstract method, interface?), return the first line
		return self._get_node_text(node, content_bytes).splitlines()[0]

get_enclosing_node_of_type

get_enclosing_node_of_type(
	node: Node, target_type: EntityType
) -> Node | None

Find the first ancestor node matching the target JS/TS entity type.

Source code in src/codemap/processor/tree_sitter/languages/javascript.py
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
def get_enclosing_node_of_type(self, node: Node, target_type: EntityType) -> Node | None:
	"""Find the first ancestor node matching the target JS/TS entity type."""
	target_node_types = []
	if target_type == EntityType.CLASS:
		target_node_types = ["class_declaration", "class", "class_expression"]
	elif target_type == EntityType.FUNCTION:
		# Includes function declarations, arrow functions, function expressions
		target_node_types = ["function_declaration", "arrow_function", "function"]
	elif target_type == EntityType.METHOD:
		target_node_types = ["method_definition"]
	elif target_type == EntityType.MODULE:
		# Module is typically the root 'program' node
		current = node
		while current.parent:
			current = current.parent
		return current if current.type == "program" else None
	# Add other types if needed (e.g., INTERFACE for TS)

	if not target_node_types:
		return None

	current = node.parent
	while current:
		if current.type in target_node_types:
			return current
		current = current.parent
	return None

get_children_to_process

get_children_to_process(
	node: Node, body_node: Node | None
) -> list[Node]

Get the list of child nodes that should be recursively processed.

Parameters:

Name Type Description Default
node Node

The tree-sitter node

required
body_node Node | None

The body node if available

required

Returns:

Type Description
list[Node]

List of child nodes to process

Source code in src/codemap/processor/tree_sitter/languages/javascript.py
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
def get_children_to_process(self, node: Node, body_node: Node | None) -> list[Node]:
	"""
	Get the list of child nodes that should be recursively processed.

	Args:
	    node: The tree-sitter node
	    body_node: The body node if available

	Returns:
	    List of child nodes to process

	"""
	# Process children of the body node if it exists, otherwise process direct children
	if body_node:
		return list(body_node.children)

	# Special handling for certain nodes
	if node.type in ["variable_declaration", "lexical_declaration"]:
		# Process the declarations field
		declarations = node.child_by_field_name("declarations")
		return [declarations] if declarations else []

	return list(node.children)

should_skip_node

should_skip_node(node: Node) -> bool

Determine if a node should be skipped entirely during processing.

Parameters:

Name Type Description Default
node Node

The tree-sitter node

required

Returns:

Type Description
bool

True if the node should be skipped

Source code in src/codemap/processor/tree_sitter/languages/javascript.py
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
def should_skip_node(self, node: Node) -> bool:
	"""
	Determine if a node should be skipped entirely during processing.

	Args:
	    node: The tree-sitter node

	Returns:
	    True if the node should be skipped

	"""
	# Skip non-named nodes (like punctuation, operators)
	if not node.is_named:
		return True

	# Skip syntax nodes that don't contribute to code structure
	return node.type in ["(", ")", "{", "}", "[", "]", ";", ".", ",", ":", "=>"]

extract_imports

extract_imports(
	node: Node, content_bytes: bytes
) -> list[str]

Extract imported module names from a JavaScript import statement.

Parameters:

Name Type Description Default
node Node

The tree-sitter node representing an import statement

required
content_bytes bytes

Source code content as bytes

required

Returns:

Type Description
list[str]

List of imported module names as strings

Source code in src/codemap/processor/tree_sitter/languages/javascript.py
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
def extract_imports(self, node: Node, content_bytes: bytes) -> list[str]:
	"""
	Extract imported module names from a JavaScript import statement.

	Args:
	    node: The tree-sitter node representing an import statement
	    content_bytes: Source code content as bytes

	Returns:
	    List of imported module names as strings

	"""
	if node.type not in self.config.import_:
		return []

	imported_names = []

	try:
		# Find the source (module path) of the import
		source_node = node.child_by_field_name("source")
		if not source_node:
			return []

		# Extract the module path from the string literal
		module_path = content_bytes[source_node.start_byte : source_node.end_byte].decode("utf-8", errors="ignore")
		# Remove quotes
		module_path = module_path.strip("\"'")

		# Check for different import patterns:

		# 1. Default import: "import Name from 'module'"
		default_import = node.child_by_field_name("default")
		if default_import:
			name = content_bytes[default_import.start_byte : default_import.end_byte].decode(
				"utf-8", errors="ignore"
			)
			imported_names.append(f"{module_path}.default")

		# 2. Named imports: "import { foo, bar as baz } from 'module'"
		named_imports = node.child_by_field_name("named_imports")
		if named_imports:
			for child in named_imports.children:
				if child.type == "import_specifier":
					imported_name = child.child_by_field_name("name")
					if imported_name:
						name = content_bytes[imported_name.start_byte : imported_name.end_byte].decode(
							"utf-8", errors="ignore"
						)
						imported_names.append(f"{module_path}.{name}")

		# 3. Namespace import: "import * as Name from 'module'"
		namespace_import = node.child_by_field_name("namespace_import")
		if namespace_import:
			imported_names.append(f"{module_path}.*")

		# If no specific imports found but we have a module, add the whole module
		if not imported_names and module_path:
			imported_names.append(module_path)

	except (UnicodeDecodeError, IndexError, AttributeError) as e:
		logger.warning("Failed to decode JavaScript imports: %s", e)

	return imported_names

extract_calls

extract_calls(
	node: Node, content_bytes: bytes
) -> list[str]

Extract names of functions/methods called within a JS node's scope.

Recursively searches for 'call_expression' nodes and extracts the function identifier.

Parameters:

Name Type Description Default
node Node

The tree-sitter node (e.g., function/method body)

required
content_bytes bytes

Source code content as bytes

required

Returns:

Type Description
list[str]

List of called function/method names

Source code in src/codemap/processor/tree_sitter/languages/javascript.py
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
def extract_calls(self, node: Node, content_bytes: bytes) -> list[str]:
	"""
	Extract names of functions/methods called within a JS node's scope.

	Recursively searches for 'call_expression' nodes and extracts the function identifier.

	Args:
	    node: The tree-sitter node (e.g., function/method body)
	    content_bytes: Source code content as bytes

	Returns:
	    List of called function/method names

	"""
	calls = []
	for child in node.children:
		if child.type == "call_expression":
			function_node = child.child_by_field_name("function")
			if function_node:
				# Extract the identifier (e.g., funcName, obj.methodName)
				try:
					call_name = content_bytes[function_node.start_byte : function_node.end_byte].decode(
						"utf-8", errors="ignore"
					)
					calls.append(call_name)
				except UnicodeDecodeError:
					pass  # Ignore decoding errors
		# Recursively search deeper within non-call children
		else:
			calls.extend(self.extract_calls(child, content_bytes))
	return list(set(calls))  # Return unique calls