Latest Threat Research: SANDWORM_MODE: Shai-Hulud-Style npm Worm Hijacks CI Workflows and Poisons AI Toolchains. Details
Socket
Book a DemoInstallSign in
Socket

bugbug

Package Overview
Dependencies
Maintainers
2
Versions
543
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

bugbug - Python Package Compare versions

Comparing version
0.0.614
to
0.0.615
+1
-1
bugbug.egg-info/PKG-INFO
Metadata-Version: 2.1
Name: bugbug
Version: 0.0.614
Version: 0.0.615
Summary: ML tools for Mozilla projects

@@ -5,0 +5,0 @@ Author: Marco Castelluccio

amqp==5.3.1
beautifulsoup4==4.14.2
beautifulsoup4==4.14.3
boto3==1.41.2
imbalanced-learn==0.14.0
langchain-anthropic==1.1.0
langchain-anthropic==1.3.0
langchain-classic==1.0.0
langchain-community==0.4.1
langchain-google-genai==3.1.0
langchain-google-genai==4.0.0
langchain-mistralai==1.0.1
langchain-openai==1.0.3
langchain==1.0.8
langgraph==1.0.3
langchain-openai==1.1.3
langchain==1.2.0
langgraph==1.0.5
libmozdata==0.2.12

@@ -23,3 +23,3 @@ llama-cpp-python==0.2.90

numpy==2.3.5
orjson==3.11.4
orjson==3.11.5
ortools==9.14.6206

@@ -31,3 +31,3 @@ pandas==2.3.3

python-hglib==2.6.2
qdrant-client==1.16.0
qdrant-client==1.15.1
ratelimit==2.2.1

@@ -42,3 +42,3 @@ requests-html==0.10.0

tabulate==0.9.0
taskcluster==93.1.5
taskcluster==94.1.1
tenacity==9.1.2

@@ -45,0 +45,0 @@ tqdm==4.67.1

@@ -112,3 +112,2 @@ LICENSE

scripts/code_review_tool_evaluator_report.py
scripts/code_review_tool_runner.py
scripts/comment_level_labeler.py

@@ -115,0 +114,0 @@ scripts/comment_resolver_runner.py

@@ -238,2 +238,9 @@ # -*- coding: utf-8 -*-

)
if source is None:
logger.warning(
"Could not extract source for %s:%d",
definition["path"],
definition["start"],
)
continue
result.append(

@@ -240,0 +247,0 @@ Function(

@@ -941,3 +941,6 @@ # -*- coding: utf-8 -*-

# Collect head files.
head_files = data.get("DEFAULT", {}).get("head", "").split(" ")
head_value = data.get("DEFAULT", {}).get("head", [])
head_files = (
head_value if isinstance(head_value, list) else head_value.split(" ")
)
for head_file in head_files:

@@ -944,0 +947,0 @@ if not head_file.strip():

@@ -18,3 +18,3 @@ # -*- coding: utf-8 -*-

# Agent
from bugbug.tools.code_review.agent import TARGET_SOFTWARE, CodeReviewTool
from bugbug.tools.code_review.agent import CodeReviewTool

@@ -58,3 +58,2 @@ # Databases

"CodeReviewTool",
"TARGET_SOFTWARE",
# Databases

@@ -61,0 +60,0 @@ "EvaluationAction",

@@ -12,5 +12,6 @@ # -*- coding: utf-8 -*-

from logging import getLogger
from typing import Iterable, Literal, Optional
from typing import Iterable, Optional
from langchain.agents import create_agent
from langchain.agents.structured_output import ProviderStrategy
from langchain.chat_models import BaseChatModel

@@ -21,2 +22,3 @@ from langchain.messages import HumanMessage

from langgraph.errors import GraphRecursionError
from pydantic import BaseModel, Field
from unidiff import PatchSet

@@ -34,8 +36,7 @@

DEFAULT_REJECTED_EXAMPLES,
OUTPUT_FORMAT_JSON,
OUTPUT_FORMAT_TEXT,
FIRST_MESSAGE_TEMPLATE,
PROMPT_TEMPLATE_FILTERING_ANALYSIS,
PROMPT_TEMPLATE_REVIEW,
PROMPT_TEMPLATE_SUMMARIZATION,
STATIC_COMMENT_EXAMPLES,
SYSTEM_PROMPT_TEMPLATE,
TEMPLATE_COMMENT_EXAMPLE,

@@ -47,3 +48,2 @@ TEMPLATE_PATCH_FROM_HUNK,

generate_processed_output,
parse_model_output,
)

@@ -57,6 +57,25 @@ from bugbug.tools.core.data_types import InlineComment

# Global variable for target software
TARGET_SOFTWARE: str | None = None
class GeneratedReviewComment(BaseModel):
"""A review comment generated by the code review agent."""
file: str = Field(description="The path to the file the comment applies to.")
code_line: int = Field(description="The line number that the comment refers to.")
comment: str = Field(description="The review comment.")
explanation: str = Field(
description="A brief rationale for the comment, including how confident you are and why."
)
order: int = Field(
description="An integer representing the priority of the comment, with 1 being the highest confidence/importance."
)
class AgentResponse(BaseModel):
"""The response from the code review agent."""
comments: list[GeneratedReviewComment] = Field(
description="A list of generated review comments."
)
class CodeReviewTool(GenerativeModelTool):

@@ -68,2 +87,4 @@ version = "0.0.1"

llm: BaseChatModel,
summarization_llm: BaseChatModel,
filtering_llm: BaseChatModel,
function_search: Optional[FunctionSearch] = None,

@@ -74,7 +95,7 @@ review_comments_db: Optional["ReviewCommentsDB"] = None,

suggestions_feedback_db: Optional["SuggestionsFeedbackDB"] = None,
target_software: Optional[str] = None,
target_software: str = "Mozilla Firefox",
) -> None:
super().__init__()
self.target_software = target_software or TARGET_SOFTWARE
self.target_software = target_software

@@ -97,14 +118,10 @@ self._tokenizer = get_tokenizer(

experience_scope = (
f"the {self.target_software} source code"
if self.target_software
else "a software project"
)
self.summarization_chain = LLMChain(
prompt=PromptTemplate.from_template(
PROMPT_TEMPLATE_SUMMARIZATION,
partial_variables={"experience_scope": experience_scope},
partial_variables={
"experience_scope": f"the {self.target_software} source code"
},
),
llm=llm,
llm=summarization_llm,
verbose=verbose,

@@ -115,7 +132,5 @@ )

PROMPT_TEMPLATE_FILTERING_ANALYSIS,
partial_variables={
"target_code_consistency": self.target_software or "rest of the"
},
partial_variables={"target_code_consistency": self.target_software},
),
llm=llm,
llm=filtering_llm,
verbose=verbose,

@@ -131,3 +146,6 @@ )

tools,
system_prompt=f"You are an expert reviewer for {experience_scope}, with experience on source code reviews.",
system_prompt=SYSTEM_PROMPT_TEMPLATE.format(
target_software=self.target_software,
),
response_format=ProviderStrategy(AgentResponse),
)

@@ -143,8 +161,25 @@

@staticmethod
def create(
llm=None, summarization_llm=None, filtering_llm=None, **kwargs
) -> "CodeReviewTool":
from bugbug.tools.core.llms import create_anthropic_llm
return CodeReviewTool(
llm=llm
or create_anthropic_llm(
model_name="claude-opus-4-5-20251101",
max_tokens=40_000,
temperature=None,
thinking={"type": "enabled", "budget_tokens": 10_000},
),
summarization_llm=summarization_llm or create_anthropic_llm(),
filtering_llm=filtering_llm or create_anthropic_llm(),
**kwargs,
)
def count_tokens(self, text):
return len(self._tokenizer.encode(text))
def generate_initial_prompt(
self, patch: Patch, output_format: Literal["JSON", "TEXT"] = "JSON"
) -> str:
def generate_initial_prompt(self, patch: Patch) -> str:
formatted_patch = format_patch_set(patch.patch_set)

@@ -157,2 +192,3 @@

"patch_title": patch.patch_title,
"patch_description": patch.patch_description,
},

@@ -165,13 +201,4 @@ return_only_outputs=True,

if output_format == "JSON":
output_instructions = OUTPUT_FORMAT_JSON
elif output_format == "TEXT":
output_instructions = OUTPUT_FORMAT_TEXT
else:
raise ValueError(
f"Unsupported output format: {output_format}, choose JSON or TEXT"
)
created_before = patch.date_created if self.is_experiment_env else None
return PROMPT_TEMPLATE_REVIEW.format(
return FIRST_MESSAGE_TEMPLATE.format(
patch=formatted_patch,

@@ -181,11 +208,5 @@ patch_summarization=output_summarization,

approved_examples=self._get_generated_examples(patch, created_before),
target_code_consistency=self.target_software or "rest of the",
output_instructions=output_instructions,
bug_title=patch.bug_title,
patch_title=patch.patch_title,
patch_url=patch.patch_url,
target_software=self.target_software,
)
def _generate_suggestions(self, patch: Patch):
def _generate_suggestions(self, patch: Patch) -> list[GeneratedReviewComment]:
try:

@@ -200,2 +221,3 @@ for chunk in self.agent.stream(

stream_mode="values",
config={"recursion_limit": 50},
):

@@ -206,3 +228,3 @@ result = chunk

return result["messages"][-1].content
return result["structured_response"].comments

@@ -213,5 +235,3 @@ def run(self, patch: Patch) -> list[InlineComment] | None:

output = self._generate_suggestions(patch)
unfiltered_suggestions = parse_model_output(output)
unfiltered_suggestions = self._generate_suggestions(patch)
if not unfiltered_suggestions:

@@ -229,3 +249,5 @@ logger.info("No suggestions were generated")

{
"comments": output,
"comments": str(
[comment.model_dump() for comment in unfiltered_suggestions]
),
"rejected_examples": rejected_examples,

@@ -320,3 +342,5 @@ },

def get_similar_rejected_comments(self, suggestions) -> Iterable[str]:
def get_similar_rejected_comments(
self, suggestions: list[GeneratedReviewComment]
) -> Iterable[str]:
if not self.suggestions_feedback_db:

@@ -331,3 +355,3 @@ raise Exception("Suggestions feedback database is not available")

self.suggestions_feedback_db.find_similar_rejected_suggestions(
suggestion["comment"],
suggestion.comment,
limit=num_examples_per_suggestion,

@@ -334,0 +358,0 @@ excluded_ids=seen_ids,

@@ -23,20 +23,27 @@ # -*- coding: utf-8 -*-

@tool
def expand_context(file_path: str, line_number: int) -> str:
"""Expand the context around a specific line in a file diff.
def expand_context(file_path: str, start_line: int, end_line: int) -> str:
"""Show the content of a file between specified line numbers as it is before the patch.
Be careful to not fill your context window with too much data. Request the
minimum amount of context necessary to understand the code, but do not split
what you really need into multiple requests if the line range is continuous.
Args:
file_path: The path to the file.
line_number: The line number to expand context around. It should be based on the original file, not the patch.
start_line: The starting line number in the original file. Minimum is 1.
end_line: The ending line number in the original file. Maximum is the total number of lines in the file.
Returns:
Lines of code around the specified line number.
The content of the file between the specified line numbers.
"""
runtime = get_runtime(CodeReviewContext)
file_content = runtime.context.patch.get_old_file(file_path)
# TODO: Expanding the context using an AST parser like tree-sitter to
# include the whole function or class when it is relatively small.
try:
file_content = runtime.context.patch.get_old_file(file_path)
except FileNotFoundError:
return "File not found in the repository before the patch."
lines = file_content.splitlines()
start = max(0, line_number - 20)
end = min(len(lines), line_number + 20)
start = max(1, start_line) - 1
end = min(len(lines), end_line)

@@ -43,0 +50,0 @@ # Format the output with line numbers that match the original file.

@@ -16,3 +16,3 @@ # -*- coding: utf-8 -*-

1. **Intent**: Describe the intent of the changes, what they are trying to achieve, and how they relate to the bug or feature request.
2. **Structure**: Describe the structure of the changes, including any new functions, classes, or modules introduced, and how they fit into the existing codebase.
2. **Solution**: Describe the solution implemented in the code changes, focusing on how the changes address the intent.

@@ -22,86 +22,67 @@ Do not include any code in the summarization, only a description of the changes.

**Bug title**:
<bug_title>
{bug_title}
</bug_title>
**Commit message**:
<commit_message>
{patch_title}
{patch_description}
</commit_message>
**Diff**:
{patch}"""
<patch>
{patch}
</patch>"""
PROMPT_TEMPLATE_REVIEW = """<task>
Generate high-quality code review comments for the patch provided below.
</task>
SYSTEM_PROMPT_TEMPLATE = """You are an expert {target_software} engineer tasked with analyzing a pull request and providing high-quality review comments. You will examine a code patch and generate constructive feedback focusing on potential issues in the changed code.
<instructions>
<analyze_changes>
**Analyze the Changes**:
* Understand the intent and structure of the changes in the patch.
* Use the provided summarization for context, but prioritize what's visible in the diff.
</analyze_changes>
## Instructions
<identify_issues>
**Identify Issues**:
* Detect bugs, logical errors, performance concerns, security issues, or violations of the `{target_code_consistency}` coding standards.
* Focus only on **new or changed lines** (lines beginning with `+`).
* **Prioritize**: Security vulnerabilities > Functional bugs > Performance issues > Style/readability concerns.
</identify_issues>
Follow this systematic approach to review the patch:
<assess_confidence>
**Assess Confidence and Order**:
* **Only include comments where you are at least 80% confident the issue is valid**.
* **Sort the comments by descending confidence and importance**:
* Start with issues you are **certain are valid**.
* Also, prioritize important issues that you are **confident about**.
* Follow with issues that are **plausible but uncertain** (possible false positives).
* **When uncertain, use available tools to verify before commenting**.
* Assign each comment a numeric `order`, starting at 1.
</assess_confidence>
**Step 1: Analyze the Changes**
- Understand what the patch is trying to accomplish
- Use the patch summary for context, but focus primarily on what you can see in the actual diff
- Identify the intent and structure of the changes
<write_comments>
**Write Clear, Constructive Comments**:
* Use **direct, declarative language**. State the problem definitively, then suggest the fix.
* Keep comments **short and specific**.
* Focus strictly on code-related concerns.
* **Banned phrases**: "maybe", "might want to", "consider", "possibly", "could be", "you may want to".
* **Use directive language**: "Fix", "Remove", "Change", "Add", "Validate", "Check" (not "Consider checking").
* Avoid repeating what the code is doing unless it supports your critique.
</write_comments>
**Step 2: Identify Issues**
- Look for bugs, logical errors, performance problems, security vulnerabilities, or violations of the coding standards
- Focus ONLY on new or changed lines (lines that begin with `+`)
- Never comment on unmodified code
- Prioritize issues in this order: Security vulnerabilities > Functional bugs > Performance issues > Style/readability concerns
<use_tools>
**Use available tools to verify concerns**:
* Use tools to gather context when you suspect an issue but need verification.
* Use `find_function_definition` to check if error handling or validation exists elsewhere.
* Use `expand_context` to see if edge cases are handled in surrounding code.
* **Do not suggest issues you cannot verify with available context and tools**.
</use_tools>
**Step 3: Verify and Assess Confidence**
- Use available tools when you need to verify concerns or gather additional context
- Only include comments where you are at least 80% confident the issue is valid
- When uncertain about an issue, use tools like `find_function_definition` or `expand_context` to verify before commenting
- Do not suggest issues you cannot verify with available context
<avoid>
**Avoid Comments That**:
* Refer to unmodified code (lines without a `+` prefix).
* Ask for verification or confirmation (e.g., "Check if…", "Ensure that…").
* Provide praise or restate obvious facts.
* Focus on testing.
* Point out issues that are already handled in the visible code.
* Suggest problems based on assumptions without verifying the context.
* Flag style preferences without clear `{target_code_consistency}` standard violations.
</avoid>
</instructions>
**Step 4: Sort and Order Comments**
- Sort comments by descending confidence and importance
- Start with issues you are certain are valid and that are most critical
- Assign each comment a numeric order starting at 1
<output_format>
{output_instructions}
</output_format>
**Step 5: Write Clear, Constructive Comments**
- Use direct, declarative language - state the problem definitively, then suggest the fix
- Keep comments short and specific
- Use directive language: "Fix", "Remove", "Change", "Add"
- NEVER use these banned phrases: "maybe", "might want to", "consider", "possibly", "could be", "you may want to"
- Focus strictly on code-related concerns
<examples>
{comment_examples}
{approved_examples}
</examples>
## What NOT to Include
<context>
**Review Context**:
Target Software: {target_software}
Bug Title: {bug_title}
Patch Title: {patch_title}
Source URL: {patch_url}
</context>
Do not write comments that:
- Refer to unmodified code (lines without a `+` prefix)
- Ask for verification or confirmation (e.g., "Check if...", "Ensure that...")
- Provide praise or restate obvious facts
- Focus on testing concerns
- Point out issues that are already handled in the visible code
- Suggest problems based on assumptions without verifying the context
- Flag style preferences without clear coding standard violations
"""
FIRST_MESSAGE_TEMPLATE = """Here is a summary of the patch:
<patch_summary>

@@ -111,25 +92,16 @@ {patch_summarization}

<patch>
{patch}
</patch>
"""
OUTPUT_FORMAT_JSON = """
Respond only with a **JSON list**. Each object must contain the following fields:
Here are examples of good code review comments to guide your style and approach:
* `"file"`: The relative path to the file the comment applies to.
* `"code_line"`: The number of the specific changed line of code that the comment refers to.
* `"comment"`: A concise review comment.
* `"explanation"`: A brief rationale for the comment, including how confident you are and why.
* `"order"`: An integer representing the priority of the comment, with 1 being the highest confidence/importance.
"""
<examples>
{comment_examples}
{approved_examples}
</examples>
OUTPUT_FORMAT_TEXT = """
Respond only with a **plain text list** with the following details:
* `"filename"`: The relative path to the file the comment applies to.
* `"line_number"`: The number of the specific changed line of code that the comment refers to.
* `"comment"`: A concise review comment.
Here is the patch you need to review:
The format should be: filename:line_number "comment"
<patch>
{patch}
</patch>
"""

@@ -136,0 +108,0 @@

@@ -47,3 +47,3 @@ # -*- coding: utf-8 -*-

def create_anthropic_llm(
temperature=0.2, top_p=None, model_name="claude-sonnet-4-5-20250929"
temperature=0.2, top_p=None, model_name="claude-sonnet-4-5-20250929", **kwargs
):

@@ -57,2 +57,3 @@ from langchain_anthropic import ChatAnthropic

top_p=top_p,
**kwargs,
)

@@ -59,0 +60,0 @@

@@ -56,2 +56,8 @@ # -*- coding: utf-8 -*-

@abstractmethod
def patch_description(self) -> str:
"""Return the description of the patch."""
...
@property
@abstractmethod
def patch_url(self) -> str:

@@ -58,0 +64,0 @@ """Return the URL of the patch."""

@@ -180,3 +180,3 @@ # -*- coding: utf-8 -*-

def __init__(self, data: dict):
self.metadata = data
self._metadata = data

@@ -201,4 +201,8 @@ @staticmethod

@property
def summary(self) -> str:
return self._metadata["summary"]
def to_md(self) -> str:
"""Return a markdown representation of the bug."""
return bug_dict_to_markdown(self.metadata)
return bug_dict_to_markdown(self._metadata)

@@ -17,5 +17,6 @@ # -*- coding: utf-8 -*-

from bugbug import bugzilla, db, phabricator, utils
from bugbug import db, phabricator, utils
from bugbug.tools.core.data_types import InlineComment, ReviewRequest
from bugbug.tools.core.platforms.base import Patch, ReviewData
from bugbug.tools.core.platforms.bugzilla import Bug
from bugbug.utils import get_secret

@@ -134,3 +135,3 @@

def _get_file(self, file_path: str, is_before_patch: bool) -> str:
def _get_file_from_patch(self, file_path: str, is_before_patch: bool) -> str:
for changeset in self._changesets:

@@ -155,5 +156,29 @@ if changeset["fields"]["path"]["displayPath"] == file_path:

def _get_file_from_repo(self, file_path: str, commit_hash: str) -> str:
r = utils.get_session("hgmo").get(
f"https://hg.mozilla.org/mozilla-unified/raw-file/{commit_hash}/{file_path}",
headers={
"User-Agent": utils.get_user_agent(),
},
)
if r.status_code == 404:
raise FileNotFoundError(
f"File {file_path} not found in commit {commit_hash}"
)
r.raise_for_status()
return r.text
def get_old_file(self, file_path: str) -> str:
return self._get_file(file_path, is_before_patch=True)
if file_path.startswith("b/") or file_path.startswith("a/"):
file_path = file_path[2:]
try:
return self._get_file_from_patch(file_path, is_before_patch=True)
except FileNotFoundError:
return self._get_file_from_repo(
file_path, commit_hash=self.base_commit_hash
)
@cached_property

@@ -257,14 +282,5 @@ def _changesets(self) -> list[dict]:

@cached_property
def _bug_metadata(self) -> dict | None:
id = self.bug_id
bugs = bugzilla.get(id)
def bug(self) -> Bug:
return Bug.get(self.bug_id)
if id not in bugs:
logger.warning(
"Bug %d not found in Bugzilla. This might be a private bug.", id
)
return None
return bugs[id]
@property

@@ -274,10 +290,6 @@ def bug_id(self) -> int:

@cached_property
@property
def bug_title(self) -> str:
if not self._bug_metadata:
# Use a placeholder when the bug metadata is not available
return "--"
return self.bug.summary
return self._bug_metadata["summary"]
@cached_property

@@ -288,2 +300,6 @@ def patch_title(self) -> str:

@property
def patch_description(self) -> str:
return self._revision_metadata["fields"].get("summary", "")
@property
def revision_id(self) -> int:

@@ -290,0 +306,0 @@ return self._revision_metadata["id"]

@@ -56,2 +56,6 @@ # -*- coding: utf-8 -*-

@property
def patch_description(self) -> str:
raise NotImplementedError
@cached_property

@@ -58,0 +62,0 @@ def bug_title(self) -> str:

Metadata-Version: 2.1
Name: bugbug
Version: 0.0.614
Version: 0.0.615
Summary: ML tools for Mozilla projects

@@ -5,0 +5,0 @@ Author: Marco Castelluccio

amqp==5.3.1
beautifulsoup4==4.14.2
beautifulsoup4==4.14.3
boto3==1.41.2
imbalanced-learn==0.14.0
langchain==1.0.8
langchain-anthropic==1.1.0
langchain==1.2.0
langchain-anthropic==1.3.0
langchain-classic==1.0.0
langchain-community==0.4.1
langchain-google-genai==3.1.0
langchain-google-genai==4.0.0
langchain-mistralai==1.0.1
langchain-openai==1.0.3
langgraph==1.0.3
langchain-openai==1.1.3
langgraph==1.0.5
libmozdata==0.2.12

@@ -23,3 +23,3 @@ llama-cpp-python==0.2.90

numpy==2.3.5
orjson==3.11.4
orjson==3.11.5
ortools==9.14.6206

@@ -31,3 +31,3 @@ pandas==2.3.3

python-hglib==2.6.2
qdrant-client==1.16.0
qdrant-client==1.15.1
ratelimit==2.2.1

@@ -42,3 +42,3 @@ requests==2.32.5

tabulate==0.9.0
taskcluster==93.1.5
taskcluster==94.1.1
tenacity==9.1.2

@@ -45,0 +45,0 @@ tqdm==4.67.1

@@ -5,6 +5,7 @@ # %%

from scripts.code_review_tool_evaluator import get_latest_evaluation_results_file
import scripts.code_review_tool_evaluator as evaluator_script
evaluation_results = pd.read_csv(
get_latest_evaluation_results_file("../evaluation_results")
# evaluator_script.get_latest_evaluation_results_file("../evaluation_results")
evaluator_script.get_ongoing_evaluation_results_file("../evaluation_results")
)

@@ -11,0 +12,0 @@

@@ -35,5 +35,5 @@ # -*- coding: utf-8 -*-

from bugbug.tools.core import llms
from bugbug.tools.core.exceptions import ModelResultError
from bugbug.vectordb import QdrantVectorDB
code_review.TARGET_SOFTWARE = "Mozilla Firefox"
VERBOSE_CODE_REVIEW = False

@@ -225,3 +225,5 @@

def get_tool_variants() -> list[tuple[str, code_review.CodeReviewTool]]:
def get_tool_variants(
variants: list[str],
) -> list[tuple[str, code_review.CodeReviewTool]]:
"""Returns a list of tool variants to evaluate.

@@ -259,27 +261,31 @@

tool_variants.append(
(
"Claude",
code_review.CodeReviewTool(
llm=llms.create_anthropic_llm(),
function_search=function_search,
review_comments_db=review_comments_db,
suggestions_feedback_db=suggestions_feedback_db,
verbose=VERBOSE_CODE_REVIEW,
),
if "claude" in variants:
tool_variants.append(
(
"Claude",
code_review.CodeReviewTool.create(
function_search=function_search,
review_comments_db=review_comments_db,
suggestions_feedback_db=suggestions_feedback_db,
verbose=VERBOSE_CODE_REVIEW,
),
)
)
)
tool_variants.append(
(
"GPT",
code_review.CodeReviewTool(
llm=llms.create_openai_llm(),
function_search=function_search,
review_comments_db=review_comments_db,
suggestions_feedback_db=suggestions_feedback_db,
verbose=VERBOSE_CODE_REVIEW,
),
if "gpt" in variants:
llm = llms.create_openai_llm()
tool_variants.append(
(
"GPT",
code_review.CodeReviewTool.create(
llm=llm,
summarization_llm=llm,
filtering_llm=llm,
function_search=function_search,
review_comments_db=review_comments_db,
suggestions_feedback_db=suggestions_feedback_db,
verbose=VERBOSE_CODE_REVIEW,
),
)
)
)

@@ -354,2 +360,22 @@ return tool_variants

def get_ongoing_evaluation_results_file(results_dir: str | None):
import glob
import os
base_file = get_latest_evaluation_results_file(results_dir)
files = [
file
for file in glob.glob("evaluation_results_*.csv", root_dir=results_dir)
if "#" not in file and file > base_file
]
if not files:
raise FileNotFoundError("No ongoing evaluation results file found.")
latests_file = max(files)
if results_dir:
return os.path.join(results_dir, latests_file)
return latests_file
def main(args):

@@ -361,7 +387,6 @@ review_platform = "phabricator"

tool_variants = get_tool_variants()
tool_variants = get_tool_variants(args.variants)
evaluator = FeedbackEvaluator(args.evaluation_dataset)
is_first_result = True
result_file = os.path.join(

@@ -371,6 +396,14 @@ args.results_dir,

)
evaluation_results_file = os.path.join(
args.results_dir,
f"evaluation_results_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.csv",
)
is_first_result = not os.path.exists(result_file)
if is_first_result:
evaluation_results_file = os.path.join(
args.results_dir,
f"evaluation_results_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.csv",
)
seen_patches = set()
else:
evaluation_results_file = get_ongoing_evaluation_results_file(args.results_dir)
seen_patches = set(pd.read_csv(evaluation_results_file)["diff_id"].to_list())
result_unique_columns = ["Review Request ID", "File", "Line", "Comment Number"]

@@ -430,2 +463,14 @@ result_all_columns = result_unique_columns + [

for review_request_id, review_request in selected_review_requests:
if review_request_id in [227266, 233414]:
print(
f"Skipping Review Request ID {review_request_id} because it is known to cause issues."
)
continue
if review_request.patch_id in seen_patches:
print(
f"Skipping Review Request ID {review_request_id} (Diff ID {review_request.patch_id}) because it was already evaluated."
)
continue
print("---------------------------------------------------------")

@@ -453,2 +498,5 @@ print(f"Review Request ID: {review_request_id}")

continue
except ModelResultError as e:
print("Error while running the tool:", e)
continue

@@ -559,2 +607,10 @@ print_prettified_comments(comments)

)
parser.add_argument(
"--variant",
dest="variants",
action="append",
help="the variants to use, use multiple times for multiple variants",
choices=["claude", "gpt"],
required=True,
)

@@ -561,0 +617,0 @@ args = parser.parse_args()

@@ -1,1 +0,1 @@

0.0.614
0.0.615
# -*- coding: utf-8 -*-
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
import argparse
import sys
from bugbug.code_search.function_search import function_search_classes
from bugbug.tools import code_review
from bugbug.tools.core import llms
from bugbug.vectordb import QdrantVectorDB
def run(args) -> None:
    """Run the code review tool on a single review request and print the result.

    Args:
        args: Parsed command-line arguments (see ``parse_args``): the review
            platform, the review request ID, the LLM selection options, and an
            optional function search backend.
    """
    llm = llms.create_llm_from_args(args)

    # Only instantiate a function search backend when one was requested.
    function_search = (
        function_search_classes[args.function_search_type]()
        if args.function_search_type is not None
        else None
    )

    vector_db = QdrantVectorDB("diff_comments")
    review_comments_db = code_review.ReviewCommentsDB(vector_db)

    # NOTE(review): CodeReviewTool now takes dedicated summarization and
    # filtering LLMs, and the old `show_patch_example` keyword is gone from
    # its constructor. Reuse the single configured LLM for all three roles.
    code_review_tool = code_review.CodeReviewTool(
        llm=llm,
        summarization_llm=llm,
        filtering_llm=llm,
        function_search=function_search,
        review_comments_db=review_comments_db,
    )

    review_data = code_review.review_data_classes[args.review_platform]()
    revision = review_data.get_review_request_by_id(args.review_request_id)
    patch = review_data.get_patch_by_id(revision.patch_id)

    print(patch)
    print(code_review_tool.run(patch))

    # Keep the process alive so the printed output can be inspected.
    input()
def parse_args(args):
    """Build the command-line parser for the runner and parse *args*."""
    arg_parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    arg_parser.add_argument(
        "--review_platform",
        choices=list(code_review.review_data_classes),
        help="Review platform",
    )
    arg_parser.add_argument("--review_request_id", help="Review request ID")
    # Let the shared LLM helper register its own CLI options.
    llms.create_llm_to_args(arg_parser)
    arg_parser.add_argument(
        "--function_search_type",
        choices=list(function_search_classes),
        help="Function search tool",
    )
    return arg_parser.parse_args(args)
if __name__ == "__main__":
    # Parse the CLI arguments and hand them straight to the runner.
    run(parse_args(sys.argv[1:]))