Latest Threat Research: SANDWORM_MODE: Shai-Hulud-Style npm Worm Hijacks CI Workflows and Poisons AI Toolchains. Details
Socket
Book a DemoInstallSign in
Socket

bugbug

Package Overview
Dependencies
Maintainers
2
Versions
543
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

bugbug - Python Package Compare versions

Comparing version
0.0.614
to
0.0.615
+1
-1
bugbug.egg-info/PKG-INFO
Metadata-Version: 2.1
Name: bugbug
Version: 0.0.614
Version: 0.0.615
Summary: ML tools for Mozilla projects

@@ -5,0 +5,0 @@ Author: Marco Castelluccio

amqp==5.3.1
beautifulsoup4==4.14.2
beautifulsoup4==4.14.3
boto3==1.41.2
imbalanced-learn==0.14.0
langchain-anthropic==1.1.0
langchain-anthropic==1.3.0
langchain-classic==1.0.0
langchain-community==0.4.1
langchain-google-genai==3.1.0
langchain-google-genai==4.0.0
langchain-mistralai==1.0.1
langchain-openai==1.0.3
langchain==1.0.8
langgraph==1.0.3
langchain-openai==1.1.3
langchain==1.2.0
langgraph==1.0.5
libmozdata==0.2.12

@@ -23,3 +23,3 @@ llama-cpp-python==0.2.90

numpy==2.3.5
orjson==3.11.4
orjson==3.11.5
ortools==9.14.6206

@@ -31,3 +31,3 @@ pandas==2.3.3

python-hglib==2.6.2
qdrant-client==1.16.0
qdrant-client==1.15.1
ratelimit==2.2.1

@@ -42,3 +42,3 @@ requests-html==0.10.0

tabulate==0.9.0
taskcluster==93.1.5
taskcluster==94.1.1
tenacity==9.1.2

@@ -45,0 +45,0 @@ tqdm==4.67.1

@@ -112,3 +112,2 @@ LICENSE

scripts/code_review_tool_evaluator_report.py
scripts/code_review_tool_runner.py
scripts/comment_level_labeler.py

@@ -115,0 +114,0 @@ scripts/comment_resolver_runner.py

@@ -238,2 +238,9 @@ # -*- coding: utf-8 -*-

)
if source is None:
logger.warning(
"Could not extract source for %s:%d",
definition["path"],
definition["start"],
)
continue
result.append(

@@ -240,0 +247,0 @@ Function(

@@ -941,3 +941,6 @@ # -*- coding: utf-8 -*-

# Collect head files.
head_files = data.get("DEFAULT", {}).get("head", "").split(" ")
head_value = data.get("DEFAULT", {}).get("head", [])
head_files = (
head_value if isinstance(head_value, list) else head_value.split(" ")
)
for head_file in head_files:

@@ -944,0 +947,0 @@ if not head_file.strip():

@@ -18,3 +18,3 @@ # -*- coding: utf-8 -*-

# Agent
from bugbug.tools.code_review.agent import TARGET_SOFTWARE, CodeReviewTool
from bugbug.tools.code_review.agent import CodeReviewTool

@@ -58,3 +58,2 @@ # Databases

"CodeReviewTool",
"TARGET_SOFTWARE",
# Databases

@@ -61,0 +60,0 @@ "EvaluationAction",

@@ -12,5 +12,6 @@ # -*- coding: utf-8 -*-

from logging import getLogger
from typing import Iterable, Literal, Optional
from typing import Iterable, Optional
from langchain.agents import create_agent
from langchain.agents.structured_output import ProviderStrategy
from langchain.chat_models import BaseChatModel

@@ -21,2 +22,3 @@ from langchain.messages import HumanMessage

from langgraph.errors import GraphRecursionError
from pydantic import BaseModel, Field
from unidiff import PatchSet

@@ -34,8 +36,7 @@

DEFAULT_REJECTED_EXAMPLES,
OUTPUT_FORMAT_JSON,
OUTPUT_FORMAT_TEXT,
FIRST_MESSAGE_TEMPLATE,
PROMPT_TEMPLATE_FILTERING_ANALYSIS,
PROMPT_TEMPLATE_REVIEW,
PROMPT_TEMPLATE_SUMMARIZATION,
STATIC_COMMENT_EXAMPLES,
SYSTEM_PROMPT_TEMPLATE,
TEMPLATE_COMMENT_EXAMPLE,

@@ -47,3 +48,2 @@ TEMPLATE_PATCH_FROM_HUNK,

generate_processed_output,
parse_model_output,
)

@@ -57,6 +57,25 @@ from bugbug.tools.core.data_types import InlineComment

# Global variable for target software
TARGET_SOFTWARE: str | None = None
class GeneratedReviewComment(BaseModel):
"""A review comment generated by the code review agent."""
file: str = Field(description="The path to the file the comment applies to.")
code_line: int = Field(description="The line number that the comment refers to.")
comment: str = Field(description="The review comment.")
explanation: str = Field(
description="A brief rationale for the comment, including how confident you are and why."
)
order: int = Field(
description="An integer representing the priority of the comment, with 1 being the highest confidence/importance."
)
class AgentResponse(BaseModel):
"""The response from the code review agent."""
comments: list[GeneratedReviewComment] = Field(
description="A list of generated review comments."
)
class CodeReviewTool(GenerativeModelTool):

@@ -68,2 +87,4 @@ version = "0.0.1"

llm: BaseChatModel,
summarization_llm: BaseChatModel,
filtering_llm: BaseChatModel,
function_search: Optional[FunctionSearch] = None,

@@ -74,7 +95,7 @@ review_comments_db: Optional["ReviewCommentsDB"] = None,

suggestions_feedback_db: Optional["SuggestionsFeedbackDB"] = None,
target_software: Optional[str] = None,
target_software: str = "Mozilla Firefox",
) -> None:
super().__init__()
self.target_software = target_software or TARGET_SOFTWARE
self.target_software = target_software

@@ -97,14 +118,10 @@ self._tokenizer = get_tokenizer(

experience_scope = (
f"the {self.target_software} source code"
if self.target_software
else "a software project"
)
self.summarization_chain = LLMChain(
prompt=PromptTemplate.from_template(
PROMPT_TEMPLATE_SUMMARIZATION,
partial_variables={"experience_scope": experience_scope},
partial_variables={
"experience_scope": f"the {self.target_software} source code"
},
),
llm=llm,
llm=summarization_llm,
verbose=verbose,

@@ -115,7 +132,5 @@ )

PROMPT_TEMPLATE_FILTERING_ANALYSIS,
partial_variables={
"target_code_consistency": self.target_software or "rest of the"
},
partial_variables={"target_code_consistency": self.target_software},
),
llm=llm,
llm=filtering_llm,
verbose=verbose,

@@ -131,3 +146,6 @@ )

tools,
system_prompt=f"You are an expert reviewer for {experience_scope}, with experience on source code reviews.",
system_prompt=SYSTEM_PROMPT_TEMPLATE.format(
target_software=self.target_software,
),
response_format=ProviderStrategy(AgentResponse),
)

@@ -143,8 +161,25 @@

@staticmethod
def create(
llm=None, summarization_llm=None, filtering_llm=None, **kwargs
) -> "CodeReviewTool":
from bugbug.tools.core.llms import create_anthropic_llm
return CodeReviewTool(
llm=llm
or create_anthropic_llm(
model_name="claude-opus-4-5-20251101",
max_tokens=40_000,
temperature=None,
thinking={"type": "enabled", "budget_tokens": 10_000},
),
summarization_llm=summarization_llm or create_anthropic_llm(),
filtering_llm=filtering_llm or create_anthropic_llm(),
**kwargs,
)
def count_tokens(self, text):
return len(self._tokenizer.encode(text))
def generate_initial_prompt(
self, patch: Patch, output_format: Literal["JSON", "TEXT"] = "JSON"
) -> str:
def generate_initial_prompt(self, patch: Patch) -> str:
formatted_patch = format_patch_set(patch.patch_set)

@@ -157,2 +192,3 @@

"patch_title": patch.patch_title,
"patch_description": patch.patch_description,
},

@@ -165,13 +201,4 @@ return_only_outputs=True,

if output_format == "JSON":
output_instructions = OUTPUT_FORMAT_JSON
elif output_format == "TEXT":
output_instructions = OUTPUT_FORMAT_TEXT
else:
raise ValueError(
f"Unsupported output format: {output_format}, choose JSON or TEXT"
)
created_before = patch.date_created if self.is_experiment_env else None
return PROMPT_TEMPLATE_REVIEW.format(
return FIRST_MESSAGE_TEMPLATE.format(
patch=formatted_patch,

@@ -181,11 +208,5 @@ patch_summarization=output_summarization,

approved_examples=self._get_generated_examples(patch, created_before),
target_code_consistency=self.target_software or "rest of the",
output_instructions=output_instructions,
bug_title=patch.bug_title,
patch_title=patch.patch_title,
patch_url=patch.patch_url,
target_software=self.target_software,
)
def _generate_suggestions(self, patch: Patch):
def _generate_suggestions(self, patch: Patch) -> list[GeneratedReviewComment]:
try:

@@ -200,2 +221,3 @@ for chunk in self.agent.stream(

stream_mode="values",
config={"recursion_limit": 50},
):

@@ -206,3 +228,3 @@ result = chunk

return result["messages"][-1].content
return result["structured_response"].comments

@@ -213,5 +235,3 @@ def run(self, patch: Patch) -> list[InlineComment] | None:

output = self._generate_suggestions(patch)
unfiltered_suggestions = parse_model_output(output)
unfiltered_suggestions = self._generate_suggestions(patch)
if not unfiltered_suggestions:

@@ -229,3 +249,5 @@ logger.info("No suggestions were generated")

{
"comments": output,
"comments": str(
[comment.model_dump() for comment in unfiltered_suggestions]
),
"rejected_examples": rejected_examples,

@@ -320,3 +342,5 @@ },

def get_similar_rejected_comments(self, suggestions) -> Iterable[str]:
def get_similar_rejected_comments(
self, suggestions: list[GeneratedReviewComment]
) -> Iterable[str]:
if not self.suggestions_feedback_db:

@@ -331,3 +355,3 @@ raise Exception("Suggestions feedback database is not available")

self.suggestions_feedback_db.find_similar_rejected_suggestions(
suggestion["comment"],
suggestion.comment,
limit=num_examples_per_suggestion,

@@ -334,0 +358,0 @@ excluded_ids=seen_ids,

@@ -23,20 +23,27 @@ # -*- coding: utf-8 -*-

@tool
def expand_context(file_path: str, line_number: int) -> str:
"""Expand the context around a specific line in a file diff.
def expand_context(file_path: str, start_line: int, end_line: int) -> str:
"""Show the content of a file between specified line numbers as it is before the patch.
Be careful to not fill your context window with too much data. Request the
minimum amount of context necessary to understand the code, but do not split
what you really need into multiple requests if the line range is continuous.
Args:
file_path: The path to the file.
line_number: The line number to expand context around. It should be based on the original file, not the patch.
start_line: The starting line number in the original file. Minimum is 1.
end_line: The ending line number in the original file. Maximum is the total number of lines in the file.
Returns:
Lines of code around the specified line number.
The content of the file between the specified line numbers.
"""
runtime = get_runtime(CodeReviewContext)
file_content = runtime.context.patch.get_old_file(file_path)
# TODO: Expanding the context using an AST parser like tree-sitter to
# include the whole function or class when it is relatively small.
try:
file_content = runtime.context.patch.get_old_file(file_path)
except FileNotFoundError:
return "File not found in the repository before the patch."
lines = file_content.splitlines()
start = max(0, line_number - 20)
end = min(len(lines), line_number + 20)
start = max(1, start_line) - 1
end = min(len(lines), end_line)

@@ -43,0 +50,0 @@ # Format the output with line numbers that match the original file.

@@ -16,3 +16,3 @@ # -*- coding: utf-8 -*-

1. **Intent**: Describe the intent of the changes, what they are trying to achieve, and how they relate to the bug or feature request.
2. **Structure**: Describe the structure of the changes, including any new functions, classes, or modules introduced, and how they fit into the existing codebase.
2. **Solution**: Describe the solution implemented in the code changes, focusing on how the changes address the intent.

@@ -22,86 +22,67 @@ Do not include any code in the summarization, only a description of the changes.

**Bug title**:
<bug_title>
{bug_title}
</bug_title>
**Commit message**:
<commit_message>
{patch_title}
{patch_description}
</commit_message>
**Diff**:
{patch}"""
<patch>
{patch}
</patch>"""
PROMPT_TEMPLATE_REVIEW = """<task>
Generate high-quality code review comments for the patch provided below.
</task>
SYSTEM_PROMPT_TEMPLATE = """You are an expert {target_software} engineer tasked with analyzing a pull request and providing high-quality review comments. You will examine a code patch and generate constructive feedback focusing on potential issues in the changed code.
<instructions>
<analyze_changes>
**Analyze the Changes**:
* Understand the intent and structure of the changes in the patch.
* Use the provided summarization for context, but prioritize what's visible in the diff.
</analyze_changes>
## Instructions
<identify_issues>
**Identify Issues**:
* Detect bugs, logical errors, performance concerns, security issues, or violations of the `{target_code_consistency}` coding standards.
* Focus only on **new or changed lines** (lines beginning with `+`).
* **Prioritize**: Security vulnerabilities > Functional bugs > Performance issues > Style/readability concerns.
</identify_issues>
Follow this systematic approach to review the patch:
<assess_confidence>
**Assess Confidence and Order**:
* **Only include comments where you are at least 80% confident the issue is valid**.
* **Sort the comments by descending confidence and importance**:
* Start with issues you are **certain are valid**.
* Also, prioritize important issues that you are **confident about**.
* Follow with issues that are **plausible but uncertain** (possible false positives).
* **When uncertain, use available tools to verify before commenting**.
* Assign each comment a numeric `order`, starting at 1.
</assess_confidence>
**Step 1: Analyze the Changes**
- Understand what the patch is trying to accomplish
- Use the patch summary for context, but focus primarily on what you can see in the actual diff
- Identify the intent and structure of the changes
<write_comments>
**Write Clear, Constructive Comments**:
* Use **direct, declarative language**. State the problem definitively, then suggest the fix.
* Keep comments **short and specific**.
* Focus strictly on code-related concerns.
* **Banned phrases**: "maybe", "might want to", "consider", "possibly", "could be", "you may want to".
* **Use directive language**: "Fix", "Remove", "Change", "Add", "Validate", "Check" (not "Consider checking").
* Avoid repeating what the code is doing unless it supports your critique.
</write_comments>
**Step 2: Identify Issues**
- Look for bugs, logical errors, performance problems, security vulnerabilities, or violations of the coding standards
- Focus ONLY on new or changed lines (lines that begin with `+`)
- Never comment on unmodified code
- Prioritize issues in this order: Security vulnerabilities > Functional bugs > Performance issues > Style/readability concerns
<use_tools>
**Use available tools to verify concerns**:
* Use tools to gather context when you suspect an issue but need verification.
* Use `find_function_definition` to check if error handling or validation exists elsewhere.
* Use `expand_context` to see if edge cases are handled in surrounding code.
* **Do not suggest issues you cannot verify with available context and tools**.
</use_tools>
**Step 3: Verify and Assess Confidence**
- Use available tools when you need to verify concerns or gather additional context
- Only include comments where you are at least 80% confident the issue is valid
- When uncertain about an issue, use tools like `find_function_definition` or `expand_context` to verify before commenting
- Do not suggest issues you cannot verify with available context
<avoid>
**Avoid Comments That**:
* Refer to unmodified code (lines without a `+` prefix).
* Ask for verification or confirmation (e.g., "Check if…", "Ensure that…").
* Provide praise or restate obvious facts.
* Focus on testing.
* Point out issues that are already handled in the visible code.
* Suggest problems based on assumptions without verifying the context.
* Flag style preferences without clear `{target_code_consistency}` standard violations.
</avoid>
</instructions>
**Step 4: Sort and Order Comments**
- Sort comments by descending confidence and importance
- Start with issues you are certain are valid and that are most critical
- Assign each comment a numeric order starting at 1
<output_format>
{output_instructions}
</output_format>
**Step 5: Write Clear, Constructive Comments**
- Use direct, declarative language - state the problem definitively, then suggest the fix
- Keep comments short and specific
- Use directive language: "Fix", "Remove", "Change", "Add"
- NEVER use these banned phrases: "maybe", "might want to", "consider", "possibly", "could be", "you may want to"
- Focus strictly on code-related concerns
<examples>
{comment_examples}
{approved_examples}
</examples>
## What NOT to Include
<context>
**Review Context**:
Target Software: {target_software}
Bug Title: {bug_title}
Patch Title: {patch_title}
Source URL: {patch_url}
</context>
Do not write comments that:
- Refer to unmodified code (lines without a `+` prefix)
- Ask for verification or confirmation (e.g., "Check if...", "Ensure that...")
- Provide praise or restate obvious facts
- Focus on testing concerns
- Point out issues that are already handled in the visible code
- Suggest problems based on assumptions without verifying the context
- Flag style preferences without clear coding standard violations
"""
FIRST_MESSAGE_TEMPLATE = """Here is a summary of the patch:
<patch_summary>

@@ -111,25 +92,16 @@ {patch_summarization}

<patch>
{patch}
</patch>
"""
OUTPUT_FORMAT_JSON = """
Respond only with a **JSON list**. Each object must contain the following fields:
Here are examples of good code review comments to guide your style and approach:
* `"file"`: The relative path to the file the comment applies to.
* `"code_line"`: The number of the specific changed line of code that the comment refers to.
* `"comment"`: A concise review comment.
* `"explanation"`: A brief rationale for the comment, including how confident you are and why.
* `"order"`: An integer representing the priority of the comment, with 1 being the highest confidence/importance.
"""
<examples>
{comment_examples}
{approved_examples}
</examples>
OUTPUT_FORMAT_TEXT = """
Respond only with a **plain text list** with the following details:
* `"filename"`: The relative path to the file the comment applies to.
* `"line_number"`: The number of the specific changed line of code that the comment refers to.
* `"comment"`: A concise review comment.
Here is the patch you need to review:
The format should be: filename:line_number "comment"
<patch>
{patch}
</patch>
"""

@@ -136,0 +108,0 @@

@@ -47,3 +47,3 @@ # -*- coding: utf-8 -*-

def create_anthropic_llm(
temperature=0.2, top_p=None, model_name="claude-sonnet-4-5-20250929"
temperature=0.2, top_p=None, model_name="claude-sonnet-4-5-20250929", **kwargs
):

@@ -57,2 +57,3 @@ from langchain_anthropic import ChatAnthropic

top_p=top_p,
**kwargs,
)

@@ -59,0 +60,0 @@

@@ -56,2 +56,8 @@ # -*- coding: utf-8 -*-

@abstractmethod
def patch_description(self) -> str:
"""Return the description of the patch."""
...
@property
@abstractmethod
def patch_url(self) -> str:

@@ -58,0 +64,0 @@ """Return the URL of the patch."""

@@ -180,3 +180,3 @@ # -*- coding: utf-8 -*-

def __init__(self, data: dict):
self.metadata = data
self._metadata = data

@@ -201,4 +201,8 @@ @staticmethod

@property
def summary(self) -> str:
return self._metadata["summary"]
def to_md(self) -> str:
"""Return a markdown representation of the bug."""
return bug_dict_to_markdown(self.metadata)
return bug_dict_to_markdown(self._metadata)

@@ -17,5 +17,6 @@ # -*- coding: utf-8 -*-

from bugbug import bugzilla, db, phabricator, utils
from bugbug import db, phabricator, utils
from bugbug.tools.core.data_types import InlineComment, ReviewRequest
from bugbug.tools.core.platforms.base import Patch, ReviewData
from bugbug.tools.core.platforms.bugzilla import Bug
from bugbug.utils import get_secret

@@ -134,3 +135,3 @@

def _get_file(self, file_path: str, is_before_patch: bool) -> str:
def _get_file_from_patch(self, file_path: str, is_before_patch: bool) -> str:
for changeset in self._changesets:

@@ -155,5 +156,29 @@ if changeset["fields"]["path"]["displayPath"] == file_path:

def _get_file_from_repo(self, file_path: str, commit_hash: str) -> str:
r = utils.get_session("hgmo").get(
f"https://hg.mozilla.org/mozilla-unified/raw-file/{commit_hash}/{file_path}",
headers={
"User-Agent": utils.get_user_agent(),
},
)
if r.status_code == 404:
raise FileNotFoundError(
f"File {file_path} not found in commit {commit_hash}"
)
r.raise_for_status()
return r.text
def get_old_file(self, file_path: str) -> str:
return self._get_file(file_path, is_before_patch=True)
if file_path.startswith("b/") or file_path.startswith("a/"):
file_path = file_path[2:]
try:
return self._get_file_from_patch(file_path, is_before_patch=True)
except FileNotFoundError:
return self._get_file_from_repo(
file_path, commit_hash=self.base_commit_hash
)
@cached_property

@@ -257,14 +282,5 @@ def _changesets(self) -> list[dict]:

@cached_property
def _bug_metadata(self) -> dict | None:
id = self.bug_id
bugs = bugzilla.get(id)
def bug(self) -> Bug:
return Bug.get(self.bug_id)
if id not in bugs:
logger.warning(
"Bug %d not found in Bugzilla. This might be a private bug.", id
)
return None
return bugs[id]
@property

@@ -274,10 +290,6 @@ def bug_id(self) -> int:

@cached_property
@property
def bug_title(self) -> str:
if not self._bug_metadata:
# Use a placeholder when the bug metadata is not available
return "--"
return self.bug.summary
return self._bug_metadata["summary"]
@cached_property

@@ -288,2 +300,6 @@ def patch_title(self) -> str:

@property
def patch_description(self) -> str:
return self._revision_metadata["fields"].get("summary", "")
@property
def revision_id(self) -> int:

@@ -290,0 +306,0 @@ return self._revision_metadata["id"]

@@ -56,2 +56,6 @@ # -*- coding: utf-8 -*-

@property
def patch_description(self) -> str:
raise NotImplementedError
@cached_property

@@ -58,0 +62,0 @@ def bug_title(self) -> str:

Metadata-Version: 2.1
Name: bugbug
Version: 0.0.614
Version: 0.0.615
Summary: ML tools for Mozilla projects

@@ -5,0 +5,0 @@ Author: Marco Castelluccio

amqp==5.3.1
beautifulsoup4==4.14.2
beautifulsoup4==4.14.3
boto3==1.41.2
imbalanced-learn==0.14.0
langchain==1.0.8
langchain-anthropic==1.1.0
langchain==1.2.0
langchain-anthropic==1.3.0
langchain-classic==1.0.0
langchain-community==0.4.1
langchain-google-genai==3.1.0
langchain-google-genai==4.0.0
langchain-mistralai==1.0.1
langchain-openai==1.0.3
langgraph==1.0.3
langchain-openai==1.1.3
langgraph==1.0.5
libmozdata==0.2.12

@@ -23,3 +23,3 @@ llama-cpp-python==0.2.90

numpy==2.3.5
orjson==3.11.4
orjson==3.11.5
ortools==9.14.6206

@@ -31,3 +31,3 @@ pandas==2.3.3

python-hglib==2.6.2
qdrant-client==1.16.0
qdrant-client==1.15.1
ratelimit==2.2.1

@@ -42,3 +42,3 @@ requests==2.32.5

tabulate==0.9.0
taskcluster==93.1.5
taskcluster==94.1.1
tenacity==9.1.2

@@ -45,0 +45,0 @@ tqdm==4.67.1

@@ -5,6 +5,7 @@ # %%

from scripts.code_review_tool_evaluator import get_latest_evaluation_results_file
import scripts.code_review_tool_evaluator as evaluator_script
evaluation_results = pd.read_csv(
get_latest_evaluation_results_file("../evaluation_results")
# evaluator_script.get_latest_evaluation_results_file("../evaluation_results")
evaluator_script.get_ongoing_evaluation_results_file("../evaluation_results")
)

@@ -11,0 +12,0 @@

@@ -35,5 +35,5 @@ # -*- coding: utf-8 -*-

from bugbug.tools.core import llms
from bugbug.tools.core.exceptions import ModelResultError
from bugbug.vectordb import QdrantVectorDB
code_review.TARGET_SOFTWARE = "Mozilla Firefox"
VERBOSE_CODE_REVIEW = False

@@ -225,3 +225,5 @@

def get_tool_variants() -> list[tuple[str, code_review.CodeReviewTool]]:
def get_tool_variants(
variants: list[str],
) -> list[tuple[str, code_review.CodeReviewTool]]:
"""Returns a list of tool variants to evaluate.

@@ -259,27 +261,31 @@

tool_variants.append(
(
"Claude",
code_review.CodeReviewTool(
llm=llms.create_anthropic_llm(),
function_search=function_search,
review_comments_db=review_comments_db,
suggestions_feedback_db=suggestions_feedback_db,
verbose=VERBOSE_CODE_REVIEW,
),
if "claude" in variants:
tool_variants.append(
(
"Claude",
code_review.CodeReviewTool.create(
function_search=function_search,
review_comments_db=review_comments_db,
suggestions_feedback_db=suggestions_feedback_db,
verbose=VERBOSE_CODE_REVIEW,
),
)
)
)
tool_variants.append(
(
"GPT",
code_review.CodeReviewTool(
llm=llms.create_openai_llm(),
function_search=function_search,
review_comments_db=review_comments_db,
suggestions_feedback_db=suggestions_feedback_db,
verbose=VERBOSE_CODE_REVIEW,
),
if "gpt" in variants:
llm = llms.create_openai_llm()
tool_variants.append(
(
"GPT",
code_review.CodeReviewTool.create(
llm=llm,
summarization_llm=llm,
filtering_llm=llm,
function_search=function_search,
review_comments_db=review_comments_db,
suggestions_feedback_db=suggestions_feedback_db,
verbose=VERBOSE_CODE_REVIEW,
),
)
)
)

@@ -354,2 +360,22 @@ return tool_variants

def get_ongoing_evaluation_results_file(results_dir: str | None):
import glob
import os
base_file = get_latest_evaluation_results_file(results_dir)
files = [
file
for file in glob.glob("evaluation_results_*.csv", root_dir=results_dir)
if "#" not in file and file > base_file
]
if not files:
raise FileNotFoundError("No ongoing evaluation results file found.")
latests_file = max(files)
if results_dir:
return os.path.join(results_dir, latests_file)
return latests_file
def main(args):

@@ -361,7 +387,6 @@ review_platform = "phabricator"

tool_variants = get_tool_variants()
tool_variants = get_tool_variants(args.variants)
evaluator = FeedbackEvaluator(args.evaluation_dataset)
is_first_result = True
result_file = os.path.join(

@@ -371,6 +396,14 @@ args.results_dir,

)
evaluation_results_file = os.path.join(
args.results_dir,
f"evaluation_results_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.csv",
)
is_first_result = not os.path.exists(result_file)
if is_first_result:
evaluation_results_file = os.path.join(
args.results_dir,
f"evaluation_results_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.csv",
)
seen_patches = set()
else:
evaluation_results_file = get_ongoing_evaluation_results_file(args.results_dir)
seen_patches = set(pd.read_csv(evaluation_results_file)["diff_id"].to_list())
result_unique_columns = ["Review Request ID", "File", "Line", "Comment Number"]

@@ -430,2 +463,14 @@ result_all_columns = result_unique_columns + [

for review_request_id, review_request in selected_review_requests:
if review_request_id in [227266, 233414]:
print(
f"Skipping Review Request ID {review_request_id} because it is known to cause issues."
)
continue
if review_request.patch_id in seen_patches:
print(
f"Skipping Review Request ID {review_request_id} (Diff ID {review_request.patch_id}) because it was already evaluated."
)
continue
print("---------------------------------------------------------")

@@ -453,2 +498,5 @@ print(f"Review Request ID: {review_request_id}")

continue
except ModelResultError as e:
print("Error while running the tool:", e)
continue

@@ -559,2 +607,10 @@ print_prettified_comments(comments)

)
parser.add_argument(
"--variant",
dest="variants",
action="append",
help="the variants to use, use multiple times for multiple variants",
choices=["claude", "gpt"],
required=True,
)

@@ -561,0 +617,0 @@ args = parser.parse_args()

@@ -1,1 +0,1 @@

0.0.614
0.0.615
# -*- coding: utf-8 -*-
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
import argparse
import sys
from bugbug.code_search.function_search import function_search_classes
from bugbug.tools import code_review
from bugbug.tools.core import llms
from bugbug.vectordb import QdrantVectorDB
def run(args) -> None:
    """Run the code review tool on a single review request and print the result.

    Args:
        args: Parsed command-line arguments (see ``parse_args``): the review
            platform, the review request ID, the LLM selection options, and an
            optional function search backend.
    """
    llm = llms.create_llm_from_args(args)

    # Only instantiate a function search backend when one was requested.
    function_search = (
        function_search_classes[args.function_search_type]()
        if args.function_search_type is not None
        else None
    )

    vector_db = QdrantVectorDB("diff_comments")
    review_comments_db = code_review.ReviewCommentsDB(vector_db)

    # NOTE(review): CodeReviewTool now takes dedicated summarization and
    # filtering LLMs, and the old `show_patch_example` keyword is gone from
    # its constructor. Reuse the single configured LLM for all three roles.
    code_review_tool = code_review.CodeReviewTool(
        llm=llm,
        summarization_llm=llm,
        filtering_llm=llm,
        function_search=function_search,
        review_comments_db=review_comments_db,
    )

    review_data = code_review.review_data_classes[args.review_platform]()
    revision = review_data.get_review_request_by_id(args.review_request_id)
    patch = review_data.get_patch_by_id(revision.patch_id)

    print(patch)
    print(code_review_tool.run(patch))

    # Keep the process alive so the printed output can be inspected.
    input()
def parse_args(args):
    """Build the command-line parser for the runner and parse *args*."""
    arg_parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    arg_parser.add_argument(
        "--review_platform",
        choices=list(code_review.review_data_classes),
        help="Review platform",
    )
    arg_parser.add_argument("--review_request_id", help="Review request ID")
    # Let the shared LLM helper register its own CLI options.
    llms.create_llm_to_args(arg_parser)
    arg_parser.add_argument(
        "--function_search_type",
        choices=list(function_search_classes),
        help="Function search tool",
    )
    return arg_parser.parse_args(args)
if __name__ == "__main__":
    # Parse the CLI arguments and hand them straight to the runner.
    run(parse_args(sys.argv[1:]))