bugbug
Advanced tools
| Metadata-Version: 2.1 | ||
| Name: bugbug | ||
| Version: 0.0.614 | ||
| Version: 0.0.615 | ||
| Summary: ML tools for Mozilla projects | ||
@@ -5,0 +5,0 @@ Author: Marco Castelluccio |
| amqp==5.3.1 | ||
| beautifulsoup4==4.14.2 | ||
| beautifulsoup4==4.14.3 | ||
| boto3==1.41.2 | ||
| imbalanced-learn==0.14.0 | ||
| langchain-anthropic==1.1.0 | ||
| langchain-anthropic==1.3.0 | ||
| langchain-classic==1.0.0 | ||
| langchain-community==0.4.1 | ||
| langchain-google-genai==3.1.0 | ||
| langchain-google-genai==4.0.0 | ||
| langchain-mistralai==1.0.1 | ||
| langchain-openai==1.0.3 | ||
| langchain==1.0.8 | ||
| langgraph==1.0.3 | ||
| langchain-openai==1.1.3 | ||
| langchain==1.2.0 | ||
| langgraph==1.0.5 | ||
| libmozdata==0.2.12 | ||
@@ -23,3 +23,3 @@ llama-cpp-python==0.2.90 | ||
| numpy==2.3.5 | ||
| orjson==3.11.4 | ||
| orjson==3.11.5 | ||
| ortools==9.14.6206 | ||
@@ -31,3 +31,3 @@ pandas==2.3.3 | ||
| python-hglib==2.6.2 | ||
| qdrant-client==1.16.0 | ||
| qdrant-client==1.15.1 | ||
| ratelimit==2.2.1 | ||
@@ -42,3 +42,3 @@ requests-html==0.10.0 | ||
| tabulate==0.9.0 | ||
| taskcluster==93.1.5 | ||
| taskcluster==94.1.1 | ||
| tenacity==9.1.2 | ||
@@ -45,0 +45,0 @@ tqdm==4.67.1 |
@@ -112,3 +112,2 @@ LICENSE | ||
| scripts/code_review_tool_evaluator_report.py | ||
| scripts/code_review_tool_runner.py | ||
| scripts/comment_level_labeler.py | ||
@@ -115,0 +114,0 @@ scripts/comment_resolver_runner.py |
@@ -238,2 +238,9 @@ # -*- coding: utf-8 -*- | ||
| ) | ||
| if source is None: | ||
| logger.warning( | ||
| "Could not extract source for %s:%d", | ||
| definition["path"], | ||
| definition["start"], | ||
| ) | ||
| continue | ||
| result.append( | ||
@@ -240,0 +247,0 @@ Function( |
@@ -941,3 +941,6 @@ # -*- coding: utf-8 -*- | ||
| # Collect head files. | ||
| head_files = data.get("DEFAULT", {}).get("head", "").split(" ") | ||
| head_value = data.get("DEFAULT", {}).get("head", []) | ||
| head_files = ( | ||
| head_value if isinstance(head_value, list) else head_value.split(" ") | ||
| ) | ||
| for head_file in head_files: | ||
@@ -944,0 +947,0 @@ if not head_file.strip(): |
@@ -18,3 +18,3 @@ # -*- coding: utf-8 -*- | ||
| # Agent | ||
| from bugbug.tools.code_review.agent import TARGET_SOFTWARE, CodeReviewTool | ||
| from bugbug.tools.code_review.agent import CodeReviewTool | ||
@@ -58,3 +58,2 @@ # Databases | ||
| "CodeReviewTool", | ||
| "TARGET_SOFTWARE", | ||
| # Databases | ||
@@ -61,0 +60,0 @@ "EvaluationAction", |
@@ -12,5 +12,6 @@ # -*- coding: utf-8 -*- | ||
| from logging import getLogger | ||
| from typing import Iterable, Literal, Optional | ||
| from typing import Iterable, Optional | ||
| from langchain.agents import create_agent | ||
| from langchain.agents.structured_output import ProviderStrategy | ||
| from langchain.chat_models import BaseChatModel | ||
@@ -21,2 +22,3 @@ from langchain.messages import HumanMessage | ||
| from langgraph.errors import GraphRecursionError | ||
| from pydantic import BaseModel, Field | ||
| from unidiff import PatchSet | ||
@@ -34,8 +36,7 @@ | ||
| DEFAULT_REJECTED_EXAMPLES, | ||
| OUTPUT_FORMAT_JSON, | ||
| OUTPUT_FORMAT_TEXT, | ||
| FIRST_MESSAGE_TEMPLATE, | ||
| PROMPT_TEMPLATE_FILTERING_ANALYSIS, | ||
| PROMPT_TEMPLATE_REVIEW, | ||
| PROMPT_TEMPLATE_SUMMARIZATION, | ||
| STATIC_COMMENT_EXAMPLES, | ||
| SYSTEM_PROMPT_TEMPLATE, | ||
| TEMPLATE_COMMENT_EXAMPLE, | ||
@@ -47,3 +48,2 @@ TEMPLATE_PATCH_FROM_HUNK, | ||
| generate_processed_output, | ||
| parse_model_output, | ||
| ) | ||
@@ -57,6 +57,25 @@ from bugbug.tools.core.data_types import InlineComment | ||
| # Global variable for target software | ||
| TARGET_SOFTWARE: str | None = None | ||
| class GeneratedReviewComment(BaseModel): | ||
| """A review comment generated by the code review agent.""" | ||
| file: str = Field(description="The path to the file the comment applies to.") | ||
| code_line: int = Field(description="The line number that the comment refers to.") | ||
| comment: str = Field(description="The review comment.") | ||
| explanation: str = Field( | ||
| description="A brief rationale for the comment, including how confident you are and why." | ||
| ) | ||
| order: int = Field( | ||
| description="An integer representing the priority of the comment, with 1 being the highest confidence/importance." | ||
| ) | ||
| class AgentResponse(BaseModel): | ||
| """The response from the code review agent.""" | ||
| comments: list[GeneratedReviewComment] = Field( | ||
| description="A list of generated review comments." | ||
| ) | ||
| class CodeReviewTool(GenerativeModelTool): | ||
@@ -68,2 +87,4 @@ version = "0.0.1" | ||
| llm: BaseChatModel, | ||
| summarization_llm: BaseChatModel, | ||
| filtering_llm: BaseChatModel, | ||
| function_search: Optional[FunctionSearch] = None, | ||
@@ -74,7 +95,7 @@ review_comments_db: Optional["ReviewCommentsDB"] = None, | ||
| suggestions_feedback_db: Optional["SuggestionsFeedbackDB"] = None, | ||
| target_software: Optional[str] = None, | ||
| target_software: str = "Mozilla Firefox", | ||
| ) -> None: | ||
| super().__init__() | ||
| self.target_software = target_software or TARGET_SOFTWARE | ||
| self.target_software = target_software | ||
@@ -97,14 +118,10 @@ self._tokenizer = get_tokenizer( | ||
| experience_scope = ( | ||
| f"the {self.target_software} source code" | ||
| if self.target_software | ||
| else "a software project" | ||
| ) | ||
| self.summarization_chain = LLMChain( | ||
| prompt=PromptTemplate.from_template( | ||
| PROMPT_TEMPLATE_SUMMARIZATION, | ||
| partial_variables={"experience_scope": experience_scope}, | ||
| partial_variables={ | ||
| "experience_scope": f"the {self.target_software} source code" | ||
| }, | ||
| ), | ||
| llm=llm, | ||
| llm=summarization_llm, | ||
| verbose=verbose, | ||
@@ -115,7 +132,5 @@ ) | ||
| PROMPT_TEMPLATE_FILTERING_ANALYSIS, | ||
| partial_variables={ | ||
| "target_code_consistency": self.target_software or "rest of the" | ||
| }, | ||
| partial_variables={"target_code_consistency": self.target_software}, | ||
| ), | ||
| llm=llm, | ||
| llm=filtering_llm, | ||
| verbose=verbose, | ||
@@ -131,3 +146,6 @@ ) | ||
| tools, | ||
| system_prompt=f"You are an expert reviewer for {experience_scope}, with experience on source code reviews.", | ||
| system_prompt=SYSTEM_PROMPT_TEMPLATE.format( | ||
| target_software=self.target_software, | ||
| ), | ||
| response_format=ProviderStrategy(AgentResponse), | ||
| ) | ||
@@ -143,8 +161,25 @@ | ||
| @staticmethod | ||
| def create( | ||
| llm=None, summarization_llm=None, filtering_llm=None, **kwargs | ||
| ) -> "CodeReviewTool": | ||
| from bugbug.tools.core.llms import create_anthropic_llm | ||
| return CodeReviewTool( | ||
| llm=llm | ||
| or create_anthropic_llm( | ||
| model_name="claude-opus-4-5-20251101", | ||
| max_tokens=40_000, | ||
| temperature=None, | ||
| thinking={"type": "enabled", "budget_tokens": 10_000}, | ||
| ), | ||
| summarization_llm=summarization_llm or create_anthropic_llm(), | ||
| filtering_llm=filtering_llm or create_anthropic_llm(), | ||
| **kwargs, | ||
| ) | ||
| def count_tokens(self, text): | ||
| return len(self._tokenizer.encode(text)) | ||
| def generate_initial_prompt( | ||
| self, patch: Patch, output_format: Literal["JSON", "TEXT"] = "JSON" | ||
| ) -> str: | ||
| def generate_initial_prompt(self, patch: Patch) -> str: | ||
| formatted_patch = format_patch_set(patch.patch_set) | ||
@@ -157,2 +192,3 @@ | ||
| "patch_title": patch.patch_title, | ||
| "patch_description": patch.patch_description, | ||
| }, | ||
@@ -165,13 +201,4 @@ return_only_outputs=True, | ||
| if output_format == "JSON": | ||
| output_instructions = OUTPUT_FORMAT_JSON | ||
| elif output_format == "TEXT": | ||
| output_instructions = OUTPUT_FORMAT_TEXT | ||
| else: | ||
| raise ValueError( | ||
| f"Unsupported output format: {output_format}, choose JSON or TEXT" | ||
| ) | ||
| created_before = patch.date_created if self.is_experiment_env else None | ||
| return PROMPT_TEMPLATE_REVIEW.format( | ||
| return FIRST_MESSAGE_TEMPLATE.format( | ||
| patch=formatted_patch, | ||
@@ -181,11 +208,5 @@ patch_summarization=output_summarization, | ||
| approved_examples=self._get_generated_examples(patch, created_before), | ||
| target_code_consistency=self.target_software or "rest of the", | ||
| output_instructions=output_instructions, | ||
| bug_title=patch.bug_title, | ||
| patch_title=patch.patch_title, | ||
| patch_url=patch.patch_url, | ||
| target_software=self.target_software, | ||
| ) | ||
| def _generate_suggestions(self, patch: Patch): | ||
| def _generate_suggestions(self, patch: Patch) -> list[GeneratedReviewComment]: | ||
| try: | ||
@@ -200,2 +221,3 @@ for chunk in self.agent.stream( | ||
| stream_mode="values", | ||
| config={"recursion_limit": 50}, | ||
| ): | ||
@@ -206,3 +228,3 @@ result = chunk | ||
| return result["messages"][-1].content | ||
| return result["structured_response"].comments | ||
@@ -213,5 +235,3 @@ def run(self, patch: Patch) -> list[InlineComment] | None: | ||
| output = self._generate_suggestions(patch) | ||
| unfiltered_suggestions = parse_model_output(output) | ||
| unfiltered_suggestions = self._generate_suggestions(patch) | ||
| if not unfiltered_suggestions: | ||
@@ -229,3 +249,5 @@ logger.info("No suggestions were generated") | ||
| { | ||
| "comments": output, | ||
| "comments": str( | ||
| [comment.model_dump() for comment in unfiltered_suggestions] | ||
| ), | ||
| "rejected_examples": rejected_examples, | ||
@@ -320,3 +342,5 @@ }, | ||
| def get_similar_rejected_comments(self, suggestions) -> Iterable[str]: | ||
| def get_similar_rejected_comments( | ||
| self, suggestions: list[GeneratedReviewComment] | ||
| ) -> Iterable[str]: | ||
| if not self.suggestions_feedback_db: | ||
@@ -331,3 +355,3 @@ raise Exception("Suggestions feedback database is not available") | ||
| self.suggestions_feedback_db.find_similar_rejected_suggestions( | ||
| suggestion["comment"], | ||
| suggestion.comment, | ||
| limit=num_examples_per_suggestion, | ||
@@ -334,0 +358,0 @@ excluded_ids=seen_ids, |
@@ -23,20 +23,27 @@ # -*- coding: utf-8 -*- | ||
| @tool | ||
| def expand_context(file_path: str, line_number: int) -> str: | ||
| """Expand the context around a specific line in a file diff. | ||
| def expand_context(file_path: str, start_line: int, end_line: int) -> str: | ||
| """Show the content of a file between specified line numbers as it is before the patch. | ||
| Be careful to not fill your context window with too much data. Request the | ||
| minimum amount of context necessary to understand the code, but do not split | ||
| what you really need into multiple requests if the line range is continuous. | ||
| Args: | ||
| file_path: The path to the file. | ||
| line_number: The line number to expand context around. It should be based on the original file, not the patch. | ||
| start_line: The starting line number in the original file. Minimum is 1. | ||
| end_line: The ending line number in the original file. Maximum is the total number of lines in the file. | ||
| Returns: | ||
| Lines of code around the specified line number. | ||
| The content of the file between the specified line numbers. | ||
| """ | ||
| runtime = get_runtime(CodeReviewContext) | ||
| file_content = runtime.context.patch.get_old_file(file_path) | ||
| # TODO: Expanding the context using an AST parser like tree-sitter to | ||
| # include the whole function or class when it is relatively small. | ||
| try: | ||
| file_content = runtime.context.patch.get_old_file(file_path) | ||
| except FileNotFoundError: | ||
| return "File not found in the repository before the patch." | ||
| lines = file_content.splitlines() | ||
| start = max(0, line_number - 20) | ||
| end = min(len(lines), line_number + 20) | ||
| start = max(1, start_line) - 1 | ||
| end = min(len(lines), end_line) | ||
@@ -43,0 +50,0 @@ # Format the output with line numbers that match the original file. |
@@ -16,3 +16,3 @@ # -*- coding: utf-8 -*- | ||
| 1. **Intent**: Describe the intent of the changes, what they are trying to achieve, and how they relate to the bug or feature request. | ||
| 2. **Structure**: Describe the structure of the changes, including any new functions, classes, or modules introduced, and how they fit into the existing codebase. | ||
| 2. **Solution**: Describe the solution implemented in the code changes, focusing on how the changes address the intent. | ||
@@ -22,86 +22,67 @@ Do not include any code in the summarization, only a description of the changes. | ||
| **Bug title**: | ||
| <bug_title> | ||
| {bug_title} | ||
| </bug_title> | ||
| **Commit message**: | ||
| <commit_message> | ||
| {patch_title} | ||
| {patch_description} | ||
| </commit_message> | ||
| **Diff**: | ||
| {patch}""" | ||
| <patch> | ||
| {patch} | ||
| </patch>""" | ||
| PROMPT_TEMPLATE_REVIEW = """<task> | ||
| Generate high-quality code review comments for the patch provided below. | ||
| </task> | ||
| SYSTEM_PROMPT_TEMPLATE = """You are an expert {target_software} engineer tasked with analyzing a pull request and providing high-quality review comments. You will examine a code patch and generate constructive feedback focusing on potential issues in the changed code. | ||
| <instructions> | ||
| <analyze_changes> | ||
| **Analyze the Changes**: | ||
| * Understand the intent and structure of the changes in the patch. | ||
| * Use the provided summarization for context, but prioritize what's visible in the diff. | ||
| </analyze_changes> | ||
| ## Instructions | ||
| <identify_issues> | ||
| **Identify Issues**: | ||
| * Detect bugs, logical errors, performance concerns, security issues, or violations of the `{target_code_consistency}` coding standards. | ||
| * Focus only on **new or changed lines** (lines beginning with `+`). | ||
| * **Prioritize**: Security vulnerabilities > Functional bugs > Performance issues > Style/readability concerns. | ||
| </identify_issues> | ||
| Follow this systematic approach to review the patch: | ||
| <assess_confidence> | ||
| **Assess Confidence and Order**: | ||
| * **Only include comments where you are at least 80% confident the issue is valid**. | ||
| * **Sort the comments by descending confidence and importance**: | ||
| * Start with issues you are **certain are valid**. | ||
| * Also, prioritize important issues that you are **confident about**. | ||
| * Follow with issues that are **plausible but uncertain** (possible false positives). | ||
| * **When uncertain, use available tools to verify before commenting**. | ||
| * Assign each comment a numeric `order`, starting at 1. | ||
| </assess_confidence> | ||
| **Step 1: Analyze the Changes** | ||
| - Understand what the patch is trying to accomplish | ||
| - Use the patch summary for context, but focus primarily on what you can see in the actual diff | ||
| - Identify the intent and structure of the changes | ||
| <write_comments> | ||
| **Write Clear, Constructive Comments**: | ||
| * Use **direct, declarative language**. State the problem definitively, then suggest the fix. | ||
| * Keep comments **short and specific**. | ||
| * Focus strictly on code-related concerns. | ||
| * **Banned phrases**: "maybe", "might want to", "consider", "possibly", "could be", "you may want to". | ||
| * **Use directive language**: "Fix", "Remove", "Change", "Add", "Validate", "Check" (not "Consider checking"). | ||
| * Avoid repeating what the code is doing unless it supports your critique. | ||
| </write_comments> | ||
| **Step 2: Identify Issues** | ||
| - Look for bugs, logical errors, performance problems, security vulnerabilities, or violations of the coding standards | ||
| - Focus ONLY on new or changed lines (lines that begin with `+`) | ||
| - Never comment on unmodified code | ||
| - Prioritize issues in this order: Security vulnerabilities > Functional bugs > Performance issues > Style/readability concerns | ||
| <use_tools> | ||
| **Use available tools to verify concerns**: | ||
| * Use tools to gather context when you suspect an issue but need verification. | ||
| * Use `find_function_definition` to check if error handling or validation exists elsewhere. | ||
| * Use `expand_context` to see if edge cases are handled in surrounding code. | ||
| * **Do not suggest issues you cannot verify with available context and tools**. | ||
| </use_tools> | ||
| **Step 3: Verify and Assess Confidence** | ||
| - Use available tools when you need to verify concerns or gather additional context | ||
| - Only include comments where you are at least 80% confident the issue is valid | ||
| - When uncertain about an issue, use tools like `find_function_definition` or `expand_context` to verify before commenting | ||
| - Do not suggest issues you cannot verify with available context | ||
| <avoid> | ||
| **Avoid Comments That**: | ||
| * Refer to unmodified code (lines without a `+` prefix). | ||
| * Ask for verification or confirmation (e.g., "Check if…", "Ensure that…"). | ||
| * Provide praise or restate obvious facts. | ||
| * Focus on testing. | ||
| * Point out issues that are already handled in the visible code. | ||
| * Suggest problems based on assumptions without verifying the context. | ||
| * Flag style preferences without clear `{target_code_consistency}` standard violations. | ||
| </avoid> | ||
| </instructions> | ||
| **Step 4: Sort and Order Comments** | ||
| - Sort comments by descending confidence and importance | ||
| - Start with issues you are certain are valid and that are most critical | ||
| - Assign each comment a numeric order starting at 1 | ||
| <output_format> | ||
| {output_instructions} | ||
| </output_format> | ||
| **Step 5: Write Clear, Constructive Comments** | ||
| - Use direct, declarative language - state the problem definitively, then suggest the fix | ||
| - Keep comments short and specific | ||
| - Use directive language: "Fix", "Remove", "Change", "Add" | ||
| - NEVER use these banned phrases: "maybe", "might want to", "consider", "possibly", "could be", "you may want to" | ||
| - Focus strictly on code-related concerns | ||
| <examples> | ||
| {comment_examples} | ||
| {approved_examples} | ||
| </examples> | ||
| ## What NOT to Include | ||
| <context> | ||
| **Review Context**: | ||
| Target Software: {target_software} | ||
| Bug Title: {bug_title} | ||
| Patch Title: {patch_title} | ||
| Source URL: {patch_url} | ||
| </context> | ||
| Do not write comments that: | ||
| - Refer to unmodified code (lines without a `+` prefix) | ||
| - Ask for verification or confirmation (e.g., "Check if...", "Ensure that...") | ||
| - Provide praise or restate obvious facts | ||
| - Focus on testing concerns | ||
| - Point out issues that are already handled in the visible code | ||
| - Suggest problems based on assumptions without verifying the context | ||
| - Flag style preferences without clear coding standard violations | ||
| """ | ||
| FIRST_MESSAGE_TEMPLATE = """Here is a summary of the patch: | ||
| <patch_summary> | ||
@@ -111,25 +92,16 @@ {patch_summarization} | ||
| <patch> | ||
| {patch} | ||
| </patch> | ||
| """ | ||
| OUTPUT_FORMAT_JSON = """ | ||
| Respond only with a **JSON list**. Each object must contain the following fields: | ||
| Here are examples of good code review comments to guide your style and approach: | ||
| * `"file"`: The relative path to the file the comment applies to. | ||
| * `"code_line"`: The number of the specific changed line of code that the comment refers to. | ||
| * `"comment"`: A concise review comment. | ||
| * `"explanation"`: A brief rationale for the comment, including how confident you are and why. | ||
| * `"order"`: An integer representing the priority of the comment, with 1 being the highest confidence/importance. | ||
| """ | ||
| <examples> | ||
| {comment_examples} | ||
| {approved_examples} | ||
| </examples> | ||
| OUTPUT_FORMAT_TEXT = """ | ||
| Respond only with a **plain text list** with the following details: | ||
| * `"filename"`: The relative path to the file the comment applies to. | ||
| * `"line_number"`: The number of the specific changed line of code that the comment refers to. | ||
| * `"comment"`: A concise review comment. | ||
| Here is the patch you need to review: | ||
| The format should be: filename:line_number "comment" | ||
| <patch> | ||
| {patch} | ||
| </patch> | ||
| """ | ||
@@ -136,0 +108,0 @@ |
@@ -47,3 +47,3 @@ # -*- coding: utf-8 -*- | ||
| def create_anthropic_llm( | ||
| temperature=0.2, top_p=None, model_name="claude-sonnet-4-5-20250929" | ||
| temperature=0.2, top_p=None, model_name="claude-sonnet-4-5-20250929", **kwargs | ||
| ): | ||
@@ -57,2 +57,3 @@ from langchain_anthropic import ChatAnthropic | ||
| top_p=top_p, | ||
| **kwargs, | ||
| ) | ||
@@ -59,0 +60,0 @@ |
@@ -56,2 +56,8 @@ # -*- coding: utf-8 -*- | ||
| @abstractmethod | ||
| def patch_description(self) -> str: | ||
| """Return the description of the patch.""" | ||
| ... | ||
| @property | ||
| @abstractmethod | ||
| def patch_url(self) -> str: | ||
@@ -58,0 +64,0 @@ """Return the URL of the patch.""" |
@@ -180,3 +180,3 @@ # -*- coding: utf-8 -*- | ||
| def __init__(self, data: dict): | ||
| self.metadata = data | ||
| self._metadata = data | ||
@@ -201,4 +201,8 @@ @staticmethod | ||
| @property | ||
| def summary(self) -> str: | ||
| return self._metadata["summary"] | ||
| def to_md(self) -> str: | ||
| """Return a markdown representation of the bug.""" | ||
| return bug_dict_to_markdown(self.metadata) | ||
| return bug_dict_to_markdown(self._metadata) |
@@ -17,5 +17,6 @@ # -*- coding: utf-8 -*- | ||
| from bugbug import bugzilla, db, phabricator, utils | ||
| from bugbug import db, phabricator, utils | ||
| from bugbug.tools.core.data_types import InlineComment, ReviewRequest | ||
| from bugbug.tools.core.platforms.base import Patch, ReviewData | ||
| from bugbug.tools.core.platforms.bugzilla import Bug | ||
| from bugbug.utils import get_secret | ||
@@ -134,3 +135,3 @@ | ||
| def _get_file(self, file_path: str, is_before_patch: bool) -> str: | ||
| def _get_file_from_patch(self, file_path: str, is_before_patch: bool) -> str: | ||
| for changeset in self._changesets: | ||
@@ -155,5 +156,29 @@ if changeset["fields"]["path"]["displayPath"] == file_path: | ||
| def _get_file_from_repo(self, file_path: str, commit_hash: str) -> str: | ||
| r = utils.get_session("hgmo").get( | ||
| f"https://hg.mozilla.org/mozilla-unified/raw-file/{commit_hash}/{file_path}", | ||
| headers={ | ||
| "User-Agent": utils.get_user_agent(), | ||
| }, | ||
| ) | ||
| if r.status_code == 404: | ||
| raise FileNotFoundError( | ||
| f"File {file_path} not found in commit {commit_hash}" | ||
| ) | ||
| r.raise_for_status() | ||
| return r.text | ||
| def get_old_file(self, file_path: str) -> str: | ||
| return self._get_file(file_path, is_before_patch=True) | ||
| if file_path.startswith("b/") or file_path.startswith("a/"): | ||
| file_path = file_path[2:] | ||
| try: | ||
| return self._get_file_from_patch(file_path, is_before_patch=True) | ||
| except FileNotFoundError: | ||
| return self._get_file_from_repo( | ||
| file_path, commit_hash=self.base_commit_hash | ||
| ) | ||
| @cached_property | ||
@@ -257,14 +282,5 @@ def _changesets(self) -> list[dict]: | ||
| @cached_property | ||
| def _bug_metadata(self) -> dict | None: | ||
| id = self.bug_id | ||
| bugs = bugzilla.get(id) | ||
| def bug(self) -> Bug: | ||
| return Bug.get(self.bug_id) | ||
| if id not in bugs: | ||
| logger.warning( | ||
| "Bug %d not found in Bugzilla. This might be a private bug.", id | ||
| ) | ||
| return None | ||
| return bugs[id] | ||
| @property | ||
@@ -274,10 +290,6 @@ def bug_id(self) -> int: | ||
| @cached_property | ||
| @property | ||
| def bug_title(self) -> str: | ||
| if not self._bug_metadata: | ||
| # Use a placeholder when the bug metadata is not available | ||
| return "--" | ||
| return self.bug.summary | ||
| return self._bug_metadata["summary"] | ||
| @cached_property | ||
@@ -288,2 +300,6 @@ def patch_title(self) -> str: | ||
| @property | ||
| def patch_description(self) -> str: | ||
| return self._revision_metadata["fields"].get("summary", "") | ||
| @property | ||
| def revision_id(self) -> int: | ||
@@ -290,0 +306,0 @@ return self._revision_metadata["id"] |
@@ -56,2 +56,6 @@ # -*- coding: utf-8 -*- | ||
| @property | ||
| def patch_description(self) -> str: | ||
| raise NotImplementedError | ||
| @cached_property | ||
@@ -58,0 +62,0 @@ def bug_title(self) -> str: |
+1
-1
| Metadata-Version: 2.1 | ||
| Name: bugbug | ||
| Version: 0.0.614 | ||
| Version: 0.0.615 | ||
| Summary: ML tools for Mozilla projects | ||
@@ -5,0 +5,0 @@ Author: Marco Castelluccio |
+9
-9
| amqp==5.3.1 | ||
| beautifulsoup4==4.14.2 | ||
| beautifulsoup4==4.14.3 | ||
| boto3==1.41.2 | ||
| imbalanced-learn==0.14.0 | ||
| langchain==1.0.8 | ||
| langchain-anthropic==1.1.0 | ||
| langchain==1.2.0 | ||
| langchain-anthropic==1.3.0 | ||
| langchain-classic==1.0.0 | ||
| langchain-community==0.4.1 | ||
| langchain-google-genai==3.1.0 | ||
| langchain-google-genai==4.0.0 | ||
| langchain-mistralai==1.0.1 | ||
| langchain-openai==1.0.3 | ||
| langgraph==1.0.3 | ||
| langchain-openai==1.1.3 | ||
| langgraph==1.0.5 | ||
| libmozdata==0.2.12 | ||
@@ -23,3 +23,3 @@ llama-cpp-python==0.2.90 | ||
| numpy==2.3.5 | ||
| orjson==3.11.4 | ||
| orjson==3.11.5 | ||
| ortools==9.14.6206 | ||
@@ -31,3 +31,3 @@ pandas==2.3.3 | ||
| python-hglib==2.6.2 | ||
| qdrant-client==1.16.0 | ||
| qdrant-client==1.15.1 | ||
| ratelimit==2.2.1 | ||
@@ -42,3 +42,3 @@ requests==2.32.5 | ||
| tabulate==0.9.0 | ||
| taskcluster==93.1.5 | ||
| taskcluster==94.1.1 | ||
| tenacity==9.1.2 | ||
@@ -45,0 +45,0 @@ tqdm==4.67.1 |
@@ -5,6 +5,7 @@ # %% | ||
| from scripts.code_review_tool_evaluator import get_latest_evaluation_results_file | ||
| import scripts.code_review_tool_evaluator as evaluator_script | ||
| evaluation_results = pd.read_csv( | ||
| get_latest_evaluation_results_file("../evaluation_results") | ||
| # evaluator_script.get_latest_evaluation_results_file("../evaluation_results") | ||
| evaluator_script.get_ongoing_evaluation_results_file("../evaluation_results") | ||
| ) | ||
@@ -11,0 +12,0 @@ |
@@ -35,5 +35,5 @@ # -*- coding: utf-8 -*- | ||
| from bugbug.tools.core import llms | ||
| from bugbug.tools.core.exceptions import ModelResultError | ||
| from bugbug.vectordb import QdrantVectorDB | ||
| code_review.TARGET_SOFTWARE = "Mozilla Firefox" | ||
| VERBOSE_CODE_REVIEW = False | ||
@@ -225,3 +225,5 @@ | ||
| def get_tool_variants() -> list[tuple[str, code_review.CodeReviewTool]]: | ||
| def get_tool_variants( | ||
| variants: list[str], | ||
| ) -> list[tuple[str, code_review.CodeReviewTool]]: | ||
| """Returns a list of tool variants to evaluate. | ||
@@ -259,27 +261,31 @@ | ||
| tool_variants.append( | ||
| ( | ||
| "Claude", | ||
| code_review.CodeReviewTool( | ||
| llm=llms.create_anthropic_llm(), | ||
| function_search=function_search, | ||
| review_comments_db=review_comments_db, | ||
| suggestions_feedback_db=suggestions_feedback_db, | ||
| verbose=VERBOSE_CODE_REVIEW, | ||
| ), | ||
| if "claude" in variants: | ||
| tool_variants.append( | ||
| ( | ||
| "Claude", | ||
| code_review.CodeReviewTool.create( | ||
| function_search=function_search, | ||
| review_comments_db=review_comments_db, | ||
| suggestions_feedback_db=suggestions_feedback_db, | ||
| verbose=VERBOSE_CODE_REVIEW, | ||
| ), | ||
| ) | ||
| ) | ||
| ) | ||
| tool_variants.append( | ||
| ( | ||
| "GPT", | ||
| code_review.CodeReviewTool( | ||
| llm=llms.create_openai_llm(), | ||
| function_search=function_search, | ||
| review_comments_db=review_comments_db, | ||
| suggestions_feedback_db=suggestions_feedback_db, | ||
| verbose=VERBOSE_CODE_REVIEW, | ||
| ), | ||
| if "gpt" in variants: | ||
| llm = llms.create_openai_llm() | ||
| tool_variants.append( | ||
| ( | ||
| "GPT", | ||
| code_review.CodeReviewTool.create( | ||
| llm=llm, | ||
| summarization_llm=llm, | ||
| filtering_llm=llm, | ||
| function_search=function_search, | ||
| review_comments_db=review_comments_db, | ||
| suggestions_feedback_db=suggestions_feedback_db, | ||
| verbose=VERBOSE_CODE_REVIEW, | ||
| ), | ||
| ) | ||
| ) | ||
| ) | ||
@@ -354,2 +360,22 @@ return tool_variants | ||
def get_ongoing_evaluation_results_file(results_dir: str | None):
    """Return the path of the newest results file created after the latest baseline.

    Args:
        results_dir: Directory containing ``evaluation_results_*.csv`` files,
            or None to search the current working directory.

    Returns:
        The newest matching file name, joined with ``results_dir`` when given.

    Raises:
        FileNotFoundError: If no results file newer than the baseline exists.
    """
    import glob
    import os

    base_file = get_latest_evaluation_results_file(results_dir)
    # Compare bare file names: glob(root_dir=...) yields names without the
    # directory prefix, while the baseline may be a dir-joined path. Using the
    # basename keeps the lexicographic comparison consistent either way.
    base_name = os.path.basename(base_file)
    candidates = [
        name
        for name in glob.glob("evaluation_results_*.csv", root_dir=results_dir)
        # "#" marks editor/backup copies; only keep files newer than the baseline.
        if "#" not in name and name > base_name
    ]
    if not candidates:
        raise FileNotFoundError("No ongoing evaluation results file found.")
    # File names embed a zero-padded %Y-%m-%d_%H-%M-%S timestamp, so the
    # lexicographic maximum is the chronologically newest file.
    latest_file = max(candidates)
    if results_dir:
        return os.path.join(results_dir, latest_file)
    return latest_file
| def main(args): | ||
@@ -361,7 +387,6 @@ review_platform = "phabricator" | ||
| tool_variants = get_tool_variants() | ||
| tool_variants = get_tool_variants(args.variants) | ||
| evaluator = FeedbackEvaluator(args.evaluation_dataset) | ||
| is_first_result = True | ||
| result_file = os.path.join( | ||
@@ -371,6 +396,14 @@ args.results_dir, | ||
| ) | ||
| evaluation_results_file = os.path.join( | ||
| args.results_dir, | ||
| f"evaluation_results_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.csv", | ||
| ) | ||
| is_first_result = not os.path.exists(result_file) | ||
| if is_first_result: | ||
| evaluation_results_file = os.path.join( | ||
| args.results_dir, | ||
| f"evaluation_results_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.csv", | ||
| ) | ||
| seen_patches = set() | ||
| else: | ||
| evaluation_results_file = get_ongoing_evaluation_results_file(args.results_dir) | ||
| seen_patches = set(pd.read_csv(evaluation_results_file)["diff_id"].to_list()) | ||
| result_unique_columns = ["Review Request ID", "File", "Line", "Comment Number"] | ||
@@ -430,2 +463,14 @@ result_all_columns = result_unique_columns + [ | ||
| for review_request_id, review_request in selected_review_requests: | ||
| if review_request_id in [227266, 233414]: | ||
| print( | ||
| f"Skipping Review Request ID {review_request_id} because it is known to cause issues." | ||
| ) | ||
| continue | ||
| if review_request.patch_id in seen_patches: | ||
| print( | ||
| f"Skipping Review Request ID {review_request_id} (Diff ID {review_request.patch_id}) because it was already evaluated." | ||
| ) | ||
| continue | ||
| print("---------------------------------------------------------") | ||
@@ -453,2 +498,5 @@ print(f"Review Request ID: {review_request_id}") | ||
| continue | ||
| except ModelResultError as e: | ||
| print("Error while running the tool:", e) | ||
| continue | ||
@@ -559,2 +607,10 @@ print_prettified_comments(comments) | ||
| ) | ||
| parser.add_argument( | ||
| "--variant", | ||
| dest="variants", | ||
| action="append", | ||
| help="the variants to use, use multiple times for multiple variants", | ||
| choices=["claude", "gpt"], | ||
| required=True, | ||
| ) | ||
@@ -561,0 +617,0 @@ args = parser.parse_args() |
+1
-1
@@ -1,1 +0,1 @@ | ||
| 0.0.614 | ||
| 0.0.615 |
| # -*- coding: utf-8 -*- | ||
| # This Source Code Form is subject to the terms of the Mozilla Public | ||
| # License, v. 2.0. If a copy of the MPL was not distributed with this file, | ||
| # You can obtain one at http://mozilla.org/MPL/2.0/. | ||
| import argparse | ||
| import sys | ||
| from bugbug.code_search.function_search import function_search_classes | ||
| from bugbug.tools import code_review | ||
| from bugbug.tools.core import llms | ||
| from bugbug.vectordb import QdrantVectorDB | ||
def run(args) -> None:
    """Run the code review tool once against a single review request and print the result."""
    llm = llms.create_llm_from_args(args)

    # Only instantiate a function-search backend when one was requested.
    if args.function_search_type is None:
        function_search = None
    else:
        function_search = function_search_classes[args.function_search_type]()

    comments_db = code_review.ReviewCommentsDB(QdrantVectorDB("diff_comments"))

    code_review_tool = code_review.CodeReviewTool(
        llm,
        function_search=function_search,
        review_comments_db=comments_db,
        show_patch_example=False,
    )

    platform_data = code_review.review_data_classes[args.review_platform]()
    review_request = platform_data.get_review_request_by_id(args.review_request_id)
    patch = platform_data.get_patch_by_id(review_request.patch_id)

    print(patch)
    print(code_review_tool.run(patch))

    # Block until the user presses Enter so the printed output stays visible.
    input()
def parse_args(args):
    """Build the CLI parser and parse *args* into a namespace."""
    arg_parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    arg_parser.add_argument(
        "--review_platform",
        help="Review platform",
        choices=list(code_review.review_data_classes.keys()),
    )
    arg_parser.add_argument(
        "--review_request_id",
        help="Review request ID",
    )

    # Let the LLM helper register its own model-selection flags.
    llms.create_llm_to_args(arg_parser)

    arg_parser.add_argument(
        "--function_search_type",
        help="Function search tool",
        choices=list(function_search_classes.keys()),
    )

    return arg_parser.parse_args(args)
if __name__ == "__main__":
    # Parse CLI arguments (excluding the program name) and run the tool.
    run(parse_args(sys.argv[1:]))
Alert delta unavailable
Currently unable to show alert delta for PyPI packages.
1380673
0.11%22415
0.11%139
-0.71%