bugbug
Advanced tools
| Metadata-Version: 2.1 | ||
| Name: bugbug | ||
| Version: 0.0.610 | ||
| Version: 0.0.611 | ||
| Summary: ML tools for Mozilla projects | ||
@@ -5,0 +5,0 @@ Author: Marco Castelluccio |
| amqp==5.3.1 | ||
| beautifulsoup4==4.14.2 | ||
| boto3==1.40.69 | ||
| boto3==1.40.75 | ||
| imbalanced-learn==0.14.0 | ||
| langchain-anthropic==1.0.2 | ||
| langchain-anthropic==1.1.0 | ||
| langchain-classic==1.0.0 | ||
| langchain-community==0.4.1 | ||
| langchain-google-genai==3.0.1 | ||
| langchain-google-genai==3.0.3 | ||
| langchain-mistralai==1.0.1 | ||
| langchain-openai==1.0.2 | ||
| langchain==1.0.5 | ||
| langgraph==1.0.2 | ||
| langchain-openai==1.0.3 | ||
| langchain==1.0.7 | ||
| langgraph==1.0.3 | ||
| libmozdata==0.2.12 | ||
@@ -19,6 +19,6 @@ llama-cpp-python==0.2.90 | ||
| matplotlib==3.10.7 | ||
| mercurial==7.1.1 | ||
| mercurial==7.1.2 | ||
| microannotate==0.0.24 | ||
| mozci==2.4.3 | ||
| numpy==2.3.4 | ||
| numpy==2.3.5 | ||
| orjson==3.11.4 | ||
@@ -31,3 +31,3 @@ ortools==9.14.6206 | ||
| python-hglib==2.6.2 | ||
| qdrant-client==1.15.1 | ||
| qdrant-client==1.16.0 | ||
| ratelimit==2.2.1 | ||
@@ -42,3 +42,3 @@ requests-html==0.10.0 | ||
| tabulate==0.9.0 | ||
| taskcluster==93.0.0 | ||
| taskcluster==93.1.4 | ||
| tenacity==9.1.2 | ||
@@ -52,4 +52,4 @@ tqdm==4.67.1 | ||
| gensim==4.4.0 | ||
| spacy==3.8.7 | ||
| spacy==3.8.11 | ||
| [nn] |
@@ -7,2 +7,3 @@ # -*- coding: utf-8 -*- | ||
| import collections | ||
| import glob | ||
| import itertools | ||
@@ -17,2 +18,3 @@ import logging | ||
| from datetime import datetime | ||
| from pathlib import Path | ||
| from typing import ( | ||
@@ -31,2 +33,3 @@ Any, | ||
| import tomllib | ||
| from tqdm import tqdm | ||
@@ -901,1 +904,107 @@ | ||
| return r.json() | ||
| manifest_by_path: dict[str, set[str]] | None = None | ||
| def find_manifests_for_paths(repo_dir_str: str, paths: list[str]) -> set[str]: | ||
| global manifest_by_path | ||
| repo_dir = Path(repo_dir_str) | ||
| manifests = set() | ||
| if manifest_by_path is None: | ||
| manifest_by_path = collections.defaultdict(set) | ||
| for toml_path in repo_dir.rglob("*.toml"): | ||
| # HACK: These are not test manifests, skip them. | ||
| if ( | ||
| toml_path.name == "Cargo.toml" | ||
| or toml_path.parent.name == "test-manifest-toml" | ||
| or "third_party" in toml_path.parts | ||
| ): | ||
| continue | ||
| with open(toml_path, "rb") as toml_f: | ||
| data = tomllib.load(toml_f) | ||
| # HACK: If there is no "DEFAULT" key and there is no key that starts with "test", this is unlikely a test manifest. | ||
| if "DEFAULT" not in data and not any( | ||
| key.startswith("test") for key in data.values() | ||
| ): | ||
| continue | ||
| toml_rel = toml_path.relative_to(repo_dir) | ||
| toml_dir = toml_path.parent | ||
| # The manifest path itself, so we schedule it when it is touched. | ||
| manifest_by_path[str(toml_rel)].add(str(toml_rel)) | ||
| # Collect head files. | ||
| head_files = data.get("DEFAULT", {}).get("head", "").split(" ") | ||
| for head_file in head_files: | ||
| if not head_file.strip(): | ||
| continue | ||
| manifest_by_path[ | ||
| str((toml_dir / head_file).resolve().relative_to(repo_dir)) | ||
| ].add(str(toml_rel)) | ||
| # Collect support files. | ||
| def collect_support_files(value): | ||
| support_files = value.get("support-files", []) | ||
| if isinstance(support_files, str): | ||
| support_files = [support_files] | ||
| for support_file in support_files: | ||
| if not support_file.strip(): | ||
| continue | ||
| if support_file.startswith("!"): | ||
| support_file = support_file[1:] | ||
| if support_file.startswith("/"): | ||
| support_file_path = (repo_dir / support_file[1:]).resolve() | ||
| else: | ||
| support_file_path = (toml_dir / support_file).resolve() | ||
| if "*" in support_file: | ||
| files = [ | ||
| Path(f) | ||
| for f in glob.glob(str(support_file_path), recursive=True) | ||
| ] | ||
| else: | ||
| files = [support_file_path] | ||
| for f in files: | ||
| manifest_by_path[str(f.relative_to(repo_dir))].add( | ||
| str(toml_rel) | ||
| ) | ||
| collect_support_files(data.get("DEFAULT", {})) | ||
| # Collect test files. | ||
| for key, val in data.items(): | ||
| if key != "DEFAULT" and isinstance(val, dict): | ||
| collect_support_files(val) | ||
| manifest_by_path[ | ||
| str((toml_dir / key).resolve().relative_to(repo_dir)) | ||
| ].add(str(toml_rel)) | ||
| for path in paths: | ||
| # If a manifest, a test, or a support file is modified, run the manifest that includes it. | ||
| if path in manifest_by_path: | ||
| manifests.update(manifest_by_path[path]) | ||
| else: | ||
| # Find manifests that are in test subfolders close to a modified file (e.g. if dom/battery/BatteryManager.cpp is modified, we should run dom/battery/test/chrome.toml and dom/battery/test/mochitest.toml). | ||
| for sibling in (repo_dir / path).parent.rglob("*"): | ||
| if sibling.is_dir() and repository.is_test(f"{str(sibling)}/"): | ||
| manifests.update( | ||
| str(f.relative_to(repo_dir)) | ||
| for f in sibling.rglob("*.toml") | ||
| if f.is_file() | ||
| ) | ||
| return manifests |
@@ -29,68 +29,72 @@ # -*- coding: utf-8 -*- | ||
| PROMPT_TEMPLATE_REVIEW = """**Task**: | ||
| PROMPT_TEMPLATE_REVIEW = """<task> | ||
| Generate high-quality code review comments for the patch provided below. | ||
| </task> | ||
| **Instructions**: | ||
| <instructions> | ||
| <analyze_changes> | ||
| **Analyze the Changes**: | ||
| * Understand the intent and structure of the changes in the patch. | ||
| * Use the provided summarization for context, but prioritize what's visible in the diff. | ||
| </analyze_changes> | ||
| 1. **Analyze the Changes**: | ||
| <identify_issues> | ||
| **Identify Issues**: | ||
| * Detect bugs, logical errors, performance concerns, security issues, or violations of the `{target_code_consistency}` coding standards. | ||
| * Focus only on **new or changed lines** (lines beginning with `+`). | ||
| * **Prioritize**: Security vulnerabilities > Functional bugs > Performance issues > Style/readability concerns. | ||
| </identify_issues> | ||
| * Understand the intent and structure of the changes in the patch. | ||
| * Use the provided summarization for context, but prioritize what's visible in the diff. | ||
| <assess_confidence> | ||
| **Assess Confidence and Order**: | ||
| * **Only include comments where you are at least 80% confident the issue is valid**. | ||
| * **Sort the comments by descending confidence and importance**: | ||
| * Start with issues you are **certain are valid**. | ||
| * Also, prioritize important issues that you are **confident about**. | ||
| * Follow with issues that are **plausible but uncertain** (possible false positives). | ||
| * **When uncertain, use available tools to verify before commenting**. | ||
| * Assign each comment a numeric `order`, starting at 1. | ||
| </assess_confidence> | ||
| 2. **Identify Issues**: | ||
| <write_comments> | ||
| **Write Clear, Constructive Comments**: | ||
| * Use **direct, declarative language**. State the problem definitively, then suggest the fix. | ||
| * Keep comments **short and specific**. | ||
| * Focus strictly on code-related concerns. | ||
| * **Banned phrases**: "maybe", "might want to", "consider", "possibly", "could be", "you may want to". | ||
| * **Use directive language**: "Fix", "Remove", "Change", "Add", "Validate", "Check" (not "Consider checking"). | ||
| * Avoid repeating what the code is doing unless it supports your critique. | ||
| </write_comments> | ||
| * Detect bugs, logical errors, performance concerns, security issues, or violations of the `{target_code_consistency}` coding standards. | ||
| * Focus only on **new or changed lines** (lines beginning with `+`). | ||
| <use_tools> | ||
| **Use available tools to verify concerns**: | ||
| * Use tools to gather context when you suspect an issue but need verification. | ||
| * Use `find_function_definition` to check if error handling or validation exists elsewhere. | ||
| * Use `expand_context` to see if edge cases are handled in surrounding code. | ||
| * **Do not suggest issues you cannot verify with available context and tools**. | ||
| </use_tools> | ||
| 3. **Assess Confidence and Order**: | ||
| * **Sort the comments by descending confidence and importance**: | ||
| * Start with issues you are **certain are valid**. | ||
| * Also, prioritize important issues that you are **confident about**. | ||
| * Follow with issues that are **plausible but uncertain** (possible false positives). | ||
| * Assign each comment a numeric `order`, starting at 1. | ||
| 4. **Write Clear, Constructive Comments**: | ||
| * Use **direct, declarative language**. | ||
| * Keep comments **short and specific**. | ||
| * Focus strictly on code-related concerns. | ||
| * Avoid hedging language (e.g., don’t use “maybe”, “might want to”, or form questions). | ||
| * Avoid repeating what the code is doing unless it supports your critique. | ||
| 5. **Use available tools**: | ||
| * Consider using available tools to better understand the context of the code changes you are reviewing. | ||
| * Limit the use of tools to only when you need more context to analyze the code changes. | ||
| <avoid> | ||
| **Avoid Comments That**: | ||
| * Refer to unmodified code (lines without a `+` prefix). | ||
| * Ask for verification or confirmation (e.g., “Check if…”). | ||
| * Ask for verification or confirmation (e.g., "Check if…", "Ensure that…"). | ||
| * Provide praise or restate obvious facts. | ||
| * Focus on testing. | ||
| * Point out issues that are already handled in the visible code. | ||
| * Suggest problems based on assumptions without verifying the context. | ||
| * Flag style preferences without clear `{target_code_consistency}` standard violations. | ||
| </avoid> | ||
| </instructions> | ||
| --- | ||
| **Output Format**: | ||
| <output_format> | ||
| {output_instructions} | ||
| </output_format> | ||
| --- | ||
| **Examples**: | ||
| <examples> | ||
| {comment_examples} | ||
| {approved_examples} | ||
| </examples> | ||
| --- | ||
| **Patch Summary**: | ||
| {patch_summarization} | ||
| --- | ||
| <context> | ||
| **Review Context**: | ||
| Target Software: {target_software} | ||
@@ -100,8 +104,11 @@ Bug Title: {bug_title} | ||
| Source URL: {patch_url} | ||
| </context> | ||
| --- | ||
| <patch_summary> | ||
| {patch_summarization} | ||
| </patch_summary> | ||
| **Patch to Review**: | ||
| <patch> | ||
| {patch} | ||
| </patch> | ||
| """ | ||
@@ -116,2 +123,3 @@ | ||
| * `"explanation"`: A brief rationale for the comment, including how confident you are and why. | ||
| * `"order"`: An integer representing the priority of the comment, with 1 being the highest confidence/importance. | ||
| """ | ||
@@ -118,0 +126,0 @@ |
| gensim==4.4.0 | ||
| spacy==3.8.7 | ||
| spacy==3.8.11 |
+1
-1
| Metadata-Version: 2.1 | ||
| Name: bugbug | ||
| Version: 0.0.610 | ||
| Version: 0.0.611 | ||
| Summary: ML tools for Mozilla projects | ||
@@ -5,0 +5,0 @@ Author: Marco Castelluccio |
+10
-10
| amqp==5.3.1 | ||
| beautifulsoup4==4.14.2 | ||
| boto3==1.40.69 | ||
| boto3==1.40.75 | ||
| imbalanced-learn==0.14.0 | ||
| langchain==1.0.5 | ||
| langchain-anthropic==1.0.2 | ||
| langchain==1.0.7 | ||
| langchain-anthropic==1.1.0 | ||
| langchain-classic==1.0.0 | ||
| langchain-community==0.4.1 | ||
| langchain-google-genai==3.0.1 | ||
| langchain-google-genai==3.0.3 | ||
| langchain-mistralai==1.0.1 | ||
| langchain-openai==1.0.2 | ||
| langgraph==1.0.2 | ||
| langchain-openai==1.0.3 | ||
| langgraph==1.0.3 | ||
| libmozdata==0.2.12 | ||
@@ -19,6 +19,6 @@ llama-cpp-python==0.2.90 | ||
| matplotlib==3.10.7 | ||
| mercurial==7.1.1 | ||
| mercurial==7.1.2 | ||
| microannotate==0.0.24 | ||
| mozci==2.4.3 | ||
| numpy==2.3.4 | ||
| numpy==2.3.5 | ||
| orjson==3.11.4 | ||
@@ -31,3 +31,3 @@ ortools==9.14.6206 | ||
| python-hglib==2.6.2 | ||
| qdrant-client==1.15.1 | ||
| qdrant-client==1.16.0 | ||
| ratelimit==2.2.1 | ||
@@ -42,3 +42,3 @@ requests==2.32.5 | ||
| tabulate==0.9.0 | ||
| taskcluster==93.0.0 | ||
| taskcluster==93.1.4 | ||
| tenacity==9.1.2 | ||
@@ -45,0 +45,0 @@ tqdm==4.67.1 |
@@ -11,5 +11,6 @@ # -*- coding: utf-8 -*- | ||
| from collections import defaultdict | ||
| from concurrent.futures import ALL_COMPLETED, ThreadPoolExecutor, wait | ||
| from concurrent.futures import ALL_COMPLETED, ThreadPoolExecutor, as_completed, wait | ||
| from datetime import datetime, timedelta | ||
| from logging import INFO, basicConfig, getLogger | ||
| from threading import Lock | ||
@@ -32,3 +33,13 @@ import requests | ||
# One lock per file path, created on first request.
file_locks = {}
# Guards lookups and insertions in ``file_locks``.
primary_lock = Lock()


def get_lock_for_file(path: str) -> Lock:
    """Return the lock associated with *path*, creating it on first use.

    The registry is consulted while holding ``primary_lock``, so every caller
    asking for the same path receives the same lock object.
    """
    with primary_lock:
        lock = file_locks.get(path)
        if lock is None:
            lock = Lock()
            file_locks[path] = lock
        return lock
| def download_dbs(): | ||
@@ -185,35 +196,47 @@ assert db.download(repository.COMMITS_DB) | ||
| for push in tqdm( | ||
| fixed_by_commit_pushes.values(), total=len(fixed_by_commit_pushes) | ||
| ): | ||
| for failure in push["failures"]: | ||
| task_id = failure["task_id"] | ||
| retry_id = failure["retry_id"] | ||
| all_failures = [ | ||
| failure | ||
| for push in fixed_by_commit_pushes.values() | ||
| for failure in push["failures"] | ||
| ] | ||
| log_path = os.path.join( | ||
| "data", "ci_failures_logs", f"{task_id}.{retry_id}.log" | ||
| ) | ||
| log_zst_path = f"{log_path}.zst" | ||
| if os.path.exists(log_path) or os.path.exists(log_zst_path): | ||
| continue | ||
| with ThreadPoolExecutor() as executor: | ||
| futures = [ | ||
| executor.submit(process_logs, failure, upload) for failure in all_failures | ||
| ] | ||
| if upload and utils.exists_s3(log_zst_path): | ||
| continue | ||
| # We iterate over the futures as they finish so tqdm can update the progress bar. | ||
| all(tqdm(as_completed(futures), total=len(futures))) | ||
| try: | ||
| utils.download_check_etag( | ||
| f"https://firefox-ci-tc.services.mozilla.com/api/queue/v1/task/{task_id}/runs/{retry_id}/artifacts/public/logs/live.log", | ||
| log_path, | ||
| ) | ||
| utils.zstd_compress(log_path) | ||
| def process_logs(failure, upload): | ||
| task_id = failure["task_id"] | ||
| retry_id = failure["retry_id"] | ||
| os.remove(log_path) | ||
| log_path = os.path.join("data", "ci_failures_logs", f"{task_id}.{retry_id}.log") | ||
| log_zst_path = f"{log_path}.zst" | ||
| if upload: | ||
| utils.upload_s3([log_zst_path]) | ||
| except requests.exceptions.HTTPError: | ||
| pass | ||
| with get_lock_for_file(log_zst_path): | ||
| if os.path.exists(log_path) or os.path.exists(log_zst_path): | ||
| return | ||
| if upload and utils.exists_s3(log_zst_path): | ||
| return | ||
| try: | ||
| utils.download_check_etag( | ||
| f"https://firefox-ci-tc.services.mozilla.com/api/queue/v1/task/{task_id}/runs/{retry_id}/artifacts/public/logs/live.log", | ||
| log_path, | ||
| ) | ||
| utils.zstd_compress(log_path) | ||
| os.remove(log_path) | ||
| if upload: | ||
| utils.upload_s3([log_zst_path]) | ||
| except requests.exceptions.HTTPError: | ||
| pass | ||
| def diff_failure_vs_fix(repo, failure_commits, fix_commits): | ||
@@ -220,0 +243,0 @@ try: |
+1
-1
@@ -1,1 +0,1 @@ | ||
| 0.0.610 | ||
| 0.0.611 |
Alert delta unavailable
Currently unable to show alert delta for PyPI packages.
1377214
0.42%22356
0.56%