Latest Threat Research:SANDWORM_MODE: Shai-Hulud-Style npm Worm Hijacks CI Workflows and Poisons AI Toolchains.Details
Socket
Book a DemoInstallSign in
Socket

bugbug

Package Overview
Dependencies
Maintainers
2
Versions
543
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

bugbug - npm Package Compare versions

Comparing version
0.0.610
to
0.0.611
+1
-1
bugbug.egg-info/PKG-INFO
Metadata-Version: 2.1
Name: bugbug
Version: 0.0.610
Version: 0.0.611
Summary: ML tools for Mozilla projects

@@ -5,0 +5,0 @@ Author: Marco Castelluccio

amqp==5.3.1
beautifulsoup4==4.14.2
boto3==1.40.69
boto3==1.40.75
imbalanced-learn==0.14.0
langchain-anthropic==1.0.2
langchain-anthropic==1.1.0
langchain-classic==1.0.0
langchain-community==0.4.1
langchain-google-genai==3.0.1
langchain-google-genai==3.0.3
langchain-mistralai==1.0.1
langchain-openai==1.0.2
langchain==1.0.5
langgraph==1.0.2
langchain-openai==1.0.3
langchain==1.0.7
langgraph==1.0.3
libmozdata==0.2.12

@@ -19,6 +19,6 @@ llama-cpp-python==0.2.90

matplotlib==3.10.7
mercurial==7.1.1
mercurial==7.1.2
microannotate==0.0.24
mozci==2.4.3
numpy==2.3.4
numpy==2.3.5
orjson==3.11.4

@@ -31,3 +31,3 @@ ortools==9.14.6206

python-hglib==2.6.2
qdrant-client==1.15.1
qdrant-client==1.16.0
ratelimit==2.2.1

@@ -42,3 +42,3 @@ requests-html==0.10.0

tabulate==0.9.0
taskcluster==93.0.0
taskcluster==93.1.4
tenacity==9.1.2

@@ -52,4 +52,4 @@ tqdm==4.67.1

gensim==4.4.0
spacy==3.8.7
spacy==3.8.11
[nn]

@@ -7,2 +7,3 @@ # -*- coding: utf-8 -*-

import collections
import glob
import itertools

@@ -17,2 +18,3 @@ import logging

from datetime import datetime
from pathlib import Path
from typing import (

@@ -31,2 +33,3 @@ Any,

import tomllib
from tqdm import tqdm

@@ -901,1 +904,107 @@

return r.json()
manifest_by_path: dict[str, set[str]] | None = None
def find_manifests_for_paths(repo_dir_str: str, paths: list[str]) -> set[str]:
global manifest_by_path
repo_dir = Path(repo_dir_str)
manifests = set()
if manifest_by_path is None:
manifest_by_path = collections.defaultdict(set)
for toml_path in repo_dir.rglob("*.toml"):
# HACK: These are not test manifests, skip them.
if (
toml_path.name == "Cargo.toml"
or toml_path.parent.name == "test-manifest-toml"
or "third_party" in toml_path.parts
):
continue
with open(toml_path, "rb") as toml_f:
data = tomllib.load(toml_f)
# HACK: If there is no "DEFAULT" key and there is no key that starts with "test", this is unlikely a test manifest.
if "DEFAULT" not in data and not any(
key.startswith("test") for key in data.values()
):
continue
toml_rel = toml_path.relative_to(repo_dir)
toml_dir = toml_path.parent
# The manifest path itself, so we schedule it when it is touched.
manifest_by_path[str(toml_rel)].add(str(toml_rel))
# Collect head files.
head_files = data.get("DEFAULT", {}).get("head", "").split(" ")
for head_file in head_files:
if not head_file.strip():
continue
manifest_by_path[
str((toml_dir / head_file).resolve().relative_to(repo_dir))
].add(str(toml_rel))
# Collect support files.
def collect_support_files(value):
support_files = value.get("support-files", [])
if isinstance(support_files, str):
support_files = [support_files]
for support_file in support_files:
if not support_file.strip():
continue
if support_file.startswith("!"):
support_file = support_file[1:]
if support_file.startswith("/"):
support_file_path = (repo_dir / support_file[1:]).resolve()
else:
support_file_path = (toml_dir / support_file).resolve()
if "*" in support_file:
files = [
Path(f)
for f in glob.glob(str(support_file_path), recursive=True)
]
else:
files = [support_file_path]
for f in files:
manifest_by_path[str(f.relative_to(repo_dir))].add(
str(toml_rel)
)
collect_support_files(data.get("DEFAULT", {}))
# Collect test files.
for key, val in data.items():
if key != "DEFAULT" and isinstance(val, dict):
collect_support_files(val)
manifest_by_path[
str((toml_dir / key).resolve().relative_to(repo_dir))
].add(str(toml_rel))
for path in paths:
# If a manifest, a test, or a support file is modified, run the manifest that includes it.
if path in manifest_by_path:
manifests.update(manifest_by_path[path])
else:
# Find manifests that are in test subfolders close to a modified file (e.g. if dom/battery/BatteryManager.cpp is modified, we should run dom/battery/test/chrome.toml and dom/battery/test/mochitest.toml).
for sibling in (repo_dir / path).parent.rglob("*"):
if sibling.is_dir() and repository.is_test(f"{str(sibling)}/"):
manifests.update(
str(f.relative_to(repo_dir))
for f in sibling.rglob("*.toml")
if f.is_file()
)
return manifests

@@ -29,68 +29,72 @@ # -*- coding: utf-8 -*-

PROMPT_TEMPLATE_REVIEW = """**Task**:
PROMPT_TEMPLATE_REVIEW = """<task>
Generate high-quality code review comments for the patch provided below.
</task>
**Instructions**:
<instructions>
<analyze_changes>
**Analyze the Changes**:
* Understand the intent and structure of the changes in the patch.
* Use the provided summarization for context, but prioritize what's visible in the diff.
</analyze_changes>
1. **Analyze the Changes**:
<identify_issues>
**Identify Issues**:
* Detect bugs, logical errors, performance concerns, security issues, or violations of the `{target_code_consistency}` coding standards.
* Focus only on **new or changed lines** (lines beginning with `+`).
* **Prioritize**: Security vulnerabilities > Functional bugs > Performance issues > Style/readability concerns.
</identify_issues>
* Understand the intent and structure of the changes in the patch.
* Use the provided summarization for context, but prioritize what's visible in the diff.
<assess_confidence>
**Assess Confidence and Order**:
* **Only include comments where you are at least 80% confident the issue is valid**.
* **Sort the comments by descending confidence and importance**:
* Start with issues you are **certain are valid**.
* Also, prioritize important issues that you are **confident about**.
* Follow with issues that are **plausible but uncertain** (possible false positives).
* **When uncertain, use available tools to verify before commenting**.
* Assign each comment a numeric `order`, starting at 1.
</assess_confidence>
2. **Identify Issues**:
<write_comments>
**Write Clear, Constructive Comments**:
* Use **direct, declarative language**. State the problem definitively, then suggest the fix.
* Keep comments **short and specific**.
* Focus strictly on code-related concerns.
* **Banned phrases**: "maybe", "might want to", "consider", "possibly", "could be", "you may want to".
* **Use directive language**: "Fix", "Remove", "Change", "Add", "Validate", "Check" (not "Consider checking").
* Avoid repeating what the code is doing unless it supports your critique.
</write_comments>
* Detect bugs, logical errors, performance concerns, security issues, or violations of the `{target_code_consistency}` coding standards.
* Focus only on **new or changed lines** (lines beginning with `+`).
<use_tools>
**Use available tools to verify concerns**:
* Use tools to gather context when you suspect an issue but need verification.
* Use `find_function_definition` to check if error handling or validation exists elsewhere.
* Use `expand_context` to see if edge cases are handled in surrounding code.
* **Do not suggest issues you cannot verify with available context and tools**.
</use_tools>
3. **Assess Confidence and Order**:
* **Sort the comments by descending confidence and importance**:
* Start with issues you are **certain are valid**.
* Also, prioritize important issues that you are **confident about**.
* Follow with issues that are **plausible but uncertain** (possible false positives).
* Assign each comment a numeric `order`, starting at 1.
4. **Write Clear, Constructive Comments**:
* Use **direct, declarative language**.
* Keep comments **short and specific**.
* Focus strictly on code-related concerns.
* Avoid hedging language (e.g., don’t use “maybe”, “might want to”, or form questions).
* Avoid repeating what the code is doing unless it supports your critique.
5. **Use available tools**:
* Consider using available tools to better understand the context of the code changes you are reviewing.
* Limit the use of tools to only when you need more context to analyze the code changes.
<avoid>
**Avoid Comments That**:
* Refer to unmodified code (lines without a `+` prefix).
* Ask for verification or confirmation (e.g., “Check if…”).
* Ask for verification or confirmation (e.g., "Check if…", "Ensure that…").
* Provide praise or restate obvious facts.
* Focus on testing.
* Point out issues that are already handled in the visible code.
* Suggest problems based on assumptions without verifying the context.
* Flag style preferences without clear `{target_code_consistency}` standard violations.
</avoid>
</instructions>
---
**Output Format**:
<output_format>
{output_instructions}
</output_format>
---
**Examples**:
<examples>
{comment_examples}
{approved_examples}
</examples>
---
**Patch Summary**:
{patch_summarization}
---
<context>
**Review Context**:
Target Software: {target_software}

@@ -100,8 +104,11 @@ Bug Title: {bug_title}

Source URL: {patch_url}
</context>
---
<patch_summary>
{patch_summarization}
</patch_summary>
**Patch to Review**:
<patch>
{patch}
</patch>
"""

@@ -116,2 +123,3 @@

* `"explanation"`: A brief rationale for the comment, including how confident you are and why.
* `"order"`: An integer representing the priority of the comment, with 1 being the highest confidence/importance.
"""

@@ -118,0 +126,0 @@

gensim==4.4.0
spacy==3.8.7
spacy==3.8.11
Metadata-Version: 2.1
Name: bugbug
Version: 0.0.610
Version: 0.0.611
Summary: ML tools for Mozilla projects

@@ -5,0 +5,0 @@ Author: Marco Castelluccio

amqp==5.3.1
beautifulsoup4==4.14.2
boto3==1.40.69
boto3==1.40.75
imbalanced-learn==0.14.0
langchain==1.0.5
langchain-anthropic==1.0.2
langchain==1.0.7
langchain-anthropic==1.1.0
langchain-classic==1.0.0
langchain-community==0.4.1
langchain-google-genai==3.0.1
langchain-google-genai==3.0.3
langchain-mistralai==1.0.1
langchain-openai==1.0.2
langgraph==1.0.2
langchain-openai==1.0.3
langgraph==1.0.3
libmozdata==0.2.12

@@ -19,6 +19,6 @@ llama-cpp-python==0.2.90

matplotlib==3.10.7
mercurial==7.1.1
mercurial==7.1.2
microannotate==0.0.24
mozci==2.4.3
numpy==2.3.4
numpy==2.3.5
orjson==3.11.4

@@ -31,3 +31,3 @@ ortools==9.14.6206

python-hglib==2.6.2
qdrant-client==1.15.1
qdrant-client==1.16.0
ratelimit==2.2.1

@@ -42,3 +42,3 @@ requests==2.32.5

tabulate==0.9.0
taskcluster==93.0.0
taskcluster==93.1.4
tenacity==9.1.2

@@ -45,0 +45,0 @@ tqdm==4.67.1

@@ -11,5 +11,6 @@ # -*- coding: utf-8 -*-

from collections import defaultdict
from concurrent.futures import ALL_COMPLETED, ThreadPoolExecutor, wait
from concurrent.futures import ALL_COMPLETED, ThreadPoolExecutor, as_completed, wait
from datetime import datetime, timedelta
from logging import INFO, basicConfig, getLogger
from threading import Lock

@@ -32,3 +33,13 @@ import requests

# One lock per file path, created on first request.
file_locks = {}
# Guards lookups and insertions in file_locks.
primary_lock = Lock()


def get_lock_for_file(path: str) -> Lock:
    """Return the lock associated with *path*, creating it on first use.

    The same lock object is returned for every call with the same path. The
    registry is only read or modified while ``primary_lock`` is held.
    """
    with primary_lock:
        try:
            return file_locks[path]
        except KeyError:
            new_lock = file_locks[path] = Lock()
            return new_lock
def download_dbs():

@@ -185,35 +196,47 @@ assert db.download(repository.COMMITS_DB)

for push in tqdm(
fixed_by_commit_pushes.values(), total=len(fixed_by_commit_pushes)
):
for failure in push["failures"]:
task_id = failure["task_id"]
retry_id = failure["retry_id"]
all_failures = [
failure
for push in fixed_by_commit_pushes.values()
for failure in push["failures"]
]
log_path = os.path.join(
"data", "ci_failures_logs", f"{task_id}.{retry_id}.log"
)
log_zst_path = f"{log_path}.zst"
if os.path.exists(log_path) or os.path.exists(log_zst_path):
continue
with ThreadPoolExecutor() as executor:
futures = [
executor.submit(process_logs, failure, upload) for failure in all_failures
]
if upload and utils.exists_s3(log_zst_path):
continue
# We iterate over the futures as they finish so tqdm can update the progress bar.
all(tqdm(as_completed(futures), total=len(futures)))
try:
utils.download_check_etag(
f"https://firefox-ci-tc.services.mozilla.com/api/queue/v1/task/{task_id}/runs/{retry_id}/artifacts/public/logs/live.log",
log_path,
)
utils.zstd_compress(log_path)
def process_logs(failure, upload):
task_id = failure["task_id"]
retry_id = failure["retry_id"]
os.remove(log_path)
log_path = os.path.join("data", "ci_failures_logs", f"{task_id}.{retry_id}.log")
log_zst_path = f"{log_path}.zst"
if upload:
utils.upload_s3([log_zst_path])
except requests.exceptions.HTTPError:
pass
with get_lock_for_file(log_zst_path):
if os.path.exists(log_path) or os.path.exists(log_zst_path):
return
if upload and utils.exists_s3(log_zst_path):
return
try:
utils.download_check_etag(
f"https://firefox-ci-tc.services.mozilla.com/api/queue/v1/task/{task_id}/runs/{retry_id}/artifacts/public/logs/live.log",
log_path,
)
utils.zstd_compress(log_path)
os.remove(log_path)
if upload:
utils.upload_s3([log_zst_path])
except requests.exceptions.HTTPError:
pass
def diff_failure_vs_fix(repo, failure_commits, fix_commits):

@@ -220,0 +243,0 @@ try:

@@ -1,1 +0,1 @@

0.0.610
0.0.611