Latest Threat Research: SANDWORM_MODE: Shai-Hulud-Style npm Worm Hijacks CI Workflows and Poisons AI Toolchains. Details
Socket
Book a Demo · Install · Sign in
Socket

adaptor

Package Overview
Dependencies
Maintainers
1
Versions
13
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

adaptor - PyPI Package Compare versions

Comparing version
0.1.5
to
0.1.6
+2
-2
adaptor.egg-info/PKG-INFO
Metadata-Version: 2.1
Name: adaptor
Version: 0.1.5
Version: 0.1.6
Summary: Adaptor: Objective-centric Adaptation Framework for Language Models.
Home-page: https://github.com/gaussalgo/adaptor
Author: Michal Stefanik
Author: Michal Stefanik & Adaptor Authors & Contributors
Author-email: stefanik.m@mail.muni.cz

@@ -8,0 +8,0 @@ License: MIT

@@ -7,3 +7,2 @@ import abc

from typing import List, Sequence
from sacrebleu import corpus_bleu
import torch

@@ -107,2 +106,3 @@ from transformers import PreTrainedTokenizer

def evaluate_str(self, expected_list: Sequence[str], actual_list: Sequence[str]) -> float:
from sacrebleu import corpus_bleu
return corpus_bleu(actual_list, [[e] for e in expected_list]).score

@@ -191,6 +191,2 @@ import abc

# the objective was not active in the recent progress_bar interval -> it should not be marked converged
if not any(self.evaluations_history["train"][e] for e in self.evaluators['train']):
return False
passed_patience_evals = len(self.evaluations_history["eval"][stopping_evaluator]) > patience

@@ -197,0 +193,0 @@ if not passed_patience_evals:

@@ -6,16 +6,6 @@ import abc

import torch
from transformers import (
TrainerCallback,
TrainingArguments,
TrainerState,
TrainerControl,
BatchEncoding,
)
from transformers import TrainerCallback, TrainingArguments, TrainerState, TrainerControl, BatchEncoding
from adaptor.objectives.objective_base import Objective
from adaptor.utils import (
TransformerAdaptationDataset,
StoppingStrategy,
AdaptationArguments,
)
from adaptor.utils import TransformerAdaptationDataset, StoppingStrategy, AdaptationArguments

@@ -41,8 +31,6 @@ logger = logging.getLogger()

def __init__(
self,
objectives: List[Objective],
args: AdaptationArguments,
extra_eval_objectives: Iterable[Objective] = (),
):
def __init__(self,
objectives: List[Objective],
args: AdaptationArguments,
extra_eval_objectives: Iterable[Objective] = ()):
"""

@@ -57,6 +45,4 @@ Initialises queues of objectives outputs and training flow modification parameters.

# eval objectives = train + eval => train objectives are evaluated implicitly
self.objectives = {
"train": {id(o): o for o in objectives},
"eval": {id(o): o for o in objectives + list(extra_eval_objectives)},
}
self.objectives = {"train": {id(o): o for o in objectives},
"eval": {id(o): o for o in objectives + list(extra_eval_objectives)}}

@@ -71,5 +57,3 @@ # initially, let the user know the total number of samples that will be used for training and evaluation

if not num_samples:
logger.warning(
"Make sure that you do not want to pass any %s samples!", split
)
logger.warning("Make sure that you do not want to pass any %s samples!", split)

@@ -118,12 +102,6 @@ self.objectives_outputs_queue = []

# a number of epochs per all objectives is an upper-bound of the training duration
obj_passed_epochs = [
oid
for oid in self.objectives["train"].keys()
if self._objective_passed_epochs(oid)
]
obj_passed_epochs = [oid for oid in self.objectives["train"].keys() if self._objective_passed_epochs(oid)]
if len(obj_passed_epochs) == len(self.objectives["train"]):
logger.warning(
"Scheduler reached the given maximum number of epochs for all objectives. "
"Triggering termination."
)
logger.warning("Scheduler reached the given maximum number of epochs for all objectives. "
"Triggering termination.")
return True, StoppingStrategy.ALL_OBJECTIVES_NUM_EPOCHS

@@ -133,44 +111,21 @@ # if the upper bound does not apply, check for the user-selected stopping strategy

# strategies based on objectives' convergence
if self.args.stopping_strategy in (
StoppingStrategy.FIRST_OBJECTIVE_CONVERGED,
StoppingStrategy.ALL_OBJECTIVES_CONVERGED,
):
self.converged_objectives = [
obj
for obj in self.objectives["train"].values()
if obj.is_finished(convergence_patience=self.args.stopping_patience)
]
logger.warning(
"Converged objectives: %s" % [str(o) for o in self.converged_objectives]
)
if (
self.args.stopping_strategy
== StoppingStrategy.FIRST_OBJECTIVE_CONVERGED
):
if self.args.stopping_strategy in (StoppingStrategy.FIRST_OBJECTIVE_CONVERGED,
StoppingStrategy.ALL_OBJECTIVES_CONVERGED):
self.converged_objectives = [obj for obj in self.objectives["train"].values()
if obj.is_finished(convergence_patience=self.args.stopping_patience)]
logger.warning("Converged objectives: %s" % [str(o) for o in self.converged_objectives])
if self.args.stopping_strategy == StoppingStrategy.FIRST_OBJECTIVE_CONVERGED:
return len(self.converged_objectives) > 0, self.args.stopping_strategy
else:
return (
len(self.converged_objectives) == len(self.objectives["train"]),
self.args.stopping_strategy,
)
return len(self.converged_objectives) == len(self.objectives["train"]), self.args.stopping_strategy
# strategies based on objectives' number of epochs
elif self.args.stopping_strategy in (
StoppingStrategy.FIRST_OBJECTIVE_NUM_EPOCHS,
StoppingStrategy.ALL_OBJECTIVES_NUM_EPOCHS,
):
logger.warning(
"Objectives that passed max_epochs: %s"
% [str(self.objectives["train"][o]) for o in obj_passed_epochs]
)
if (
self.args.stopping_strategy
== StoppingStrategy.FIRST_OBJECTIVE_NUM_EPOCHS
):
elif self.args.stopping_strategy in (StoppingStrategy.FIRST_OBJECTIVE_NUM_EPOCHS,
StoppingStrategy.ALL_OBJECTIVES_NUM_EPOCHS):
logger.warning("Objectives that passed max_epochs: %s" % [str(self.objectives["train"][o])
for o in obj_passed_epochs])
if self.args.stopping_strategy == StoppingStrategy.FIRST_OBJECTIVE_NUM_EPOCHS:
return len(obj_passed_epochs) > 0, self.args.stopping_strategy
else:
return (
len(obj_passed_epochs) == len(self.objectives["train"]),
self.args.stopping_strategy,
)
return len(obj_passed_epochs) == len(self.objectives["train"]), self.args.stopping_strategy

@@ -184,16 +139,6 @@ # strategies based on a number of steps

elif self.args.stopping_strategy == StoppingStrategy.ALL_OBJECTIVES_NUM_STEPS:
max_steps_objectives = [
o
for o in self.objectives["train"].values()
if o.num_steps >= self.args.max_steps
]
logger.warning(
"Objectives that passed max_steps: %s"
% [str(o) for o in max_steps_objectives]
)
max_steps_objectives = [o for o in self.objectives["train"].values() if o.num_steps >= self.args.max_steps]
logger.warning("Objectives that passed max_steps: %s" % [str(o) for o in max_steps_objectives])
return (
len(max_steps_objectives) == len(self.objectives["train"]),
StoppingStrategy.ALL_OBJECTIVES_NUM_STEPS,
)
return len(max_steps_objectives) == len(self.objectives["train"]), StoppingStrategy.ALL_OBJECTIVES_NUM_STEPS

@@ -210,10 +155,5 @@ return False, self.args.stopping_strategy

class AdaptationStoppingCallback(TrainerCallback):
def on_log(
cls,
args: TrainingArguments,
state: TrainerState,
control: TrainerControl,
**kwargs
) -> None:
"""Event called by Trainer after the given `logging_steps`."""
def on_log(cls, args: TrainingArguments, state: TrainerState, control: TrainerControl, **kwargs) -> None:
""" Event called by Trainer after the given `logging_steps`."""
self.remember_if_should_stop()

@@ -230,12 +170,8 @@

if self.should_stop:
logger.warning(
"Scheduler reached a termination condition: %s" % stopping_strategy.name
)
logger.warning("Scheduler reached a termination condition: %s" % stopping_strategy.name)
def compute_loss(
self,
logit_outputs: torch.FloatTensor,
labels: torch.Tensor,
inputs: Optional[Union[BatchEncoding, Dict[str, torch.Tensor]]] = None,
) -> torch.FloatTensor:
def compute_loss(self,
logit_outputs: torch.FloatTensor,
labels: torch.Tensor,
inputs: Optional[Union[BatchEncoding, Dict[str, torch.Tensor]]] = None) -> torch.FloatTensor:
"""

@@ -253,11 +189,7 @@ Retrieves a loss from the corresponding objective.

# the objective loss arrives aggregated into a single item
loss = self.objectives[split][oid].compute_loss(
logit_outputs, labels, inputs, split
)
loss = self.objectives[split][oid].compute_loss(logit_outputs, labels, inputs, split)
return loss
def _one_round_eval_objective_sampler(
self, objective: Objective, obj_i: int
) -> Iterator[Dict[str, Any]]:
def _one_round_eval_objective_sampler(self, objective: Objective, obj_i: int) -> Iterator[Dict[str, Any]]:
"""

@@ -276,5 +208,3 @@ Default evaluation data sampling strategy: constructs a single-round iterator

def _infinite_train_objective_sampler(
self, objective: Objective, obj_i: int
) -> Iterator[Dict[str, Any]]:
def _infinite_train_objective_sampler(self, objective: Objective, obj_i: int) -> Iterator[Dict[str, Any]]:
"""

@@ -297,5 +227,3 @@ Default training data sampling strategy: constructs infinite iterator

def _sample_objective_dataset(
self, objective: Objective, obj_i: int, split: str
) -> Iterator[Dict[str, Any]]:
def _sample_objective_dataset(self, objective: Objective, obj_i: int, split: str) -> Iterator[Dict[str, Any]]:
if split == "train":

@@ -320,10 +248,6 @@ # infinite iteration of the training resources, until the termination condition apply

# evaluation split uses simple, sequential evaluation over objectives
objective_sampler = SequentialSchedule.single_iteration_eval_sampling(
self.objectives["eval"].values()
)
objective_sampler = SequentialSchedule.single_iteration_eval_sampling(self.objectives["eval"].values())
objectives_data_samplers = {
obj: self._sample_objective_dataset(obj, obj_i, split)
for obj_i, obj in enumerate(self.objectives[split].values())
}
objectives_data_samplers = {obj: self._sample_objective_dataset(obj, obj_i, split)
for obj_i, obj in enumerate(self.objectives[split].values())}
for i, objective in enumerate(objective_sampler):

@@ -346,14 +270,7 @@ try:

"""
length_combined = int(
sum(
(o.dataset_length[split] // o.batch_size)
for o in self.objectives[split].values()
)
)
length_combined = int(sum((o.dataset_length[split] // o.batch_size) for o in self.objectives[split].values()))
if split == "train":
length_combined *= int(self.args.num_train_epochs)
return TransformerAdaptationDataset(
self._combine_datasets(split), length_combined
)
return TransformerAdaptationDataset(self._combine_datasets(split), length_combined)

@@ -376,6 +293,3 @@

for _ in range(objective.dataset_length[split]):
if (
objective in self.converged_objectives
and not self.args.log_converged_objectives
):
if objective in self.converged_objectives and not self.args.log_converged_objectives:
continue

@@ -385,5 +299,3 @@ yield objective

@staticmethod
def single_iteration_eval_sampling(
objectives: Iterable[Objective],
) -> Iterable[Objective]:
def single_iteration_eval_sampling(objectives: Iterable[Objective]) -> Iterable[Objective]:
"""

@@ -411,7 +323,4 @@ Simple finite, single iteration over all objectives. Used by base Schedule for evaluation.

for objective in self.objectives[split].values():
if (
objective in self.converged_objectives
and not self.args.log_converged_objectives
):
if objective in self.converged_objectives and not self.args.log_converged_objectives:
continue
yield objective
Metadata-Version: 2.1
Name: adaptor
Version: 0.1.5
Version: 0.1.6
Summary: Adaptor: Objective-centric Adaptation Framework for Language Models.
Home-page: https://github.com/gaussalgo/adaptor
Author: Michal Stefanik
Author: Michal Stefanik & Adaptor Authors & Contributors
Author-email: stefanik.m@mail.muni.cz

@@ -8,0 +8,0 @@ License: MIT

@@ -12,3 +12,3 @@ #!/usr/bin/env python

name="adaptor",
version='0.1.5',
version='0.1.6',
description="Adaptor: Objective-centric Adaptation Framework for Language Models.",

@@ -24,3 +24,3 @@ long_description_content_type="text/markdown",

],
author="Michal Stefanik",
author="Michal Stefanik & Adaptor Authors & Contributors",
author_email="stefanik.m@mail.muni.cz",

@@ -27,0 +27,0 @@ url="https://github.com/gaussalgo/adaptor",