from typing import List

import torch
from transformers import PreTrainedTokenizer

from .evaluator_base import EvaluatorBase
from ..utils import Head, AdaptationDataset


class Perplexity(EvaluatorBase):

    compatible_heads: List[Head] = [Head.MLM, Head.CLM, Head.SEQ2SEQ]

    def __call__(self, model: torch.nn.Module, tokenizer: PreTrainedTokenizer, dataset: AdaptationDataset) -> float:
        raise NotImplementedError()
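Perplexity is only stubbed out in this version. A minimal sketch of what the evaluation could eventually look like for a causal-LM head follows; the batch keys and the reliance on the model's built-in loss are assumptions, not part of the committed code.

import math

import torch


def perplexity_sketch(model: torch.nn.Module, dataset) -> float:
    """Hypothetical perplexity: exp of the mean negative log-likelihood over the dataset."""
    nlls = []
    with torch.no_grad():
        for batch in dataset:
            # assumes batches carry input_ids, attention_mask and aligned labels
            outputs = model(input_ids=batch["input_ids"],
                            attention_mask=batch["attention_mask"],
                            labels=batch["labels"])
            nlls.append(outputs.loss.item())
    return math.exp(sum(nlls) / len(nlls))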
import abc
import itertools

from adaptor.evaluators.evaluator_base import EvaluatorBase
from adaptor.utils import Head, AdaptationDataset
from typing import List, Sequence

from sacrebleu import corpus_bleu
import torch
from transformers import PreTrainedTokenizer


class ExtractiveQAEvaluator(EvaluatorBase, abc.ABC):
    """
    Base evaluator for extractive QA evaluations.
    Provides the prediction routine and the compatible head.
    """

    compatible_heads: List[Head] = [Head.QA]

    def __call__(self,
                 model: torch.nn.Module,
                 tokenizer: PreTrainedTokenizer,
                 dataset: AdaptationDataset) -> float:
        """
        Extracts the predicted answers and compares them to the expected answers used as references.
        Refer to the superclass documentation.
        """
        expected_str = []
        actual_str = []

        for batch in dataset:
            with torch.no_grad():
                model_outputs = model(**{k: v for k, v in batch.items() if k not in ["oid", "labels",
                                                                                     "start_position", "end_position"]})
            actual_start_pos = model_outputs.start_logits.argmax(-1)
            actual_end_pos = model_outputs.end_logits.argmax(-1)

            expected_str.extend(tokenizer.batch_decode(batch["labels"]))
            for i, (act_start, act_end) in enumerate(zip(actual_start_pos, actual_end_pos)):
                actual_str.append(tokenizer.decode(batch["input_ids"][i, act_start: act_end]))

        # make sure that the references are not empty:
        expected_not_empty = [" " if string == "" else string for string in expected_str]

        assert len(expected_not_empty) == len(actual_str), \
            "The numbers of entries do not match. Expected: %s, actual: %s" % (len(expected_not_empty), len(actual_str))

        return self.evaluate_str(list(expected_not_empty), actual_str)

    @abc.abstractmethod
    def evaluate_str(self, expected_list: Sequence[str], actual_list: Sequence[str]) -> float:
        pass


class ExactMatch(ExtractiveQAEvaluator):
    """
    Exact-match metric for question answering.
    Computes the accuracy of the retrieved answers against the references.
    """

    smaller_is_better: bool = False

    def evaluate_str(self, expected_list: Sequence[str], actual_list: Sequence[str]) -> float:
        matches = sum(e.strip() == a.strip() for e, a in zip(expected_list, actual_list))
        total = len(expected_list)
        return matches / total


class F1ScoreForQA(ExtractiveQAEvaluator):
    """
    Token-level F1 score for question answering evaluation.
    Computes the mean F1 score over the predicted outputs, with tokens segmented by whitespace.
    """

    smaller_is_better: bool = False

    @staticmethod
    def _per_sample_f1(expected_answers: List[str], actual_answer: str) -> float:
        expected_answers_set = set(itertools.chain(*[a.split() for a in expected_answers]))
        actual_answer_set = actual_answer.split()

        true_positives = sum(a_word in expected_answers_set for a_word in actual_answer_set)
        false_positives = sum(a_word not in expected_answers_set for a_word in actual_answer_set)
        false_negatives = sum(e_word not in actual_answer_set for e_word in expected_answers_set)

        # F1 = TP / (TP + (FP + FN) / 2)
        return true_positives / (true_positives + 0.5 * (false_positives + false_negatives))

    def evaluate_str(self, expected_list: Sequence[str], actual_list: Sequence[str]) -> float:
        f_scores = [self._per_sample_f1([e], a) for e, a in zip(expected_list, actual_list)]
        mean_f_score = sum(f_scores) / len(f_scores)
        return mean_f_score


class BLEUForQA(ExtractiveQAEvaluator):
    """
    BLEU evaluator for question answering.
    Computes the standard corpus-level BLEU score between the retrieved and expected answers.
    """

    smaller_is_better: bool = False

    def evaluate_str(self, expected_list: Sequence[str], actual_list: Sequence[str]) -> float:
        return corpus_bleu(actual_list, [[e] for e in expected_list]).score
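For orientation, these evaluators plug into the new ExtractiveQA objective the same way the added tests do; a minimal usage sketch (the model name and file paths are placeholders mirroring the test fixtures in this diff, not a prescribed setup):

from adaptor.lang_module import LangModule
from adaptor.objectives.question_answering import ExtractiveQA
from adaptor.evaluators.question_answering import ExactMatch, F1ScoreForQA, BLEUForQA

# any model with a QA head should work; the tests use "Unbabel/xlm-roberta-comet-small"
lang_module = LangModule("Unbabel/xlm-roberta-comet-small")

qa_objective = ExtractiveQA(lang_module,
                            texts_or_path="mock_data/QA_questions.txt",    # placeholder paths
                            text_pair_or_path="mock_data/QA_contexts.txt",
                            labels_or_path="mock_data/QA_answers.txt",
                            batch_size=2,
                            val_evaluators=[ExactMatch(), F1ScoreForQA(), BLEUForQA()])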
from typing import Dict, Optional, Tuple, Union, Iterator, List

import torch
from transformers import DataCollatorWithPadding, BatchEncoding
from transformers.modeling_outputs import QuestionAnsweringModelOutput

from ..objectives.objective_base import SupervisedObjective
from ..utils import Head


class ExtractiveQA(SupervisedObjective):

    compatible_head: Head = Head.QA

    @staticmethod
    def _find_start_end_position(answer_ids: List[int], context_ids: List[int]) -> Tuple[int, int]:
        """
        Returns the first occurrence of the subsequence (answer_ids) in the sequence (context_ids).
        If no match is found, (0, 0) is returned.
        """
        start_positions = [i for i in range(len(context_ids) - len(answer_ids))
                           if context_ids[i: i + len(answer_ids)] == answer_ids]
        if not start_positions:
            return 0, 0
        else:
            first_start_position, first_end_position = start_positions[0], start_positions[0] + len(answer_ids)
            return first_start_position, first_end_position

    def _get_inputs_iterator(self, split: str) -> Iterator:
        """
        Batches and encodes input texts, text pairs and corresponding labels.
        :param split: Selected data split. `train` or `eval`.
        :return: Iterator over batch encodings.
        """
        collator = DataCollatorWithPadding(self.tokenizer, pad_to_multiple_of=8,
                                           return_tensors="pt", padding='max_length')

        batch_features = []
        for src_text, text_pair, label in zip(*self._per_split_iterators(split)):
            out_sample = self.tokenizer(src_text, text_pair=text_pair, truncation=True, padding='max_length')
            tokenized_label = self.tokenizer(label, truncation=True, padding='max_length')
            label_wo_padding = self.tokenizer(label)

            start_position, end_position = self._find_start_end_position(label_wo_padding["input_ids"][1:-1],
                                                                         out_sample["input_ids"])
            out_sample["label"] = tokenized_label["input_ids"]
            out_sample["start_position"] = start_position
            out_sample["end_position"] = end_position

            batch_features.append(out_sample)
            if len(batch_features) == self.batch_size:
                yield collator(batch_features)
                batch_features = []

        if batch_features:
            # yield the residual batch
            yield collator(batch_features)

    def _compute_loss(self,
                      model_outputs: QuestionAnsweringModelOutput,
                      labels: torch.LongTensor,
                      inputs: Optional[Union[BatchEncoding, Dict[str, torch.Tensor]]] = None,
                      attention_mask: Optional[torch.LongTensor] = None) -> torch.FloatTensor:
        """
        Computes the loss of the model outputs on a single question answering batch.
        :param model_outputs: QuestionAnsweringModelOutput.
        :param labels: Expected labels.
        :param attention_mask: Mask of the tokens to compute the loss on.
        :return: loss value with grad_fn.
        """
        loss_fct = torch.nn.CrossEntropyLoss()

        # the following keys need to be present in the model output
        start_logits = model_outputs["start_logits"]
        end_logits = model_outputs["end_logits"]

        start_positions = inputs["start_position"]
        end_positions = inputs["end_position"]

        start_loss = loss_fct(start_logits, start_positions)
        end_loss = loss_fct(end_logits, end_positions)
        total_loss = (start_loss + end_loss) / 2

        return total_loss
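To make the span matching above concrete: _find_start_end_position is a staticmethod that returns token positions with an exclusive end index, so a call with made-up token ids behaves as follows.

from adaptor.objectives.question_answering import ExtractiveQA

context_ids = [101, 5, 6, 7, 8, 9, 102]                      # made-up token ids
ExtractiveQA._find_start_end_position([7, 8], context_ids)   # -> (3, 5): the answer spans positions 3..4
ExtractiveQA._find_start_end_position([42], context_ids)     # -> (0, 0): answer not found in the context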
+1
-0
@@ -15,1 +15,2 @@ .venv/
 .idea
+dist
 Metadata-Version: 2.1
 Name: adaptor
-Version: 0.1.4
+Version: 0.1.5
 Summary: Adaptor: Objective-centric Adaptation Framework for Language Models.
@@ -5,0 +5,0 @@ Home-page: https://github.com/gaussalgo/adaptor
@@ -13,2 +13,2 @@ torch>=1.7
 fairseq
-protobuf
+protobuf<=3.20.1
@@ -18,8 +18,8 @@ .gitignore
adaptor.egg-info/zip-safe
adaptor/evaluators/LM.py
adaptor/evaluators/__init__.py
adaptor/evaluators/evaluator_base.py
adaptor/evaluators/generative.py
adaptor/evaluators/generative_new2.py
adaptor/evaluators/language_modeling.py
adaptor/evaluators/prism.py
+adaptor/evaluators/question_answering.py
adaptor/evaluators/sequence_classification.py
@@ -34,2 +34,3 @@ adaptor/evaluators/token_classification.py
adaptor/objectives/objective_base.py
+adaptor/objectives/question_answering.py
adaptor/objectives/seq2seq.py
@@ -36,0 +37,0 @@ docs/user_flow.png
@@ -77,3 +77,3 @@ import logging
         else:
-            loss = self.schedule.compute_loss(outputs, labels)
+            loss = self.schedule.compute_loss(outputs, labels, inputs)
@@ -80,0 +80,0 @@ mock_outputs = torch.tensor([-1, -1])
 import logging
+import inspect
 from typing import List, Dict, Any, Optional
@@ -7,3 +8,3 @@
     AutoModelForTokenClassification, AutoModelForSeq2SeqLM, AutoModelForCausalLM, \
-    AutoModelForMaskedLM
+    AutoModelForMaskedLM, AutoModelForQuestionAnswering
@@ -61,2 +62,4 @@ from .utils import Head
             new_head = AutoModelForMaskedLM.from_pretrained(model_name_or_path, **head_kwargs)
+        elif head_type == Head.QA:
+            new_head = AutoModelForQuestionAnswering.from_pretrained(model_name_or_path, **head_kwargs)
         else:
@@ -153,6 +156,9 @@ new_head = torch.load(model_name_or_path, **head_kwargs)
         selected_head_model = self.trainable_models[str(inputs["oid"])]
+        # include only correct inputs for a specific model
+        list_of_model_specific_inputs = inspect.getfullargspec(selected_head_model.forward).args
+        model_specific_inputs = {k: v for k, v in inputs.items() if k in list_of_model_specific_inputs}
         # including labels causes the loss to be computed twice - by the objective + by the HF model's forward()
         # but labels are also used to infer decoder_input_ids of some models, so we need to pass them
-        selected_head_output = selected_head_model(**{k: v for k, v in inputs.items() if k not in ("oid",)})
+        selected_head_output = selected_head_model(**model_specific_inputs)
         # HF models produce special Output objects instead of a raw output
@@ -159,0 +165,0 @@ logits = selected_head_output.logits if hasattr(selected_head_output, "logits") else selected_head_output
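The new forward-pass logic above keys the accepted inputs on the signature of the selected head's forward(). A self-contained illustration of that idea with a toy module (not the actual LangModule code; names and values are made up):

import inspect

import torch


class ToyHead(torch.nn.Module):
    def forward(self, input_ids, attention_mask=None):
        return input_ids


inputs = {"oid": "140", "input_ids": torch.tensor([[1, 2]]),
          "attention_mask": torch.tensor([[1, 1]]), "start_position": torch.tensor([0])}

head = ToyHead()
accepted_args = inspect.getfullargspec(head.forward).args            # ['self', 'input_ids', 'attention_mask']
filtered = {k: v for k, v in inputs.items() if k in accepted_args}   # drops "oid" and "start_position"
output = head(**filtered)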
@@ -1,5 +0,5 @@
-from typing import Dict, Iterable, Optional, Iterator, Union
+from typing import Dict, Iterable, Optional, Union
 import torch
-from transformers import DataCollatorForTokenClassification, DataCollatorWithPadding, BatchEncoding
+from transformers import DataCollatorForTokenClassification, BatchEncoding
@@ -121,33 +121,2 @@ from ..objectives.objective_base import SupervisedObjective
-    def _get_inputs_iterator(self, split: str) -> Iterator:
-        """
-        Batches and encodes input texts and corresponding labels.
-        :param split: Selected data split. `train` or `eval`.
-        :return: Iterator over batch encodings.
-        """
-        collator = DataCollatorWithPadding(self.tokenizer, pad_to_multiple_of=8)
-        classifying_pairs = None
-        batch_features = []
-        for src_text, label in zip(*self._per_split_iterators(split)):
-            # check from the first sample
-            if classifying_pairs is None:
-                # if the input texts are tab-separated we will tokenize them as pairs
-                classifying_pairs = "\t" in src_text
-            if classifying_pairs:
-                text, text_pair = src_text.split("\t")
-                out_sample = self.tokenizer(text, text_pair=text_pair, truncation=True)
-            else:
-                out_sample = self.tokenizer(src_text, truncation=True)
-            out_sample["label"] = torch.tensor(self.labels_map[label])
-            batch_features.append(out_sample)
-            if len(batch_features) == self.batch_size:
-                yield collator(batch_features)
-                batch_features = []
-        if batch_features:
-            # yield residual batch
-            yield collator(batch_features)
     def _compute_loss(self,
@@ -154,0 +123,0 @@ logit_outputs: torch.FloatTensor,
 import abc
 import itertools
 import logging
-from typing import List, Union, Optional, Iterable, Tuple, Dict, Sequence, Any
+from typing import List, Union, Optional, Iterable, Tuple, Dict, Sequence, Any, Iterator
 import torch
 from tqdm import trange
-from transformers import BatchEncoding
+from transformers import BatchEncoding, DataCollatorWithPadding
@@ -14,3 +14,2 @@ from ..evaluators.evaluator_base import EvaluatorBase
 logger = logging.getLogger()
@@ -444,3 +443,2 @@
 class SupervisedObjective(UnsupervisedObjective, abc.ABC):
     labels_path: Optional[str] = None
@@ -452,2 +450,8 @@ labels: Optional[List[str]] = None
+    text_pair_path: Optional[str] = None
+    text_pair: Optional[List[str]] = None
+    val_text_pair_path: Optional[str] = None
+    val_text_pair: Optional[List[str]] = None
     labels_map: Dict[str, int] = {}
@@ -459,2 +463,4 @@
                  val_labels_or_path: Optional[Union[str, List[str]]] = None,
+                 text_pair_or_path: Optional[Union[str, List[str]]] = None,
+                 val_text_pair_or_path: Optional[Union[str, List[str]]] = None,
                  **kwargs):
@@ -473,2 +479,14 @@
+        if text_pair_or_path is not None:
+            if isinstance(text_pair_or_path, str):
+                self.text_pair_path = text_pair_or_path
+            else:
+                self.text_pair = text_pair_or_path
+        if val_text_pair_or_path is not None:
+            if isinstance(val_text_pair_or_path, str):
+                self.val_text_pair_path = val_text_pair_or_path
+            else:
+                self.val_text_pair = val_text_pair_or_path
         # init will call register_compatible_head_model, which resolves num_labels for new head config from self.labels
@@ -509,4 +527,41 @@ super().__init__(*args, **kwargs)
-    def _per_split_iterators(self, split: str) -> Tuple[Iterable[str], Iterable[str]]:
+    def _get_inputs_iterator(self, split: str) -> Iterator[Union[BatchEncoding, Dict[str, torch.Tensor]]]:
+        """
+        Batches and encodes input texts and corresponding labels.
+        :param split: Selected data split. `train` or `eval`.
+        :return: Iterator over batch encodings.
+        """
+        collator = DataCollatorWithPadding(self.tokenizer, pad_to_multiple_of=8)
+        classifying_pairs = None
+        batch_features = []
+        for source_target_tuple in zip(*self._per_split_iterators(split)):
+            # check from the first sample
+            if classifying_pairs is None:
+                # if the input texts are tab-separated we will tokenize them as pairs
+                classifying_pairs = len(source_target_tuple) > 2
+                if classifying_pairs:
+                    assert len(source_target_tuple) == 3, "Expecting tuples of (source, source_pair, target) texts"
+            if classifying_pairs:
+                text, text_pair, label = source_target_tuple
+                out_sample = self.tokenizer(text, text_pair=text_pair, truncation=True)
+            else:
+                text, label = source_target_tuple
+                out_sample = self.tokenizer(text, truncation=True)
+            out_sample["label"] = torch.tensor(self.labels_map[label])
+            batch_features.append(out_sample)
+            if len(batch_features) == self.batch_size:
+                yield collator(batch_features)
+                batch_features = []
+        if batch_features:
+            # yield residual batch
+            yield collator(batch_features)
+    def _per_split_iterators(self, split: str) -> Union[Tuple[Iterable[str], Iterable[str]],
+                                                        Tuple[Iterable[str], Iterable[str], Iterable[str]]]:
         """
         Default inputs iterator for supervised objectives. Returns a pair of iterators, over input texts and labels.
@@ -524,4 +579,10 @@ Not meant to be overriden when implementing custom data set. Instead choose to inherit either
             targets_iter = AdaptationDataset.iter_text_file_per_line(self.labels_path)
+            if self.text_pair is not None:
+                source_pairs_iter = iter(self.text_pair)
+            elif self.text_pair_path is not None:
+                source_pairs_iter = AdaptationDataset.iter_text_file_per_line(self.text_pair_path)
+            else:
+                source_pairs_iter = None
         elif split == "eval":
             if self.val_labels is not None:
@@ -535,5 +596,14 @@ targets_iter = iter(self.val_labels)
                                  "or set Objective(val_labels) param." % self)
+            if self.val_text_pair is not None:
+                source_pairs_iter = iter(self.val_text_pair)
+            elif self.val_text_pair_path is not None:
+                source_pairs_iter = AdaptationDataset.iter_text_file_per_line(self.val_text_pair_path)
+            else:
+                source_pairs_iter = None
         else:
             raise ValueError("Unrecognized split: %s" % split)
-        return sources_iter, targets_iter
+        if source_pairs_iter is not None:
+            return sources_iter, source_pairs_iter, targets_iter
+        else:
+            return sources_iter, targets_iter
@@ -104,3 +104,4 @@ import abc
         loss_fct = torch.nn.CrossEntropyLoss()
-        lm_loss = loss_fct(lm_logit_outputs.view(-1, self.tokenizer.vocab_size), labels.view(-1))
+        # vocab-agnostic loss circumvents incorrectly-set vocab_size of some models (e.g. mt5)
+        lm_loss = loss_fct(lm_logit_outputs.flatten(end_dim=1), labels.flatten())
@@ -107,0 +108,0 @@ return lm_loss
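The replaced line above concerns models (e.g. mT5) whose tokenizer.vocab_size does not match the last dimension of the logits; flattening the batch and sequence dimensions takes the vocabulary size from the tensor itself. A small sanity check with made-up shapes:

import torch

logits = torch.randn(2, 5, 32128)               # (batch, seq_len, model_vocab)
labels = torch.randint(0, 32128, (2, 5))

flat_new = logits.flatten(end_dim=1)            # (10, 32128), vocab size taken from the tensor
flat_old = logits.view(-1, logits.size(-1))     # equivalent only when vocab_size is reported correctly
assert torch.equal(flat_new, flat_old)

loss = torch.nn.CrossEntropyLoss()(flat_new, labels.flatten())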
+139
-48
@@ -6,6 +6,16 @@ import abc
 import torch
-from transformers import TrainerCallback, TrainingArguments, TrainerState, TrainerControl, BatchEncoding
+from transformers import (
+    TrainerCallback,
+    TrainingArguments,
+    TrainerState,
+    TrainerControl,
+    BatchEncoding,
+)
 from adaptor.objectives.objective_base import Objective
-from adaptor.utils import TransformerAdaptationDataset, StoppingStrategy, AdaptationArguments
+from adaptor.utils import (
+    TransformerAdaptationDataset,
+    StoppingStrategy,
+    AdaptationArguments,
+)
@@ -31,6 +41,8 @@ logger = logging.getLogger()
-    def __init__(self,
-                 objectives: List[Objective],
-                 args: AdaptationArguments,
-                 extra_eval_objectives: Iterable[Objective] = ()):
+    def __init__(
+        self,
+        objectives: List[Objective],
+        args: AdaptationArguments,
+        extra_eval_objectives: Iterable[Objective] = (),
+    ):
         """
@@ -45,4 +57,6 @@ Initialises queues of objectives outputs and training flow modification parameters.
         # eval objectives = train + eval => train objectives are evaluated implicitly
-        self.objectives = {"train": {id(o): o for o in objectives},
-                           "eval": {id(o): o for o in objectives + list(extra_eval_objectives)}}
+        self.objectives = {
+            "train": {id(o): o for o in objectives},
+            "eval": {id(o): o for o in objectives + list(extra_eval_objectives)},
+        }
@@ -57,3 +71,5 @@ # initially, let the user know the total number of samples that will be used for training and evaluation
             if not num_samples:
-                logger.warning("Make sure that you do not want to pass any %s samples!", split)
+                logger.warning(
+                    "Make sure that you do not want to pass any %s samples!", split
+                )
@@ -102,6 +118,12 @@ self.objectives_outputs_queue = []
         # a number of epochs per all objectives is an upper-bound of the training duration
-        obj_passed_epochs = [oid for oid in self.objectives["train"].keys() if self._objective_passed_epochs(oid)]
+        obj_passed_epochs = [
+            oid
+            for oid in self.objectives["train"].keys()
+            if self._objective_passed_epochs(oid)
+        ]
         if len(obj_passed_epochs) == len(self.objectives["train"]):
-            logger.warning("Scheduler reached the given maximum number of epochs for all objectives. "
-                           "Triggering termination.")
+            logger.warning(
+                "Scheduler reached the given maximum number of epochs for all objectives. "
+                "Triggering termination."
+            )
             return True, StoppingStrategy.ALL_OBJECTIVES_NUM_EPOCHS
@@ -111,21 +133,44 @@ # if the upper bound does not apply, check for the user-selected stopping strategy
         # strategies based on objectives' convergence
-        if self.args.stopping_strategy in (StoppingStrategy.FIRST_OBJECTIVE_CONVERGED,
-                                           StoppingStrategy.ALL_OBJECTIVES_CONVERGED):
-            self.converged_objectives = [obj for obj in self.objectives["train"].values()
-                                         if obj.is_finished(convergence_patience=self.args.stopping_patience)]
-            logger.warning("Converged objectives: %s" % [str(o) for o in self.converged_objectives])
-            if self.args.stopping_strategy == StoppingStrategy.FIRST_OBJECTIVE_CONVERGED:
+        if self.args.stopping_strategy in (
+            StoppingStrategy.FIRST_OBJECTIVE_CONVERGED,
+            StoppingStrategy.ALL_OBJECTIVES_CONVERGED,
+        ):
+            self.converged_objectives = [
+                obj
+                for obj in self.objectives["train"].values()
+                if obj.is_finished(convergence_patience=self.args.stopping_patience)
+            ]
+            logger.warning(
+                "Converged objectives: %s" % [str(o) for o in self.converged_objectives]
+            )
+            if (
+                self.args.stopping_strategy
+                == StoppingStrategy.FIRST_OBJECTIVE_CONVERGED
+            ):
                 return len(self.converged_objectives) > 0, self.args.stopping_strategy
             else:
-                return len(self.converged_objectives) == len(self.objectives["train"]), self.args.stopping_strategy
+                return (
+                    len(self.converged_objectives) == len(self.objectives["train"]),
+                    self.args.stopping_strategy,
+                )
         # strategies based on objectives' number of epochs
-        elif self.args.stopping_strategy in (StoppingStrategy.FIRST_OBJECTIVE_NUM_EPOCHS,
-                                             StoppingStrategy.ALL_OBJECTIVES_NUM_EPOCHS):
-            logger.warning("Objectives that passed max_epochs: %s" % [str(self.objectives["train"][o])
-                                                                      for o in obj_passed_epochs])
-            if self.args.stopping_strategy == StoppingStrategy.FIRST_OBJECTIVE_NUM_EPOCHS:
+        elif self.args.stopping_strategy in (
+            StoppingStrategy.FIRST_OBJECTIVE_NUM_EPOCHS,
+            StoppingStrategy.ALL_OBJECTIVES_NUM_EPOCHS,
+        ):
+            logger.warning(
+                "Objectives that passed max_epochs: %s"
+                % [str(self.objectives["train"][o]) for o in obj_passed_epochs]
+            )
+            if (
+                self.args.stopping_strategy
+                == StoppingStrategy.FIRST_OBJECTIVE_NUM_EPOCHS
+            ):
                 return len(obj_passed_epochs) > 0, self.args.stopping_strategy
             else:
-                return len(obj_passed_epochs) == len(self.objectives["train"]), self.args.stopping_strategy
+                return (
+                    len(obj_passed_epochs) == len(self.objectives["train"]),
+                    self.args.stopping_strategy,
+                )
@@ -139,6 +184,16 @@ # strategies based on a number of steps
         elif self.args.stopping_strategy == StoppingStrategy.ALL_OBJECTIVES_NUM_STEPS:
-            max_steps_objectives = [o for o in self.objectives["train"].values() if o.num_steps >= self.args.max_steps]
-            logger.warning("Objectives that passed max_steps: %s" % [str(o) for o in max_steps_objectives])
+            max_steps_objectives = [
+                o
+                for o in self.objectives["train"].values()
+                if o.num_steps >= self.args.max_steps
+            ]
+            logger.warning(
+                "Objectives that passed max_steps: %s"
+                % [str(o) for o in max_steps_objectives]
+            )
-            return len(max_steps_objectives) == len(self.objectives["train"]), StoppingStrategy.ALL_OBJECTIVES_NUM_STEPS
+            return (
+                len(max_steps_objectives) == len(self.objectives["train"]),
+                StoppingStrategy.ALL_OBJECTIVES_NUM_STEPS,
+            )
@@ -155,5 +210,10 @@ return False, self.args.stopping_strategy
 class AdaptationStoppingCallback(TrainerCallback):
-    def on_log(cls, args: TrainingArguments, state: TrainerState, control: TrainerControl, **kwargs) -> None:
-        """ Event called by Trainer after the given `logging_steps`."""
+    def on_log(
+        cls,
+        args: TrainingArguments,
+        state: TrainerState,
+        control: TrainerControl,
+        **kwargs
+    ) -> None:
+        """Event called by Trainer after the given `logging_steps`."""
         self.remember_if_should_stop()
@@ -170,8 +230,12 @@
         if self.should_stop:
-            logger.warning("Scheduler reached a termination condition: %s" % stopping_strategy.name)
+            logger.warning(
+                "Scheduler reached a termination condition: %s" % stopping_strategy.name
+            )
-    def compute_loss(self,
-                     logit_outputs: torch.FloatTensor,
-                     labels: torch.Tensor,
-                     inputs: Optional[Union[BatchEncoding, Dict[str, torch.Tensor]]] = None) -> torch.FloatTensor:
+    def compute_loss(
+        self,
+        logit_outputs: torch.FloatTensor,
+        labels: torch.Tensor,
+        inputs: Optional[Union[BatchEncoding, Dict[str, torch.Tensor]]] = None,
+    ) -> torch.FloatTensor:
         """
@@ -189,7 +253,11 @@ Retrieves a loss from the corresponding objective.
         # the objective loss arrives aggregated into a single item
-        loss = self.objectives[split][oid].compute_loss(logit_outputs, labels, inputs, split)
+        loss = self.objectives[split][oid].compute_loss(
+            logit_outputs, labels, inputs, split
+        )
         return loss
-    def _one_round_eval_objective_sampler(self, objective: Objective, obj_i: int) -> Iterator[Dict[str, Any]]:
+    def _one_round_eval_objective_sampler(
+        self, objective: Objective, obj_i: int
+    ) -> Iterator[Dict[str, Any]]:
         """
@@ -208,3 +276,5 @@ Default evaluation data sampling strategy: constructs a single-round iterator
-    def _infinite_train_objective_sampler(self, objective: Objective, obj_i: int) -> Iterator[Dict[str, Any]]:
+    def _infinite_train_objective_sampler(
+        self, objective: Objective, obj_i: int
+    ) -> Iterator[Dict[str, Any]]:
         """
@@ -227,3 +297,5 @@ Default training data sampling strategy: constructs infinite iterator
-    def _sample_objective_dataset(self, objective: Objective, obj_i: int, split: str) -> Iterator[Dict[str, Any]]:
+    def _sample_objective_dataset(
+        self, objective: Objective, obj_i: int, split: str
+    ) -> Iterator[Dict[str, Any]]:
         if split == "train":
@@ -248,6 +320,10 @@ # infinite iteration of the training resources, until the termination condition apply
             # evaluation split uses simple, sequential evaluation over objectives
-            objective_sampler = SequentialSchedule.single_iteration_eval_sampling(self.objectives["eval"].values())
+            objective_sampler = SequentialSchedule.single_iteration_eval_sampling(
+                self.objectives["eval"].values()
+            )
-        objectives_data_samplers = {obj: self._sample_objective_dataset(obj, obj_i, split)
-                                    for obj_i, obj in enumerate(self.objectives[split].values())}
+        objectives_data_samplers = {
+            obj: self._sample_objective_dataset(obj, obj_i, split)
+            for obj_i, obj in enumerate(self.objectives[split].values())
+        }
         for i, objective in enumerate(objective_sampler):
@@ -270,7 +346,14 @@ try:
         """
-        length_combined = int(sum((o.dataset_length[split] // o.batch_size) for o in self.objectives[split].values()))
+        length_combined = int(
+            sum(
+                (o.dataset_length[split] // o.batch_size)
+                for o in self.objectives[split].values()
+            )
+        )
         if split == "train":
            length_combined *= int(self.args.num_train_epochs)
-        return TransformerAdaptationDataset(self._combine_datasets(split), length_combined)
+        return TransformerAdaptationDataset(
+            self._combine_datasets(split), length_combined
+        )
@@ -293,3 +376,6 @@
             for _ in range(objective.dataset_length[split]):
-                if objective in self.converged_objectives and not self.args.log_converged_objectives:
+                if (
+                    objective in self.converged_objectives
+                    and not self.args.log_converged_objectives
+                ):
                     continue
@@ -299,3 +385,5 @@ yield objective
     @staticmethod
-    def single_iteration_eval_sampling(objectives: Iterable[Objective]) -> Iterable[Objective]:
+    def single_iteration_eval_sampling(
+        objectives: Iterable[Objective],
+    ) -> Iterable[Objective]:
         """
@@ -323,4 +411,7 @@ Simple finite, single iteration over all objectives. Used by base Schedule for evaluation.
         for objective in self.objectives[split].values():
-            if objective in self.converged_objectives and not self.args.log_converged_objectives:
+            if (
+                objective in self.converged_objectives
+                and not self.args.log_converged_objectives
+            ):
                 continue
             yield objective
+30
-18
@@ -16,3 +16,4 @@ import abc
     MLM = 6
-    UNKNOWN = 7
+    QA = 7
+    UNKNOWN = 8
@@ -52,4 +53,7 @@
 class TransformerAdaptationDataset(AdaptationDataset):
-    def __init__(self, batch_encoding_params: Iterable[Dict[str, torch.LongTensor]], length: Optional[int] = None):
+    def __init__(
+        self,
+        batch_encoding_params: Iterable[Dict[str, torch.LongTensor]],
+        length: Optional[int] = None,
+    ):
         """
@@ -82,17 +86,20 @@ :param batch_encoding_params: Arguments to be passed to BatchEncoding (input_ids, attention_mask, labels)
     fixed_adaptation_args = {
-        "per_device_train_batch_size": 1,   # batching is done by Objective, no two distinct batches
-        "per_device_eval_batch_size": 1,    # should be present in a single infer batch
-        "per_gpu_train_batch_size": None,   # aggregation over multiple objectives can be done using
-        "per_gpu_eval_batch_size": None,    # `gradient_accumulation_steps` > 1
-        "do_predict": False,                # we do not want to mangle with multi-objective reports here,
-                                            # models are separately reloadable
-        "disable_tqdm": True,               # scheduler takes care of top-level terminal monitoring
-        "dataloader_pin_memory": False,     # does not necessarily match the shapes in multi-objective training
+        "per_device_train_batch_size": 1,   # batching is done by Objective, no two distinct batches
+        "per_device_eval_batch_size": 1,    # should be present in a single infer batch
+        "per_gpu_train_batch_size": None,   # aggregation over multiple objectives can be done using
+        "per_gpu_eval_batch_size": None,    # `gradient_accumulation_steps` > 1
+        "do_predict": False,                # we do not want to mangle with multi-objective reports here,
+                                            # models are separately reloadable
+        "disable_tqdm": True,               # scheduler takes care of top-level terminal monitoring
+        "dataloader_pin_memory": False,     # does not necessarily match the shapes in multi-objective training
+        "remove_unused_columns": False,     # from transformers 4.19.x, this would remove batches' control attributes
     }
-    def __init__(self,
-                 stopping_strategy: StoppingStrategy,
-                 stopping_patience: Optional[int] = 10,
-                 also_log_converged_objectives: Optional[bool] = True,
-                 **kwargs):
+    def __init__(
+        self,
+        stopping_strategy: StoppingStrategy,
+        stopping_patience: Optional[int] = 10,
+        also_log_converged_objectives: Optional[bool] = True,
+        **kwargs
+    ):
@@ -105,5 +112,10 @@ # novel arguments, w.r.t. original TrainingArguments
         # adjustments of the defaults expected by Scheduler
-        unexpected_adjusted_args = [arg for arg in kwargs.keys() if arg in self.fixed_adaptation_args.keys()]
+        unexpected_adjusted_args = [
+            arg for arg in kwargs.keys() if arg in self.fixed_adaptation_args.keys()
+        ]
         if unexpected_adjusted_args:
-            raise ValueError("You should not set these TrainingArgs for Adaptation: %s" % unexpected_adjusted_args)
+            raise ValueError(
+                "You should not set these TrainingArgs for Adaptation: %s"
+                % unexpected_adjusted_args
+            )
@@ -110,0 +122,0 @@ # set default values to fixed args
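The hunks above concern arguments that AdaptationArguments fixes itself; overriding any of them is rejected. A hedged illustration of that failure mode (argument values are arbitrary):

from adaptor.utils import AdaptationArguments, StoppingStrategy

# raises ValueError: "You should not set these TrainingArgs for Adaptation: ['per_device_train_batch_size']"
args = AdaptationArguments(output_dir="adaptation_output",
                           stopping_strategy=StoppingStrategy.ALL_OBJECTIVES_CONVERGED,
                           per_device_train_batch_size=8)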
+1
-1
 Metadata-Version: 2.1
 Name: adaptor
-Version: 0.1.4
+Version: 0.1.5
 Summary: Adaptor: Objective-centric Adaptation Framework for Language Models.
@@ -5,0 +5,0 @@ Home-page: https://github.com/gaussalgo/adaptor
+2
-2
@@ -12,3 +12,3 @@ #!/usr/bin/env python
     name="adaptor",
-    version='0.1.4',
+    version='0.1.5',
     description="Adaptor: Objective-centric Adaptation Framework for Language Models.",
@@ -46,3 +46,3 @@ long_description_content_type="text/markdown",
         "fairseq",
-        "protobuf",
+        "protobuf<=3.20.1",
     ],
@@ -49,0 +49,0 @@ "examples": [
 from adaptor.evaluators.generative import GenerativeEvaluator
+from adaptor.evaluators.question_answering import ExtractiveQAEvaluator
 from adaptor.evaluators.sequence_classification import SeqClassificationEvaluator
@@ -87,2 +88,17 @@ from adaptor.evaluators.token_classification import TokenClassificationEvaluator
+def assert_qa_evaluator_logs(evaluator: ExtractiveQAEvaluator, split: str) -> None:
+    from adaptor.objectives.question_answering import ExtractiveQA
+    lang_module = LangModule(test_base_models["extractive_QA"])
+    qa_objective = ExtractiveQA(lang_module,
+                                texts_or_path=paths["texts"]["QA"],
+                                text_pair_or_path=paths["text_pair"]["QA"],
+                                labels_or_path=paths["labels"]["QA"],
+                                batch_size=2,
+                                train_evaluators=[evaluator],
+                                val_evaluators=[evaluator])
+    assert_evaluator_logs(lang_module, qa_objective, split)
 def test_bleu():
@@ -137,1 +153,16 @@ from adaptor.evaluators.generative import BLEU
     assert_classification_evaluator_logs(SequenceAccuracy(decides_convergence=False), "train")
+def test_QA_exact_match():
+    from adaptor.evaluators.question_answering import ExactMatch
+    assert_qa_evaluator_logs(ExactMatch(), "train")
+def test_QA_fscore():
+    from adaptor.evaluators.question_answering import F1ScoreForQA
+    assert_qa_evaluator_logs(F1ScoreForQA(), "train")
+def test_QA_BLEU():
+    from adaptor.evaluators.question_answering import BLEUForQA
+    assert_qa_evaluator_logs(BLEUForQA(), "train")
@@ -8,2 +8,3 @@ from adaptor.lang_module import LangModule
 from adaptor.objectives.objective_base import Objective
+from adaptor.objectives.question_answering import ExtractiveQA
 from adaptor.objectives.seq2seq import Sequence2Sequence
@@ -127,1 +128,13 @@ from utils import paths, test_base_models
     assert_module_objective_ok(lang_module, objective)
+def test_supervised_QA_objective():
+    lang_module = LangModule(test_base_models["extractive_QA"])
+    objective = ExtractiveQA(lang_module,
+                             texts_or_path=paths["texts"]["QA"],
+                             text_pair_or_path=paths["text_pair"]["QA"],
+                             labels_or_path=paths["labels"]["QA"],
+                             batch_size=4)
+    assert_module_objective_ok(lang_module, objective)
+9
-3
@@ -8,3 +8,4 @@ from adaptor.utils import AdaptationArguments, StoppingStrategy
         "translation": "mock_data/seq2seq_sources.txt",
-        "unsup": "mock_data/domain_unsup.txt"
+        "unsup": "mock_data/domain_unsup.txt",
+        "QA": "mock_data/QA_questions.txt"
     },
@@ -14,3 +15,7 @@ "labels": {
         "classification": "mock_data/supervised_texts_sequence_labels.txt",
-        "translation": "mock_data/seq2seq_targets.txt"
+        "translation": "mock_data/seq2seq_targets.txt",
+        "QA": "mock_data/QA_answers.txt"
+    },
+    "text_pair": {
+        "QA": "mock_data/QA_contexts.txt"
     }
@@ -26,3 +31,4 @@ }
     "token_classification": "bert-base-multilingual-cased",
-    "sequence_classification": "bert-base-multilingual-cased"
+    "sequence_classification": "bert-base-multilingual-cased",
+    "extractive_QA": "Unbabel/xlm-roberta-comet-small"
 }
@@ -29,0 +35,0 @@