from typing import List

import torch
from transformers import PreTrainedTokenizer

from .evaluator_base import EvaluatorBase
from ..utils import Head, AdaptationDataset


class Perplexity(EvaluatorBase):

    compatible_heads: List[Head] = [Head.MLM, Head.CLM, Head.SEQ2SEQ]

    def __call__(self, model: torch.nn.Module, tokenizer: PreTrainedTokenizer, dataset: AdaptationDataset) -> float:
        raise NotImplementedError()
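Perplexity is only stubbed out in this version. A minimal sketch of what the evaluation could eventually look like for a causal-LM head follows; the batch keys and the reliance on the model's built-in loss are assumptions, not part of the committed code.

import math

import torch


def perplexity_sketch(model: torch.nn.Module, dataset) -> float:
    """Hypothetical perplexity: exp of the mean negative log-likelihood over the dataset."""
    nlls = []
    with torch.no_grad():
        for batch in dataset:
            # assumes batches carry input_ids, attention_mask and aligned labels
            outputs = model(input_ids=batch["input_ids"],
                            attention_mask=batch["attention_mask"],
                            labels=batch["labels"])
            nlls.append(outputs.loss.item())
    return math.exp(sum(nlls) / len(nlls))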
import abc
import itertools

from adaptor.evaluators.evaluator_base import EvaluatorBase
from adaptor.utils import Head, AdaptationDataset
from typing import List, Sequence

from sacrebleu import corpus_bleu
import torch
from transformers import PreTrainedTokenizer


class ExtractiveQAEvaluator(EvaluatorBase, abc.ABC):
    """
    Base evaluator for extractive QA evaluations.
    Provides the prediction routine and the compatible head.
    """

    compatible_heads: List[Head] = [Head.QA]

    def __call__(self,
                 model: torch.nn.Module,
                 tokenizer: PreTrainedTokenizer,
                 dataset: AdaptationDataset) -> float:
        """
        Extracts the predicted answers and compares them to the expected answers used as references.
        Refer to the superclass documentation.
        """
        expected_str = []
        actual_str = []

        for batch in dataset:
            with torch.no_grad():
                model_outputs = model(**{k: v for k, v in batch.items() if k not in ["oid", "labels",
                                                                                     "start_position", "end_position"]})
            actual_start_pos = model_outputs.start_logits.argmax(-1)
            actual_end_pos = model_outputs.end_logits.argmax(-1)

            expected_str.extend(tokenizer.batch_decode(batch["labels"]))
            for i, (act_start, act_end) in enumerate(zip(actual_start_pos, actual_end_pos)):
                actual_str.append(tokenizer.decode(batch["input_ids"][i, act_start: act_end]))

        # make sure that the references are not empty:
        expected_not_empty = [" " if string == "" else string for string in expected_str]

        assert len(expected_not_empty) == len(actual_str), \
            "The numbers of entries do not match. Expected: %s, actual: %s" % (len(expected_not_empty), len(actual_str))

        return self.evaluate_str(list(expected_not_empty), actual_str)

    @abc.abstractmethod
    def evaluate_str(self, expected_list: Sequence[str], actual_list: Sequence[str]) -> float:
        pass


class ExactMatch(ExtractiveQAEvaluator):
    """
    Exact-match metric for question answering.
    Computes the accuracy of the retrieved answers against the references.
    """

    smaller_is_better: bool = False

    def evaluate_str(self, expected_list: Sequence[str], actual_list: Sequence[str]) -> float:
        matches = sum(e.strip() == a.strip() for e, a in zip(expected_list, actual_list))
        total = len(expected_list)
        return matches / total


class F1ScoreForQA(ExtractiveQAEvaluator):
    """
    Token-level F1 score for question answering evaluation.
    Computes the mean F1 score over the predicted outputs, with tokens segmented by whitespace.
    """

    smaller_is_better: bool = False

    @staticmethod
    def _per_sample_f1(expected_answers: List[str], actual_answer: str) -> float:
        expected_answers_set = set(itertools.chain(*[a.split() for a in expected_answers]))
        actual_answer_set = actual_answer.split()

        true_positives = sum(a_word in expected_answers_set for a_word in actual_answer_set)
        false_positives = sum(a_word not in expected_answers_set for a_word in actual_answer_set)
        false_negatives = sum(e_word not in actual_answer_set for e_word in expected_answers_set)

        # F1 = TP / (TP + (FP + FN) / 2)
        return true_positives / (true_positives + 0.5 * (false_positives + false_negatives))

    def evaluate_str(self, expected_list: Sequence[str], actual_list: Sequence[str]) -> float:
        f_scores = [self._per_sample_f1([e], a) for e, a in zip(expected_list, actual_list)]
        mean_f_score = sum(f_scores) / len(f_scores)
        return mean_f_score


class BLEUForQA(ExtractiveQAEvaluator):
    """
    BLEU evaluator for question answering.
    Computes the standard corpus-level BLEU score between the retrieved and expected answers.
    """

    smaller_is_better: bool = False

    def evaluate_str(self, expected_list: Sequence[str], actual_list: Sequence[str]) -> float:
        return corpus_bleu(actual_list, [[e] for e in expected_list]).score
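For orientation, these evaluators plug into the new ExtractiveQA objective the same way the added tests do; a minimal usage sketch (the model name and file paths are placeholders mirroring the test fixtures in this diff, not a prescribed setup):

from adaptor.lang_module import LangModule
from adaptor.objectives.question_answering import ExtractiveQA
from adaptor.evaluators.question_answering import ExactMatch, F1ScoreForQA, BLEUForQA

# any model with a QA head should work; the tests use "Unbabel/xlm-roberta-comet-small"
lang_module = LangModule("Unbabel/xlm-roberta-comet-small")

qa_objective = ExtractiveQA(lang_module,
                            texts_or_path="mock_data/QA_questions.txt",    # placeholder paths
                            text_pair_or_path="mock_data/QA_contexts.txt",
                            labels_or_path="mock_data/QA_answers.txt",
                            batch_size=2,
                            val_evaluators=[ExactMatch(), F1ScoreForQA(), BLEUForQA()])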
from typing import Dict, Optional, Tuple, Union, Iterator, List

import torch
from transformers import DataCollatorWithPadding, BatchEncoding
from transformers.modeling_outputs import QuestionAnsweringModelOutput

from ..objectives.objective_base import SupervisedObjective
from ..utils import Head


class ExtractiveQA(SupervisedObjective):

    compatible_head: Head = Head.QA

    @staticmethod
    def _find_start_end_position(answer_ids: List[int], context_ids: List[int]) -> Tuple[int, int]:
        """
        Returns the first occurrence of the subsequence (answer_ids) in the sequence (context_ids).
        If no match is found, (0, 0) is returned.
        """
        start_positions = [i for i in range(len(context_ids) - len(answer_ids))
                           if context_ids[i: i + len(answer_ids)] == answer_ids]
        if not start_positions:
            return 0, 0
        else:
            first_start_position, first_end_position = start_positions[0], start_positions[0] + len(answer_ids)
            return first_start_position, first_end_position

    def _get_inputs_iterator(self, split: str) -> Iterator:
        """
        Batches and encodes input texts, text pairs and corresponding labels.
        :param split: Selected data split. `train` or `eval`.
        :return: Iterator over batch encodings.
        """
        collator = DataCollatorWithPadding(self.tokenizer, pad_to_multiple_of=8,
                                           return_tensors="pt", padding='max_length')

        batch_features = []
        for src_text, text_pair, label in zip(*self._per_split_iterators(split)):
            out_sample = self.tokenizer(src_text, text_pair=text_pair, truncation=True, padding='max_length')
            tokenized_label = self.tokenizer(label, truncation=True, padding='max_length')
            label_wo_padding = self.tokenizer(label)

            start_position, end_position = self._find_start_end_position(label_wo_padding["input_ids"][1:-1],
                                                                         out_sample["input_ids"])
            out_sample["label"] = tokenized_label["input_ids"]
            out_sample["start_position"] = start_position
            out_sample["end_position"] = end_position

            batch_features.append(out_sample)
            if len(batch_features) == self.batch_size:
                yield collator(batch_features)
                batch_features = []

        if batch_features:
            # yield the residual batch
            yield collator(batch_features)

    def _compute_loss(self,
                      model_outputs: QuestionAnsweringModelOutput,
                      labels: torch.LongTensor,
                      inputs: Optional[Union[BatchEncoding, Dict[str, torch.Tensor]]] = None,
                      attention_mask: Optional[torch.LongTensor] = None) -> torch.FloatTensor:
        """
        Computes the loss of the model outputs on a single question answering batch.
        :param model_outputs: QuestionAnsweringModelOutput.
        :param labels: Expected labels.
        :param attention_mask: Mask of the tokens to compute the loss on.
        :return: loss value with grad_fn.
        """
        loss_fct = torch.nn.CrossEntropyLoss()

        # the following keys need to be present in the model output
        start_logits = model_outputs["start_logits"]
        end_logits = model_outputs["end_logits"]

        start_positions = inputs["start_position"]
        end_positions = inputs["end_position"]

        start_loss = loss_fct(start_logits, start_positions)
        end_loss = loss_fct(end_logits, end_positions)
        total_loss = (start_loss + end_loss) / 2

        return total_loss
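To make the span matching above concrete: _find_start_end_position is a staticmethod that returns token positions with an exclusive end index, so a call with made-up token ids behaves as follows.

from adaptor.objectives.question_answering import ExtractiveQA

context_ids = [101, 5, 6, 7, 8, 9, 102]                      # made-up token ids
ExtractiveQA._find_start_end_position([7, 8], context_ids)   # -> (3, 5): the answer spans positions 3..4
ExtractiveQA._find_start_end_position([42], context_ids)     # -> (0, 0): answer not found in the context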
+1
-0
@@ -15,1 +15,2 @@ .venv/
 .idea
+dist
 Metadata-Version: 2.1
 Name: adaptor
-Version: 0.1.4
+Version: 0.1.5
 Summary: Adaptor: Objective-centric Adaptation Framework for Language Models.
@@ -5,0 +5,0 @@ Home-page: https://github.com/gaussalgo/adaptor
@@ -13,2 +13,2 @@ torch>=1.7
 fairseq
-protobuf
+protobuf<=3.20.1
@@ -18,8 +18,8 @@ .gitignore
adaptor.egg-info/zip-safe
adaptor/evaluators/LM.py
adaptor/evaluators/__init__.py
adaptor/evaluators/evaluator_base.py
adaptor/evaluators/generative.py
adaptor/evaluators/generative_new2.py
adaptor/evaluators/language_modeling.py
adaptor/evaluators/prism.py
+adaptor/evaluators/question_answering.py
adaptor/evaluators/sequence_classification.py
@@ -34,2 +34,3 @@ adaptor/evaluators/token_classification.py
adaptor/objectives/objective_base.py
+adaptor/objectives/question_answering.py
adaptor/objectives/seq2seq.py
@@ -36,0 +37,0 @@ docs/user_flow.png
@@ -77,3 +77,3 @@ import logging
         else:
-            loss = self.schedule.compute_loss(outputs, labels)
+            loss = self.schedule.compute_loss(outputs, labels, inputs)
@@ -80,0 +80,0 @@ mock_outputs = torch.tensor([-1, -1])
 import logging
+import inspect
 from typing import List, Dict, Any, Optional
@@ -7,3 +8,3 @@
     AutoModelForTokenClassification, AutoModelForSeq2SeqLM, AutoModelForCausalLM, \
-    AutoModelForMaskedLM
+    AutoModelForMaskedLM, AutoModelForQuestionAnswering
@@ -61,2 +62,4 @@ from .utils import Head
             new_head = AutoModelForMaskedLM.from_pretrained(model_name_or_path, **head_kwargs)
+        elif head_type == Head.QA:
+            new_head = AutoModelForQuestionAnswering.from_pretrained(model_name_or_path, **head_kwargs)
         else:
@@ -153,6 +156,9 @@ new_head = torch.load(model_name_or_path, **head_kwargs)
         selected_head_model = self.trainable_models[str(inputs["oid"])]
+        # include only correct inputs for a specific model
+        list_of_model_specific_inputs = inspect.getfullargspec(selected_head_model.forward).args
+        model_specific_inputs = {k: v for k, v in inputs.items() if k in list_of_model_specific_inputs}
         # including labels causes the loss to be computed twice - by the objective + by the HF model's forward()
         # but labels are also used to infer decoder_input_ids of some models, so we need to pass them
-        selected_head_output = selected_head_model(**{k: v for k, v in inputs.items() if k not in ("oid",)})
+        selected_head_output = selected_head_model(**model_specific_inputs)
         # HF models produce special Output objects instead of a raw output
@@ -159,0 +165,0 @@ logits = selected_head_output.logits if hasattr(selected_head_output, "logits") else selected_head_output
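The new forward-pass logic above keys the accepted inputs on the signature of the selected head's forward(). A self-contained illustration of that idea with a toy module (not the actual LangModule code; names and values are made up):

import inspect

import torch


class ToyHead(torch.nn.Module):
    def forward(self, input_ids, attention_mask=None):
        return input_ids


inputs = {"oid": "140", "input_ids": torch.tensor([[1, 2]]),
          "attention_mask": torch.tensor([[1, 1]]), "start_position": torch.tensor([0])}

head = ToyHead()
accepted_args = inspect.getfullargspec(head.forward).args            # ['self', 'input_ids', 'attention_mask']
filtered = {k: v for k, v in inputs.items() if k in accepted_args}   # drops "oid" and "start_position"
output = head(**filtered)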
@@ -1,5 +0,5 @@
-from typing import Dict, Iterable, Optional, Iterator, Union
+from typing import Dict, Iterable, Optional, Union
 import torch
-from transformers import DataCollatorForTokenClassification, DataCollatorWithPadding, BatchEncoding
+from transformers import DataCollatorForTokenClassification, BatchEncoding
@@ -121,33 +121,2 @@ from ..objectives.objective_base import SupervisedObjective
-    def _get_inputs_iterator(self, split: str) -> Iterator:
-        """
-        Batches and encodes input texts and corresponding labels.
-        :param split: Selected data split. `train` or `eval`.
-        :return: Iterator over batch encodings.
-        """
-        collator = DataCollatorWithPadding(self.tokenizer, pad_to_multiple_of=8)
-        classifying_pairs = None
-        batch_features = []
-        for src_text, label in zip(*self._per_split_iterators(split)):
-            # check from the first sample
-            if classifying_pairs is None:
-                # if the input texts are tab-separated we will tokenize them as pairs
-                classifying_pairs = "\t" in src_text
-            if classifying_pairs:
-                text, text_pair = src_text.split("\t")
-                out_sample = self.tokenizer(text, text_pair=text_pair, truncation=True)
-            else:
-                out_sample = self.tokenizer(src_text, truncation=True)
-            out_sample["label"] = torch.tensor(self.labels_map[label])
-            batch_features.append(out_sample)
-            if len(batch_features) == self.batch_size:
-                yield collator(batch_features)
-                batch_features = []
-        if batch_features:
-            # yield residual batch
-            yield collator(batch_features)
     def _compute_loss(self,
@@ -154,0 +123,0 @@ logit_outputs: torch.FloatTensor,
 import abc
 import itertools
 import logging
-from typing import List, Union, Optional, Iterable, Tuple, Dict, Sequence, Any
+from typing import List, Union, Optional, Iterable, Tuple, Dict, Sequence, Any, Iterator
 import torch
 from tqdm import trange
-from transformers import BatchEncoding
+from transformers import BatchEncoding, DataCollatorWithPadding
@@ -14,3 +14,2 @@ from ..evaluators.evaluator_base import EvaluatorBase
 logger = logging.getLogger()
@@ -444,3 +443,2 @@
 class SupervisedObjective(UnsupervisedObjective, abc.ABC):
     labels_path: Optional[str] = None
@@ -452,2 +450,8 @@ labels: Optional[List[str]] = None
+    text_pair_path: Optional[str] = None
+    text_pair: Optional[List[str]] = None
+    val_text_pair_path: Optional[str] = None
+    val_text_pair: Optional[List[str]] = None
     labels_map: Dict[str, int] = {}
@@ -459,2 +463,4 @@
                  val_labels_or_path: Optional[Union[str, List[str]]] = None,
+                 text_pair_or_path: Optional[Union[str, List[str]]] = None,
+                 val_text_pair_or_path: Optional[Union[str, List[str]]] = None,
                  **kwargs):
@@ -473,2 +479,14 @@
+        if text_pair_or_path is not None:
+            if isinstance(text_pair_or_path, str):
+                self.text_pair_path = text_pair_or_path
+            else:
+                self.text_pair = text_pair_or_path
+        if val_text_pair_or_path is not None:
+            if isinstance(val_text_pair_or_path, str):
+                self.val_text_pair_path = val_text_pair_or_path
+            else:
+                self.val_text_pair = val_text_pair_or_path
         # init will call register_compatible_head_model, which resolves num_labels for new head config from self.labels
@@ -509,4 +527,41 @@ super().__init__(*args, **kwargs)
-    def _per_split_iterators(self, split: str) -> Tuple[Iterable[str], Iterable[str]]:
+    def _get_inputs_iterator(self, split: str) -> Iterator[Union[BatchEncoding, Dict[str, torch.Tensor]]]:
+        """
+        Batches and encodes input texts and corresponding labels.
+        :param split: Selected data split. `train` or `eval`.
+        :return: Iterator over batch encodings.
+        """
+        collator = DataCollatorWithPadding(self.tokenizer, pad_to_multiple_of=8)
+        classifying_pairs = None
+        batch_features = []
+        for source_target_tuple in zip(*self._per_split_iterators(split)):
+            # check from the first sample
+            if classifying_pairs is None:
+                # if the input texts are tab-separated we will tokenize them as pairs
+                classifying_pairs = len(source_target_tuple) > 2
+                if classifying_pairs:
+                    assert len(source_target_tuple) == 3, "Expecting tuples of (source, source_pair, target) texts"
+            if classifying_pairs:
+                text, text_pair, label = source_target_tuple
+                out_sample = self.tokenizer(text, text_pair=text_pair, truncation=True)
+            else:
+                text, label = source_target_tuple
+                out_sample = self.tokenizer(text, truncation=True)
+            out_sample["label"] = torch.tensor(self.labels_map[label])
+            batch_features.append(out_sample)
+            if len(batch_features) == self.batch_size:
+                yield collator(batch_features)
+                batch_features = []
+        if batch_features:
+            # yield residual batch
+            yield collator(batch_features)
+    def _per_split_iterators(self, split: str) -> Union[Tuple[Iterable[str], Iterable[str]],
+                                                        Tuple[Iterable[str], Iterable[str], Iterable[str]]]:
         """
         Default inputs iterator for supervised objectives. Returns a pair of iterators, over input texts and labels.
@@ -524,4 +579,10 @@ Not meant to be overriden when implementing custom data set. Instead choose to inherit either
             targets_iter = AdaptationDataset.iter_text_file_per_line(self.labels_path)
+            if self.text_pair is not None:
+                source_pairs_iter = iter(self.text_pair)
+            elif self.text_pair_path is not None:
+                source_pairs_iter = AdaptationDataset.iter_text_file_per_line(self.text_pair_path)
+            else:
+                source_pairs_iter = None
         elif split == "eval":
             if self.val_labels is not None:
@@ -535,5 +596,14 @@ targets_iter = iter(self.val_labels)
                                  "or set Objective(val_labels) param." % self)
+            if self.val_text_pair is not None:
+                source_pairs_iter = iter(self.val_text_pair)
+            elif self.val_text_pair_path is not None:
+                source_pairs_iter = AdaptationDataset.iter_text_file_per_line(self.val_text_pair_path)
+            else:
+                source_pairs_iter = None
         else:
             raise ValueError("Unrecognized split: %s" % split)
-        return sources_iter, targets_iter
+        if source_pairs_iter is not None:
+            return sources_iter, source_pairs_iter, targets_iter
+        else:
+            return sources_iter, targets_iter
@@ -104,3 +104,4 @@ import abc
         loss_fct = torch.nn.CrossEntropyLoss()
-        lm_loss = loss_fct(lm_logit_outputs.view(-1, self.tokenizer.vocab_size), labels.view(-1))
+        # vocab-agnostic loss circumvents incorrectly-set vocab_size of some models (e.g. mt5)
+        lm_loss = loss_fct(lm_logit_outputs.flatten(end_dim=1), labels.flatten())
@@ -107,0 +108,0 @@ return lm_loss
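The replaced line above concerns models (e.g. mT5) whose tokenizer.vocab_size does not match the last dimension of the logits; flattening the batch and sequence dimensions takes the vocabulary size from the tensor itself. A small sanity check with made-up shapes:

import torch

logits = torch.randn(2, 5, 32128)               # (batch, seq_len, model_vocab)
labels = torch.randint(0, 32128, (2, 5))

flat_new = logits.flatten(end_dim=1)            # (10, 32128), vocab size taken from the tensor
flat_old = logits.view(-1, logits.size(-1))     # equivalent only when vocab_size is reported correctly
assert torch.equal(flat_new, flat_old)

loss = torch.nn.CrossEntropyLoss()(flat_new, labels.flatten())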
+139
-48
@@ -6,6 +6,16 @@ import abc
 import torch
-from transformers import TrainerCallback, TrainingArguments, TrainerState, TrainerControl, BatchEncoding
+from transformers import (
+    TrainerCallback,
+    TrainingArguments,
+    TrainerState,
+    TrainerControl,
+    BatchEncoding,
+)
 from adaptor.objectives.objective_base import Objective
-from adaptor.utils import TransformerAdaptationDataset, StoppingStrategy, AdaptationArguments
+from adaptor.utils import (
+    TransformerAdaptationDataset,
+    StoppingStrategy,
+    AdaptationArguments,
+)
@@ -31,6 +41,8 @@ logger = logging.getLogger()
-    def __init__(self,
-                 objectives: List[Objective],
-                 args: AdaptationArguments,
-                 extra_eval_objectives: Iterable[Objective] = ()):
+    def __init__(
+        self,
+        objectives: List[Objective],
+        args: AdaptationArguments,
+        extra_eval_objectives: Iterable[Objective] = (),
+    ):
         """
@@ -45,4 +57,6 @@ Initialises queues of objectives outputs and training flow modification parameters.
         # eval objectives = train + eval => train objectives are evaluated implicitly
-        self.objectives = {"train": {id(o): o for o in objectives},
-                           "eval": {id(o): o for o in objectives + list(extra_eval_objectives)}}
+        self.objectives = {
+            "train": {id(o): o for o in objectives},
+            "eval": {id(o): o for o in objectives + list(extra_eval_objectives)},
+        }
@@ -57,3 +71,5 @@ # initially, let the user know the total number of samples that will be used for training and evaluation
             if not num_samples:
-                logger.warning("Make sure that you do not want to pass any %s samples!", split)
+                logger.warning(
+                    "Make sure that you do not want to pass any %s samples!", split
+                )
@@ -102,6 +118,12 @@ self.objectives_outputs_queue = []
         # a number of epochs per all objectives is an upper-bound of the training duration
-        obj_passed_epochs = [oid for oid in self.objectives["train"].keys() if self._objective_passed_epochs(oid)]
+        obj_passed_epochs = [
+            oid
+            for oid in self.objectives["train"].keys()
+            if self._objective_passed_epochs(oid)
+        ]
         if len(obj_passed_epochs) == len(self.objectives["train"]):
-            logger.warning("Scheduler reached the given maximum number of epochs for all objectives. "
-                           "Triggering termination.")
+            logger.warning(
+                "Scheduler reached the given maximum number of epochs for all objectives. "
+                "Triggering termination."
+            )
             return True, StoppingStrategy.ALL_OBJECTIVES_NUM_EPOCHS
@@ -111,21 +133,44 @@ # if the upper bound does not apply, check for the user-selected stopping strategy
         # strategies based on objectives' convergence
-        if self.args.stopping_strategy in (StoppingStrategy.FIRST_OBJECTIVE_CONVERGED,
-                                           StoppingStrategy.ALL_OBJECTIVES_CONVERGED):
-            self.converged_objectives = [obj for obj in self.objectives["train"].values()
-                                         if obj.is_finished(convergence_patience=self.args.stopping_patience)]
-            logger.warning("Converged objectives: %s" % [str(o) for o in self.converged_objectives])
-            if self.args.stopping_strategy == StoppingStrategy.FIRST_OBJECTIVE_CONVERGED:
+        if self.args.stopping_strategy in (
+            StoppingStrategy.FIRST_OBJECTIVE_CONVERGED,
+            StoppingStrategy.ALL_OBJECTIVES_CONVERGED,
+        ):
+            self.converged_objectives = [
+                obj
+                for obj in self.objectives["train"].values()
+                if obj.is_finished(convergence_patience=self.args.stopping_patience)
+            ]
+            logger.warning(
+                "Converged objectives: %s" % [str(o) for o in self.converged_objectives]
+            )
+            if (
+                self.args.stopping_strategy
+                == StoppingStrategy.FIRST_OBJECTIVE_CONVERGED
+            ):
                 return len(self.converged_objectives) > 0, self.args.stopping_strategy
             else:
-                return len(self.converged_objectives) == len(self.objectives["train"]), self.args.stopping_strategy
+                return (
+                    len(self.converged_objectives) == len(self.objectives["train"]),
+                    self.args.stopping_strategy,
+                )
         # strategies based on objectives' number of epochs
-        elif self.args.stopping_strategy in (StoppingStrategy.FIRST_OBJECTIVE_NUM_EPOCHS,
-                                             StoppingStrategy.ALL_OBJECTIVES_NUM_EPOCHS):
-            logger.warning("Objectives that passed max_epochs: %s" % [str(self.objectives["train"][o])
-                                                                      for o in obj_passed_epochs])
-            if self.args.stopping_strategy == StoppingStrategy.FIRST_OBJECTIVE_NUM_EPOCHS:
+        elif self.args.stopping_strategy in (
+            StoppingStrategy.FIRST_OBJECTIVE_NUM_EPOCHS,
+            StoppingStrategy.ALL_OBJECTIVES_NUM_EPOCHS,
+        ):
+            logger.warning(
+                "Objectives that passed max_epochs: %s"
+                % [str(self.objectives["train"][o]) for o in obj_passed_epochs]
+            )
+            if (
+                self.args.stopping_strategy
+                == StoppingStrategy.FIRST_OBJECTIVE_NUM_EPOCHS
+            ):
                 return len(obj_passed_epochs) > 0, self.args.stopping_strategy
             else:
-                return len(obj_passed_epochs) == len(self.objectives["train"]), self.args.stopping_strategy
+                return (
+                    len(obj_passed_epochs) == len(self.objectives["train"]),
+                    self.args.stopping_strategy,
+                )
@@ -139,6 +184,16 @@ # strategies based on a number of steps
         elif self.args.stopping_strategy == StoppingStrategy.ALL_OBJECTIVES_NUM_STEPS:
-            max_steps_objectives = [o for o in self.objectives["train"].values() if o.num_steps >= self.args.max_steps]
-            logger.warning("Objectives that passed max_steps: %s" % [str(o) for o in max_steps_objectives])
+            max_steps_objectives = [
+                o
+                for o in self.objectives["train"].values()
+                if o.num_steps >= self.args.max_steps
+            ]
+            logger.warning(
+                "Objectives that passed max_steps: %s"
+                % [str(o) for o in max_steps_objectives]
+            )
-            return len(max_steps_objectives) == len(self.objectives["train"]), StoppingStrategy.ALL_OBJECTIVES_NUM_STEPS
+            return (
+                len(max_steps_objectives) == len(self.objectives["train"]),
+                StoppingStrategy.ALL_OBJECTIVES_NUM_STEPS,
+            )
@@ -155,5 +210,10 @@ return False, self.args.stopping_strategy
 class AdaptationStoppingCallback(TrainerCallback):
-    def on_log(cls, args: TrainingArguments, state: TrainerState, control: TrainerControl, **kwargs) -> None:
-        """ Event called by Trainer after the given `logging_steps`."""
+    def on_log(
+        cls,
+        args: TrainingArguments,
+        state: TrainerState,
+        control: TrainerControl,
+        **kwargs
+    ) -> None:
+        """Event called by Trainer after the given `logging_steps`."""
         self.remember_if_should_stop()
@@ -170,8 +230,12 @@
         if self.should_stop:
-            logger.warning("Scheduler reached a termination condition: %s" % stopping_strategy.name)
+            logger.warning(
+                "Scheduler reached a termination condition: %s" % stopping_strategy.name
+            )
-    def compute_loss(self,
-                     logit_outputs: torch.FloatTensor,
-                     labels: torch.Tensor,
-                     inputs: Optional[Union[BatchEncoding, Dict[str, torch.Tensor]]] = None) -> torch.FloatTensor:
+    def compute_loss(
+        self,
+        logit_outputs: torch.FloatTensor,
+        labels: torch.Tensor,
+        inputs: Optional[Union[BatchEncoding, Dict[str, torch.Tensor]]] = None,
+    ) -> torch.FloatTensor:
         """
@@ -189,7 +253,11 @@ Retrieves a loss from the corresponding objective.
         # the objective loss arrives aggregated into a single item
-        loss = self.objectives[split][oid].compute_loss(logit_outputs, labels, inputs, split)
+        loss = self.objectives[split][oid].compute_loss(
+            logit_outputs, labels, inputs, split
+        )
         return loss
-    def _one_round_eval_objective_sampler(self, objective: Objective, obj_i: int) -> Iterator[Dict[str, Any]]:
+    def _one_round_eval_objective_sampler(
+        self, objective: Objective, obj_i: int
+    ) -> Iterator[Dict[str, Any]]:
         """
@@ -208,3 +276,5 @@ Default evaluation data sampling strategy: constructs a single-round iterator
-    def _infinite_train_objective_sampler(self, objective: Objective, obj_i: int) -> Iterator[Dict[str, Any]]:
+    def _infinite_train_objective_sampler(
+        self, objective: Objective, obj_i: int
+    ) -> Iterator[Dict[str, Any]]:
         """
@@ -227,3 +297,5 @@ Default training data sampling strategy: constructs infinite iterator
-    def _sample_objective_dataset(self, objective: Objective, obj_i: int, split: str) -> Iterator[Dict[str, Any]]:
+    def _sample_objective_dataset(
+        self, objective: Objective, obj_i: int, split: str
+    ) -> Iterator[Dict[str, Any]]:
         if split == "train":
@@ -248,6 +320,10 @@ # infinite iteration of the training resources, until the termination condition apply
             # evaluation split uses simple, sequential evaluation over objectives
-            objective_sampler = SequentialSchedule.single_iteration_eval_sampling(self.objectives["eval"].values())
+            objective_sampler = SequentialSchedule.single_iteration_eval_sampling(
+                self.objectives["eval"].values()
+            )
-        objectives_data_samplers = {obj: self._sample_objective_dataset(obj, obj_i, split)
-                                    for obj_i, obj in enumerate(self.objectives[split].values())}
+        objectives_data_samplers = {
+            obj: self._sample_objective_dataset(obj, obj_i, split)
+            for obj_i, obj in enumerate(self.objectives[split].values())
+        }
         for i, objective in enumerate(objective_sampler):
@@ -270,7 +346,14 @@ try:
         """
-        length_combined = int(sum((o.dataset_length[split] // o.batch_size) for o in self.objectives[split].values()))
+        length_combined = int(
+            sum(
+                (o.dataset_length[split] // o.batch_size)
+                for o in self.objectives[split].values()
+            )
+        )
         if split == "train":
            length_combined *= int(self.args.num_train_epochs)
-        return TransformerAdaptationDataset(self._combine_datasets(split), length_combined)
+        return TransformerAdaptationDataset(
+            self._combine_datasets(split), length_combined
+        )
@@ -293,3 +376,6 @@
             for _ in range(objective.dataset_length[split]):
-                if objective in self.converged_objectives and not self.args.log_converged_objectives:
+                if (
+                    objective in self.converged_objectives
+                    and not self.args.log_converged_objectives
+                ):
                     continue
@@ -299,3 +385,5 @@ yield objective
     @staticmethod
-    def single_iteration_eval_sampling(objectives: Iterable[Objective]) -> Iterable[Objective]:
+    def single_iteration_eval_sampling(
+        objectives: Iterable[Objective],
+    ) -> Iterable[Objective]:
         """
@@ -323,4 +411,7 @@ Simple finite, single iteration over all objectives. Used by base Schedule for evaluation.
         for objective in self.objectives[split].values():
-            if objective in self.converged_objectives and not self.args.log_converged_objectives:
+            if (
+                objective in self.converged_objectives
+                and not self.args.log_converged_objectives
+            ):
                 continue
             yield objective
+30
-18
@@ -16,3 +16,4 @@ import abc
     MLM = 6
-    UNKNOWN = 7
+    QA = 7
+    UNKNOWN = 8
@@ -52,4 +53,7 @@
 class TransformerAdaptationDataset(AdaptationDataset):
-    def __init__(self, batch_encoding_params: Iterable[Dict[str, torch.LongTensor]], length: Optional[int] = None):
+    def __init__(
+        self,
+        batch_encoding_params: Iterable[Dict[str, torch.LongTensor]],
+        length: Optional[int] = None,
+    ):
         """
@@ -82,17 +86,20 @@ :param batch_encoding_params: Arguments to be passed to BatchEncoding (input_ids, attention_mask, labels)
     fixed_adaptation_args = {
-        "per_device_train_batch_size": 1,   # batching is done by Objective, no two distinct batches
-        "per_device_eval_batch_size": 1,    # should be present in a single infer batch
-        "per_gpu_train_batch_size": None,   # aggregation over multiple objectives can be done using
-        "per_gpu_eval_batch_size": None,    # `gradient_accumulation_steps` > 1
-        "do_predict": False,                # we do not want to mangle with multi-objective reports here,
-                                            # models are separately reloadable
-        "disable_tqdm": True,               # scheduler takes care of top-level terminal monitoring
-        "dataloader_pin_memory": False,     # does not necessarily match the shapes in multi-objective training
+        "per_device_train_batch_size": 1,   # batching is done by Objective, no two distinct batches
+        "per_device_eval_batch_size": 1,    # should be present in a single infer batch
+        "per_gpu_train_batch_size": None,   # aggregation over multiple objectives can be done using
+        "per_gpu_eval_batch_size": None,    # `gradient_accumulation_steps` > 1
+        "do_predict": False,                # we do not want to mangle with multi-objective reports here,
+                                            # models are separately reloadable
+        "disable_tqdm": True,               # scheduler takes care of top-level terminal monitoring
+        "dataloader_pin_memory": False,     # does not necessarily match the shapes in multi-objective training
+        "remove_unused_columns": False,     # from transformers 4.19.x, this would remove batches' control attributes
     }
-    def __init__(self,
-                 stopping_strategy: StoppingStrategy,
-                 stopping_patience: Optional[int] = 10,
-                 also_log_converged_objectives: Optional[bool] = True,
-                 **kwargs):
+    def __init__(
+        self,
+        stopping_strategy: StoppingStrategy,
+        stopping_patience: Optional[int] = 10,
+        also_log_converged_objectives: Optional[bool] = True,
+        **kwargs
+    ):
@@ -105,5 +112,10 @@ # novel arguments, w.r.t. original TrainingArguments
         # adjustments of the defaults expected by Scheduler
-        unexpected_adjusted_args = [arg for arg in kwargs.keys() if arg in self.fixed_adaptation_args.keys()]
+        unexpected_adjusted_args = [
+            arg for arg in kwargs.keys() if arg in self.fixed_adaptation_args.keys()
+        ]
         if unexpected_adjusted_args:
-            raise ValueError("You should not set these TrainingArgs for Adaptation: %s" % unexpected_adjusted_args)
+            raise ValueError(
+                "You should not set these TrainingArgs for Adaptation: %s"
+                % unexpected_adjusted_args
+            )
@@ -110,0 +122,0 @@ # set default values to fixed args
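The hunks above concern arguments that AdaptationArguments fixes itself; overriding any of them is rejected. A hedged illustration of that failure mode (argument values are arbitrary):

from adaptor.utils import AdaptationArguments, StoppingStrategy

# raises ValueError: "You should not set these TrainingArgs for Adaptation: ['per_device_train_batch_size']"
args = AdaptationArguments(output_dir="adaptation_output",
                           stopping_strategy=StoppingStrategy.ALL_OBJECTIVES_CONVERGED,
                           per_device_train_batch_size=8)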
+1
-1
 Metadata-Version: 2.1
 Name: adaptor
-Version: 0.1.4
+Version: 0.1.5
 Summary: Adaptor: Objective-centric Adaptation Framework for Language Models.
@@ -5,0 +5,0 @@ Home-page: https://github.com/gaussalgo/adaptor
+2
-2
@@ -12,3 +12,3 @@ #!/usr/bin/env python
     name="adaptor",
-    version='0.1.4',
+    version='0.1.5',
     description="Adaptor: Objective-centric Adaptation Framework for Language Models.",
@@ -46,3 +46,3 @@ long_description_content_type="text/markdown",
         "fairseq",
-        "protobuf",
+        "protobuf<=3.20.1",
     ],
@@ -49,0 +49,0 @@ "examples": [
 from adaptor.evaluators.generative import GenerativeEvaluator
+from adaptor.evaluators.question_answering import ExtractiveQAEvaluator
 from adaptor.evaluators.sequence_classification import SeqClassificationEvaluator
@@ -87,2 +88,17 @@ from adaptor.evaluators.token_classification import TokenClassificationEvaluator
+def assert_qa_evaluator_logs(evaluator: ExtractiveQAEvaluator, split: str) -> None:
+    from adaptor.objectives.question_answering import ExtractiveQA
+    lang_module = LangModule(test_base_models["extractive_QA"])
+    qa_objective = ExtractiveQA(lang_module,
+                                texts_or_path=paths["texts"]["QA"],
+                                text_pair_or_path=paths["text_pair"]["QA"],
+                                labels_or_path=paths["labels"]["QA"],
+                                batch_size=2,
+                                train_evaluators=[evaluator],
+                                val_evaluators=[evaluator])
+    assert_evaluator_logs(lang_module, qa_objective, split)
 def test_bleu():
@@ -137,1 +153,16 @@ from adaptor.evaluators.generative import BLEU
     assert_classification_evaluator_logs(SequenceAccuracy(decides_convergence=False), "train")
+def test_QA_exact_match():
+    from adaptor.evaluators.question_answering import ExactMatch
+    assert_qa_evaluator_logs(ExactMatch(), "train")
+def test_QA_fscore():
+    from adaptor.evaluators.question_answering import F1ScoreForQA
+    assert_qa_evaluator_logs(F1ScoreForQA(), "train")
+def test_QA_BLEU():
+    from adaptor.evaluators.question_answering import BLEUForQA
+    assert_qa_evaluator_logs(BLEUForQA(), "train")
@@ -8,2 +8,3 @@ from adaptor.lang_module import LangModule
 from adaptor.objectives.objective_base import Objective
+from adaptor.objectives.question_answering import ExtractiveQA
 from adaptor.objectives.seq2seq import Sequence2Sequence
@@ -127,1 +128,13 @@ from utils import paths, test_base_models
     assert_module_objective_ok(lang_module, objective)
+def test_supervised_QA_objective():
+    lang_module = LangModule(test_base_models["extractive_QA"])
+    objective = ExtractiveQA(lang_module,
+                             texts_or_path=paths["texts"]["QA"],
+                             text_pair_or_path=paths["text_pair"]["QA"],
+                             labels_or_path=paths["labels"]["QA"],
+                             batch_size=4)
+    assert_module_objective_ok(lang_module, objective)
+9
-3
@@ -8,3 +8,4 @@ from adaptor.utils import AdaptationArguments, StoppingStrategy
         "translation": "mock_data/seq2seq_sources.txt",
-        "unsup": "mock_data/domain_unsup.txt"
+        "unsup": "mock_data/domain_unsup.txt",
+        "QA": "mock_data/QA_questions.txt"
     },
@@ -14,3 +15,7 @@ "labels": {
         "classification": "mock_data/supervised_texts_sequence_labels.txt",
-        "translation": "mock_data/seq2seq_targets.txt"
+        "translation": "mock_data/seq2seq_targets.txt",
+        "QA": "mock_data/QA_answers.txt"
+    },
+    "text_pair": {
+        "QA": "mock_data/QA_contexts.txt"
     }
@@ -26,3 +31,4 @@ }
     "token_classification": "bert-base-multilingual-cased",
-    "sequence_classification": "bert-base-multilingual-cased"
+    "sequence_classification": "bert-base-multilingual-cased",
+    "extractive_QA": "Unbabel/xlm-roberta-comet-small"
 }
@@ -29,0 +35,0 @@