mindformers.dataset.KeyWordGenDataset

class mindformers.dataset.KeyWordGenDataset(dataset_config: dict = None)[源代码]

Keyword generation dataset.

实际案例

>>> from mindformers.dataset.dataloader.adgen_dataloader import ADGenDataLoader
>>> from mindformers.dataset import build_dataset
>>> from mindformers import MindFormerConfig
>>> cfg = MindFormerConfig("./configs/glm/run_glm_6b_finetune.yaml")
>>> dataset = build_dataset(cfg.eval_dataset_task)
>>> for item in dataset.create_dict_iterator():
...     print(item)
...     break
classmethod create_position_ids(input_ids, gmask_token_id=130001)[源代码]

Generate position ids from the input ids.

classmethod eval_dataset_function(prompt, answer)[源代码]

Generate the evaluation dataset from the given prompt and answer.

classmethod get_masks(input_ids, bos_token_id=130004)[源代码]

Generate masks from the input ids.

classmethod get_position_ids(input_ids, mask_positions, use_gmasks=None, bos_token_id=130004, position_encoding_2d=True)[源代码]

Generate position ids from the input ids and the mask positions.

classmethod train_dataset_function(prompt, answer)[源代码]

Generate the training dataset from the given prompt and answer.