# Source code for mindformers.pipeline.zero_shot_image_classification_pipeline

# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""ZeroShotImageClassificationPipeline"""
from typing import Union, Optional

from mindspore import Model
from mindspore.ops import operations as P

from mindformers.mindformer_book import MindFormerBook
from mindformers.tools.image_tools import load_image
from mindformers.models import BaseModel, BaseImageProcessor, Tokenizer
from mindformers.tools.register import MindFormerRegister, MindFormerModuleType
from mindformers.auto_class import AutoProcessor, AutoModel
from .base_pipeline import BasePipeline
from ..models import BaseTokenizer


@MindFormerRegister.register(MindFormerModuleType.PIPELINE, alias="zero_shot_image_classification")
class ZeroShotImageClassificationPipeline(BasePipeline):
    r"""Pipeline For Zero Shot Image Classification

    Args:
        model (Union[str, BaseModel]): The model used to perform task,
            the input could be a supported model name, or a model instance
            inherited from BaseModel.
        tokenizer (Optional[BaseTokenizer]): A tokenizer for text processing.
            When `model` is a supported name and this is None, the tokenizer
            is taken from ``AutoProcessor.from_pretrained(model)``.
        image_processor (Optional[BaseImageProcessor]): The image_processor of model.
            When `model` is a supported name and this is None, the image
            processor is taken from ``AutoProcessor.from_pretrained(model)``.
        **kwargs: Forwarded unchanged to ``BasePipeline.__init__``
            (the example below passes `candidate_labels` and
            `hypothesis_template` this way).

    Raises:
        TypeError: If `model` is neither a str, a BaseModel nor a Model; or,
            when `model` is given by name, if `image_processor` is not a
            BaseImageProcessor or `tokenizer` is not a Tokenizer.
        ValueError: If the model name is not in the support list, or if no
            tokenizer / image_processor is available after resolution.

    Examples:
        >>> from mindformers.tools.image_tools import load_image
        >>> from mindformers.pipeline import ZeroShotImageClassificationPipeline
        >>> classifier = ZeroShotImageClassificationPipeline(
        ...     model='clip_vit_b_32',
        ...     candidate_labels=["sunflower", "tree", "dog", "cat", "toy"],
        ...     hypothesis_template="This is a photo of {}."
        ...     )
        >>> img = load_image("https://ascend-repo-modelzoo.obs.cn-east-2."
        ...                  "myhuaweicloud.com/XFormer_for_mindspore/clip/sunflower.png")
        >>> classifier(img)
            [[{'score': 0.99995565, 'label': 'sunflower'},
            {'score': 2.5318595e-05, 'label': 'toy'},
            {'score': 9.903885e-06, 'label': 'dog'},
            {'score': 6.75336e-06, 'label': 'tree'},
            {'score': 2.396818e-06, 'label': 'cat'}]]
    """
    # Names of the models registered for this task; evaluated once at class creation.
    _support_list = MindFormerBook.get_pipeline_support_task_list()['zero_shot_image_classification'].keys()

    def __init__(self, model: Union[str, BaseModel, Model],
                 tokenizer: Optional[BaseTokenizer] = None,
                 image_processor: Optional[BaseImageProcessor] = None,
                 **kwargs):
        if isinstance(model, str):
            if model not in self._support_list:
                raise ValueError(f"{model} is not supported by ZeroShotImageClassificationPipeline,"
                                 f"please selected from {self._support_list}.")

            # The processor is loaded lazily and at most once, even when both
            # the image processor and the tokenizer have to come from it.
            processor = None

            if image_processor is None:
                processor = AutoProcessor.from_pretrained(model)
                image_processor = processor.image_processor
            if not isinstance(image_processor, BaseImageProcessor):
                raise TypeError("image_processor should be inherited from"
                                f" BaseImageProcessor, but got {type(image_processor)}.")

            if tokenizer is None:
                if processor is None:
                    processor = AutoProcessor.from_pretrained(model)
                tokenizer = processor.tokenizer
            if not isinstance(tokenizer, Tokenizer):
                raise TypeError("tokenizer should be inherited from"
                                f" PretrainedTokenizer, but got {type(tokenizer)}.")

            model = AutoModel.from_pretrained(model)

        if not isinstance(model, (BaseModel, Model)):
            raise TypeError(f"model should be inherited from BaseModel or Model, but got type {type(model)}.")

        if tokenizer is None:
            raise ValueError("ZeroShotImageClassificationPipeline"
                             " requires for a tokenizer.")
        if image_processor is None:
            raise ValueError("ZeroShotImageClassificationPipeline"
                             " requires for a image_processor.")

        super().__init__(model, tokenizer, image_processor, **kwargs)

    def _sanitize_parameters(self, **pipeline_parameters):
        r"""Sanitize Parameters

        Splits the keyword arguments into per-stage parameter dicts.

        Args:
            pipeline_parameters (Optional[dict]): The parameter dict to be parsed.

        Return:
            A tuple ``(preprocess_params, forward_params, postprocess_params)``;
            ``forward_params`` is always an empty dict.
        """
        pre_list = ["candidate_labels", "hypothesis_template",
                    "max_length", "padding", "return_tensors"]
        preprocess_params = {item: pipeline_parameters[item]
                             for item in pre_list if item in pipeline_parameters}

        postprocess_params = {}
        if "top_k" in pipeline_parameters:
            postprocess_params["top_k"] = pipeline_parameters["top_k"]

        return preprocess_params, {}, postprocess_params

    def preprocess(self, inputs: dict, **preprocess_params):
        r"""Preprocess of ZeroShotImageClassificationPipeline

        Args:
            inputs (Union[url, PIL.Image, tensor, numpy]): The image to be classified.
                A dict is also accepted, in which case its ``'image'`` entry is used;
                a str is loaded through ``load_image``.
            candidate_labels (Union[str, List[str]]): The candidate labels for
                classification. A single str is treated as one label.
            hypothesis_template (Optional[str]): Prompt for text input,
                defaults to ``"a picture of {}."``.
            max_length (Optional[int]): Max length of tokenizer's output, defaults to 77.
            padding (Optional[Union[False, "max_length"]]): Padding for max_length,
                defaults to ``"max_length"``.
            return_tensors (Optional["ms"]): The type of returned tensors, defaults to ``"ms"``.

        Return:
            A dict with keys ``"image_processed"``, ``"input_ids"`` and ``"candidate_labels"``.

        Raises:
            ValueError: If candidate_labels or hypothesis_template is None.
        """
        candidate_labels = preprocess_params.pop("candidate_labels", None)
        hypothesis_template = preprocess_params.pop("hypothesis_template", "a picture of {}.")
        max_length = preprocess_params.pop("max_length", 77)
        padding = preprocess_params.pop("padding", "max_length")
        return_tensors = preprocess_params.pop("return_tensors", "ms")

        if candidate_labels is None:
            raise ValueError("candidate_labels are supposed for"
                             " ZeroShotImageClassificationPipeline, but got None.")
        if hypothesis_template is None:
            raise ValueError("hypothesis_template is supposed for"
                             " ZeroShotImageClassificationPipeline, but got None.")

        # A bare string would otherwise be iterated character by character,
        # producing one prompt per letter instead of one prompt for the label.
        if isinstance(candidate_labels, str):
            candidate_labels = [candidate_labels]

        if isinstance(inputs, dict):
            inputs = inputs['image']
        if isinstance(inputs, str):
            inputs = load_image(inputs)

        image_processed = self.image_processor(inputs)

        sentences = [hypothesis_template.format(candidate_label)
                     for candidate_label in candidate_labels]
        input_ids = self.tokenizer(sentences, max_length=max_length, padding=padding,
                                   return_tensors=return_tensors)["input_ids"]

        return {"image_processed": image_processed,
                "input_ids": input_ids,
                "candidate_labels": candidate_labels}

    def forward(self, model_inputs: dict, **forward_params):
        r"""Forward process

        Args:
            model_inputs (dict): Outputs of preprocess.

        Return:
            Probs dict with keys ``"probs"`` (numpy array) and ``"candidate_labels"``.
        """
        # No forward parameters are used; this pop only touches the otherwise
        # unused argument (presumably to keep linters quiet).
        forward_params.pop("None", None)

        image_processed = model_inputs["image_processed"]
        input_ids = model_inputs["input_ids"]

        # The second network output is not needed here.
        logits_per_image, _ = self.network(image_processed, input_ids)
        probs = P.Softmax()(logits_per_image).asnumpy()

        return {"probs": probs, "candidate_labels": model_inputs["candidate_labels"]}

    def postprocess(self, model_outputs: dict, **postprocess_params):
        r"""Postprocess

        Args:
            model_outputs (dict): Outputs of forward process.
            top_k (int): Return top_k probs of result. None (the default) keeps
                every label; values larger than the number of labels are clamped.

        Return:
            Classification results: for every row of ``probs``, a list of
            ``{"score": ..., "label": ...}`` dicts sorted by descending score.

        Raises:
            ValueError: If top_k is negative.
        """
        top_k = postprocess_params.pop("top_k", None)
        # A negative value would be interpreted as "drop the last |top_k|
        # results" by slicing, which is never what the caller asked for.
        if top_k is not None and top_k < 0:
            raise ValueError(f"top_k should be a non-negative integer, but got {top_k}.")

        labels = model_outputs['candidate_labels']
        scores = model_outputs['probs']

        outputs = []
        for score in scores:
            sorted_res = sorted(zip(score, labels), key=lambda x: -x[0])
            if top_k is not None:
                # Slicing already clamps to the number of available labels.
                sorted_res = sorted_res[:top_k]
            outputs.append([{"score": score_item, "label": label}
                            for score_item, label in sorted_res])
        return outputs