时间:2021-05-23
PyTorch 的 Vision 模块提供了图像变换的很多函数.
torchvision/transforms/functional.py
from __future__ import divisionimport torchimport sysimport mathfrom PIL import Image, ImageOps, ImageEnhance, PILLOW_VERSIONtry: import accimageexcept ImportError: accimage = Noneimport numpy as npimport numbersimport collectionsimport warningsimport matplotlib as pltif sys.version_info < (3, 3): Sequence = collections.Sequence Iterable = collections.Iterableelse: Sequence = collections.abc.Sequence Iterable = collections.abc.Iterable以下图为例:
img_file = "test.jpe"img = Image.open(img_file)width, height = img.size #(750, 815)img.show()将 PIL Image 或 nupy.ndarray 转换为 tensor
def to_tensor(pic): """ Args: pic (PIL Image or numpy.ndarray): Image to be converted to tensor. Returns: Tensor: Converted image. """ if not(_is_pil_image(pic) or _is_numpy_image(pic)): raise TypeError('pic should be PIL Image or ndarray. Got {}'.format(type(pic))) if isinstance(pic, np.ndarray): # handle numpy array img = torch.from_numpy(pic.transpose((2, 0, 1))) # backward compatibility if isinstance(img, torch.ByteTensor): return img.float().div(255) else: return img if accimage is not None and isinstance(pic, accimage.Image): nppic = np.zeros([pic.channels, pic.height, pic.width], dtype=np.float32) pic.copyto(nppic) return torch.from_numpy(nppic) # handle PIL Image if pic.mode == 'I': img = torch.from_numpy(np.array(pic, np.int32, copy=False)) elif pic.mode == 'I;16': img = torch.from_numpy(np.array(pic, np.int16, copy=False)) elif pic.mode == 'F': img = torch.from_numpy(np.array(pic, np.float32, copy=False)) elif pic.mode == '1': img = 255 * torch.from_numpy(np.array(pic, np.uint8, copy=False)) else: img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes())) # PIL image mode: L, P, I, F, RGB, YCbCr, RGBA, CMYK if pic.mode == 'YCbCr': nchannel = 3 elif pic.mode == 'I;16': nchannel = 1 else: nchannel = len(pic.mode) img = img.view(pic.size[1], pic.size[0], nchannel) # put it from HWC to CHW format # yikes, this transpose takes 80% of the loading time/CPU img = img.transpose(0, 1).transpose(0, 2).contiguous() if isinstance(img, torch.ByteTensor): return img.float().div(255) else: return img将 tensor 或 ndarray 转换为 PIL Image
def to_pil_image(pic, mode=None): """ Args: pic (Tensor or numpy.ndarray): Image to be converted to PIL Image. mode (`PIL.Image mode`_): color space and pixel depth of input data (optional). .. _PIL.Image mode: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#concept-modes Returns: PIL Image: Image converted to PIL Image. """ if not(isinstance(pic, torch.Tensor) or isinstance(pic, np.ndarray)): raise TypeError('pic should be Tensor or ndarray. Got {}.'.format(type(pic))) elif isinstance(pic, torch.Tensor): if pic.ndimension() not in {2, 3}: raise ValueError('pic should be 2/3 dimensional. Got {} '\ 'dimensions.'.format(pic.ndimension())) elif pic.ndimension() == 2: # if 2D image, add channel dimension (CHW) pic.unsqueeze_(0) elif isinstance(pic, np.ndarray): if pic.ndim not in {2, 3}: raise ValueError('pic should be 2/3 dimensional. Got {} '\ 'dimensions.'.format(pic.ndim)) elif pic.ndim == 2: # if 2D image, add channel dimension (HWC) pic = np.expand_dims(pic, 2) npimg = pic if isinstance(pic, torch.FloatTensor): pic = pic.mul(255).byte() if isinstance(pic, torch.Tensor): npimg = np.transpose(pic.numpy(), (1, 2, 0)) if not isinstance(npimg, np.ndarray): raise TypeError('Input pic must be a torch.Tensor or NumPy ndarray, ' + 'not {}'.format(type(npimg))) if npimg.shape[2] == 1: expected_mode = None npimg = npimg[:, :, 0] if npimg.dtype == np.uint8: expected_mode = 'L' elif npimg.dtype == np.int16: expected_mode = 'I;16' elif npimg.dtype == np.int32: expected_mode = 'I' elif npimg.dtype == np.float32: expected_mode = 'F' if mode is not None and mode != expected_mode: raise ValueError("Incorrect mode ({}) supplied for input type {}. Should be {}" .format(mode, np.dtype, expected_mode)) mode = expected_mode elif npimg.shape[2] == 4: permitted_4_channel_modes = ['RGBA', 'CMYK'] if mode is not None and mode not in permitted_4_channel_modes: raise ValueError("Only modes {} are supported for 4D inputs".format(permitted_4_channel_modes)) if mode is None and npimg.dtype == np.uint8: mode = 'RGBA' else: permitted_3_channel_modes = ['RGB', 'YCbCr', 'HSV'] if mode is not None and mode not in permitted_3_channel_modes: raise ValueError("Only modes {} are supported for 3D inputs".format(permitted_3_channel_modes)) if mode is None and npimg.dtype == np.uint8: mode = 'RGB' if mode is None: raise TypeError('Input type {} is not supported'.format(npimg.dtype)) return Image.fromarray(npimg, mode=mode)归一化 tensor 的图像. in-place 计算.
def normalize(tensor, mean, std): """ Args: tensor (Tensor): Tensor image of size (C, H, W) to be normalized. mean (sequence): Sequence of means for each channel. std (sequence): Sequence of standard deviations for each channely. Returns: Tensor: Normalized Tensor image. """ if not _is_tensor_image(tensor): raise TypeError('tensor is not a torch image.') # This is faster than using broadcasting, don't change without benchmarking for t, m, s in zip(tensor, mean, std): t.sub_(m).div_(s) return tensor# examplemean = [0.485, 0.456, 0.406]std = [0.229, 0.224, 0.225]img_normalize = normalize(img_tensor, mean, std)# visax1 = plt.subplot(1, 2, 1)ax1.imshow(img)ax1.axis("off")ax1.set_title("orig img")ax2 = plt.subplot(1, 2, 2)ax2.imshow(to_pil_image(img_normalize))ax2.axis("off")ax2.set_title("normalize img")plt.show()对输入的 PIL Image 进行 resize 到给定尺寸.
参数 size 为调整后的尺寸.
如果 size 是数组(h, w),则直接调整到该 (h, w) 尺寸.
如果 size 是一个 int 值,则调整后图像的最短边是该值,且保持固定的长宽比.
根据指定的 padding 模式和填充值,对给定的 PIL Image 的所有边进行 pad 处理.
参数 padding - int 或 tuple 形式.
padding:
参数 fill - 像素填充值,默认为 0. 如果值是长度为 3 的 tuple,则分别对 R,G,B 通道进行填充. 仅用于当 padding_mode='constant' 的情况.
参数 padding_mode - 填充的类型,可选:constant,edge,reflect,symmetric. 默认为 constant. 填充常数值.
constant - padding 填充常数值 fill.
edge - padding 图像边缘的最后一个值.
reflect - padding 图像的反射(reflection)值,(不对图像边缘的最后一个像素值进行重复)
如,[1, 2, 3, 4] 在 reflect 模式下在 两边 padding 2 个元素值,会得到:
[3, 2, 1, 2, 3, 4, 3, 2]
symmetric - padding 图像的反射(reflection)值,(对图像边缘的最后一个像素值进行重复).
如,[1, 2, 3, 4] 在 symmetric 模式下在 两边 padding 2 个元素值,会得到:
[2, 1, 1, 2, 3, 4, 4, 3]
裁剪给定的 PIL Image.
def crop(img, i, j, h, w): """ Args: img (PIL Image): Image to be cropped. i: Upper pixel coordinate. j: Left pixel coordinate. h: Height of the cropped image. w: Width of the cropped image. Returns: PIL Image: Cropped image. """ if not _is_pil_image(img): raise TypeError('img should be PIL Image. Got {}'.format(type(img))) return img.crop((j, i, j + w, i + h))# exampleimg_crop = crop(img, 100, 100, 500, 500) # (750, 815) -> (500, 500)ax1 = plt.subplot(1, 2, 1)ax1.imshow(img)ax1.axis("off")ax1.set_title("orig img")ax2 = plt.subplot(1, 2, 2)ax2.imshow(img_crop)ax2.axis("off")ax2.set_title("crop img")plt.show()对给定 PIL Image 进行裁剪,并 resize 到特定尺寸.
def resized_crop(img, i, j, h, w, size, interpolation=Image.BILINEAR): """ Args: img (PIL Image): Image to be cropped. i: Upper pixel coordinate. j: Left pixel coordinate. h: Height of the cropped image. w: Width of the cropped image. size (sequence or int): Desired output size. Same semantics as ``resize``. interpolation (int, optional): Desired interpolation. Default is ``PIL.Image.BILINEAR``. Returns: PIL Image: Cropped image. """ assert _is_pil_image(img), 'img should be PIL Image' img = crop(img, i, j, h, w) img = resize(img, size, interpolation) return img# exampleimg_resizedcrop = resized_crop(img, 100, 100, 500, 500, (256, 256)) # (750, 815) -> (500, 500) -> (256, 256)ax1 = plt.subplot(1, 2, 1)ax1.imshow(img)ax1.axis("off")ax1.set_title("orig img")ax2 = plt.subplot(1, 2, 2)ax2.imshow(img_resizedcrop)ax2.axis("off")ax2.set_title("resizedcrop img")plt.show()水平翻转 (Horizontally flip) 给定的 PIL Image.
def hflip(img): """ Args: img (PIL Image): Image to be flipped. Returns: PIL Image: Horizontall flipped image. """ if not _is_pil_image(img): raise TypeError('img should be PIL Image. Got {}'.format(type(img))) return img.transpose(Image.FLIP_LEFT_RIGHT)垂直翻转 (Vertically flip) 给定的 PIL Image.
def vflip(img): """ Args: img (PIL Image): Image to be flipped. Returns: PIL Image: Vertically flipped image. """ if not _is_pil_image(img): raise TypeError('img should be PIL Image. Got {}'.format(type(img))) return img.transpose(Image.FLIP_TOP_BOTTOM)# example:img_hflip = hflip(img)img_vflip = vflip(img)ax1 = plt.subplot(1, 3, 1)ax1.imshow(img)ax1.axis("off")ax1.set_title("orig img")ax2 = plt.subplot(1, 3, 2)ax2.imshow(img_hflip)ax2.axis("off")ax2.set_title("hflip img")ax3 = plt.subplot(1, 3, 3)ax3.imshow(img_vflip)ax3.axis("off")ax3.set_title("vflip img")plt.show()Crop the given PIL Image into four corners and the central crop.
从给定 PIL Image 的四个角和中间裁剪出五个子图像.
将给定 PIL Image 裁剪出的四个角和中间部分的五个子图像,每个子图像进行翻转处理. 默认时水平翻转.
def ten_crop(img, size, vertical_flip=False): """ Args: size (sequence or int): Desired output size of the crop. If size is an int instead of sequence like (h, w), a square crop (size, size) is made. vertical_flip (bool): Use vertical flipping instead of horizontal Returns: tuple: tuple (tl, tr, bl, br, center, tl_flip, tr_flip, bl_flip, br_flip, center_flip) Corresponding top left, top right, bottom left, bottom right and center crop and same for the flipped image. """ if isinstance(size, numbers.Number): size = (int(size), int(size)) else: assert len(size) == 2, "Please provide only two dimensions (h, w) for size." first_five = five_crop(img, size) if vertical_flip: img = vflip(img) else: img = hflip(img) second_five = five_crop(img, size) return first_five + second_five调整对比度.
def adjust_contrast(img, contrast_factor): """ Args: img (PIL Image): PIL Image to be adjusted. contrast_factor (float): How much to adjust the contrast. Can be any non negative number. 0 gives a solid gray image, 1 gives the original image, 2 increases the contrast by a factor of 2. Returns: PIL Image: Contrast adjusted image. """ if not _is_pil_image(img): raise TypeError('img should be PIL Image. Got {}'.format(type(img))) enhancer = ImageEnhance.Contrast(img) img = enhancer.enhance(contrast_factor) return img# example:img_adjust_contrast = adjust_contrast(img, 2.5)# visax1 = plt.subplot(1, 2, 1)ax1.imshow(img)ax1.axis("off")ax1.set_title("orig img")ax2 = plt.subplot(1, 2, 2)ax2.imshow(img_adjust_contrast)ax2.axis("off")ax2.set_title("adjust_contrast img")plt.show()调整颜色饱和度.
def adjust_saturation(img, saturation_factor): """ Args: img (PIL Image): PIL Image to be adjusted. saturation_factor (float): How much to adjust the saturation. 0 will give a black and white image, 1 will give the original image while 2 will enhance the saturation by a factor of 2. Returns: PIL Image: Saturation adjusted image. """ if not _is_pil_image(img): raise TypeError('img should be PIL Image. Got {}'.format(type(img))) enhancer = ImageEnhance.Color(img) img = enhancer.enhance(saturation_factor) return img# exampleimg_adjust_saturation = adjust_saturation(img, 2.5)# visax1 = plt.subplot(1, 2, 1)ax1.imshow(img)ax1.axis("off")ax1.set_title("orig img")ax2 = plt.subplot(1, 2, 2)ax2.imshow(img_adjust_saturation)ax2.axis("off")ax2.set_title("adjust_saturation img")plt.show()调整图像 HUE.
通过将图像转换为 HSV 空间,并周期地移动在 hue 通道(H) 的强度,以实现图像 hue 的调整.
最后,再将结果转换回原始的图像模式.参数 hue_factor - H 通道平移的因子,其值必须在区间 [-0.5, 0.5].
def adjust_hue(img, hue_factor): """ Args: img (PIL Image): PIL Image to be adjusted. hue_factor (float): How much to shift the hue channel. Should be in [-0.5, 0.5]. 0.5 and -0.5 give complete reversal of hue channel in HSV space in positive and negative direction respectively. 0 means no shift. Therefore, both -0.5 and 0.5 will give an image with complementary colors while 0 gives the original image. Returns: PIL Image: Hue adjusted image. """ if not(-0.5 <= hue_factor <= 0.5): raise ValueError('hue_factor is not in [-0.5, 0.5].'.format(hue_factor)) if not _is_pil_image(img): raise TypeError('img should be PIL Image. Got {}'.format(type(img))) input_mode = img.mode if input_mode in {'L', '1', 'I', 'F'}: return img h, s, v = img.convert('HSV').split() np_h = np.array(h, dtype=np.uint8) # uint8 addition take cares of rotation across boundaries with np.errstate(over='ignore'): np_h += np.uint8(hue_factor * 255) h = Image.fromarray(np_h, 'L') img = Image.merge('HSV', (h, s, v)).convert(input_mode) return img# example:img_adjust_hue = adjust_hue(img, 0.5)# visax1 = plt.subplot(1, 2, 1)ax1.imshow(img)ax1.axis("off")ax1.set_title("orig img")ax2 = plt.subplot(1, 2, 2)ax2.imshow(img_adjust_hue)ax2.axis("off")ax2.set_title("adjust_hue img")plt.show()对图像进行伽马校正(gamma correction). 也被叫作 Power Law Transform.
def adjust_gamma(img, gamma, gain=1): """ Args: img (PIL Image): PIL Image to be adjusted. gamma (float): Non negative real number, 如公式中的 \gamma 值. gamma larger than 1 make the shadows darker, while gamma smaller than 1 make dark regions lighter. gain (float): The constant multiplier. """ if not _is_pil_image(img): raise TypeError('img should be PIL Image. Got {}'.format(type(img))) if gamma < 0: raise ValueError('Gamma should be a non-negative real number') input_mode = img.mode img = img.convert('RGB') gamma_map = [255 * gain * pow(ele / 255., gamma) for ele in range(256)] * 3 img = img.point(gamma_map) # use PIL's point-function to accelerate this part img = img.convert(input_mode) return img# example:img_adjust_gamma = adjust_gamma(img, 0.5)# visax1 = plt.subplot(1, 2, 1)ax1.imshow(img)ax1.axis("off")ax1.set_title("orig img")ax2 = plt.subplot(1, 2, 2)ax2.imshow(img_adjust_gamma)ax2.axis("off")ax2.set_title("adjust_gamma img")plt.show()旋转图像.
参数 resample
可选值:PIL.Image.NEAREST, PIL.Image.BILINEAR, PIL.Image.BICUBIC.
如果参数 resample 被忽略,或图像的模式是 1 或 P,则resample=PIL.Image.NEAREST.
参数 expand
如果 expand=True,则延展输出图像,以能包含旋转后的全部图像.
如果 expand=False 或被忽略,则保持输出图像与输入图像的尺寸一致.
expand 假设旋转是以中心进行旋转,且没有平移.
保持图像中心不变,进行仿射变换.
def _get_inverse_affine_matrix(center, angle, translate, scale, shear): # Helper method to compute inverse matrix for affine transformation # As it is explained in PIL.Image.rotate # We need compute INVERSE of affine transformation matrix: M = T * C * RSS * C^-1 # where T is translation matrix: [1, 0, tx | 0, 1, ty | 0, 0, 1] # C is translation matrix to keep center: [1, 0, cx | 0, 1, cy | 0, 0, 1] # RSS is rotation with scale and shear matrix # RSS(a, scale, shear) = [ cos(a)*scale -sin(a + shear)*scale 0] # [ sin(a)*scale cos(a + shear)*scale 0] # [ 0 0 1] # Thus, the inverse is M^-1 = C * RSS^-1 * C^-1 * T^-1 angle = math.radians(angle) shear = math.radians(shear) scale = 1.0 / scale # Inverted rotation matrix with scale and shear d = math.cos(angle + shear) * math.cos(angle) + math.sin(angle + shear) * math.sin(angle) matrix = [ math.cos(angle + shear), math.sin(angle + shear), 0, -math.sin(angle), math.cos(angle), 0 ] matrix = [scale / d * m for m in matrix] # Apply inverse of translation and of center translation: RSS^-1 * C^-1 * T^-1 matrix[2] += matrix[0] * (-center[0] - translate[0]) + matrix[1] * (-center[1] - translate[1]) matrix[5] += matrix[3] * (-center[0] - translate[0]) + matrix[4] * (-center[1] - translate[1]) # Apply center translation: C * RSS^-1 * C^-1 * T^-1 matrix[2] += center[0] matrix[5] += center[1] return matrixdef affine(img, angle, translate, scale, shear, resample=0, fillcolor=None): """ Args: img (PIL Image): PIL Image to be rotated. angle (float or int): rotation angle in degrees between -180 and 180, clockwise direction. translate (list or tuple of integers): horizontal and vertical translations (post-rotation translation) scale (float): overall scale shear (float): shear angle value in degrees between -180 to 180, clockwise direction. resample (``PIL.Image.NEAREST`` or ``PIL.Image.BILINEAR`` or ``PIL.Image.BICUBIC``, optional): fillcolor (int): Optional fill color for the area outside the transform in the output image. (Pillow>=5.0.0) """ if not _is_pil_image(img): raise TypeError('img should be PIL Image. Got {}'.format(type(img))) assert isinstance(translate, (tuple, list)) and len(translate) == 2, \ "Argument translate should be a list or tuple of length 2" assert scale > 0.0, "Argument scale should be positive" output_size = img.size center = (img.size[0] * 0.5 + 0.5, img.size[1] * 0.5 + 0.5) matrix = _get_inverse_affine_matrix(center, angle, translate, scale, shear) kwargs = {"fillcolor": fillcolor} if PILLOW_VERSION[0] == '5' else {} return img.transform(output_size, Image.AFFINE, matrix, resample, **kwargs)将图像转换为灰度图.
def to_grayscale(img, num_output_channels=1): """ Args: img (PIL Image): Image to be converted to grayscale. Returns: PIL Image: Grayscale version of the image. if num_output_channels = 1 : returned image is single channel if num_output_channels = 3 : returned image is 3 channel with r = g = b """ if not _is_pil_image(img): raise TypeError('img should be PIL Image. Got {}'.format(type(img))) if num_output_channels == 1: img = img.convert('L') elif num_output_channels == 3: img = img.convert('L') np_img = np.array(img, dtype=np.uint8) np_img = np.dstack([np_img, np_img, np_img]) img = Image.fromarray(np_img, 'RGB') else: raise ValueError('num_output_channels should be either 1 or 3') return imghttps://www.aiuai.cn/aifarm759.html
到此这篇关于Pytorch 图像变换函数集合小结的文章就介绍到这了,更多相关Pytorch 图像变换函数内容请搜索以前的文章或继续浏览下面的相关文章希望大家以后多多支持!
声明:本页内容来源网络,仅供用户参考;我单位不保证亦不表示资料全面及准确无误,也不保证亦不表示这些资料为最新信息,如因任何原因,本网内容或者用户因倚赖本网内容造成任何损失或损害,我单位将不会负任何法律责任。如涉及版权问题,请提交至online#300.cn邮箱联系删除。
pytorch中的transforms模块中包含了很多种对图像数据进行变换的函数,这些都是在我们进行图像数据读入步骤中必不可少的,下面我们讲解几种最常用的函数,
图像处理图像变换就是找到一个函数,把原始图像矩阵经过函数处理后,转换为目标图像矩阵. 可以分为两种方式,即像素级别的变换和区域级别的变换Pointoperat
离散余弦变换/Discretecosinetransform,根据离散傅里叶变换的性质,实偶函数的傅里叶变换只含实的余弦项,而数字图像都是实数矩阵,因此构造了一
计算机视觉方面朋友都需要跟图像打交道,在pytorch中图像与我们平时在matlab中见到的图像数据格式有所不同。matlab中我们通常使用函数imread()
本文实例讲述了C#图像线性变换的方法。分享给大家供大家参考。具体如下://定义图像线性运算函数(y=kx+v)privatestaticBitmapLinear