tfjs-core/src/ops/browser.ts

/**
 * @license
 * Copyright 2019 Google LLC. All Rights Reserved.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * =============================================================================
 */

import {ENGINE} from '../engine';
import {env} from '../environment';
import {Draw, DrawAttrs, DrawInputs, FromPixels, FromPixelsAttrs, FromPixelsInputs} from '../kernel_names';
import {getKernel, NamedAttrMap} from '../kernel_registry';
import {Tensor, Tensor2D, Tensor3D} from '../tensor';
import {NamedTensorMap} from '../tensor_types';
import {convertToTensor} from '../tensor_util_env';
import {DrawOptions, ImageOptions, PixelData, TensorLike} from '../types';

import {cast} from './cast';
import {op} from './operation';
import {tensor3d} from './tensor3d';

// 2D canvas context that `fromPixels_` creates on first use and then reuses
// to rasterize image, video and ImageBitmap inputs before reading pixels back.
let fromPixels2DContext: CanvasRenderingContext2D;
// Set to true once `toPixels` has logged its "use tf.browser.draw instead"
// warning, so that the warning is only printed a single time.
let hasToPixelsWarned = false;

/**
 * Creates a `tf.Tensor` from an image.
 *
 * ```js
 * const image = new ImageData(1, 1);
 * image.data[0] = 100;
 * image.data[1] = 150;
 * image.data[2] = 200;
 * image.data[3] = 255;
 *
 * tf.browser.fromPixels(image).print();
 * ```
 *
 * @param pixels The input image to construct the tensor from. The
 * supported image types are all 4-channel. You can also pass in an image
 * object with following attributes:
 * `{data: Uint8Array; width: number; height: number}`
 * @param numChannels The number of channels of the output tensor. A
 * numChannels value less than 4 allows you to ignore channels. Defaults to
 * 3 (ignores alpha channel of input image).
 *
 * @returns A Tensor3D with the shape `[height, width, numChannels]`.
 *
 * @throws Error if `numChannels` is greater than 4, if `pixels` is null or
 * undefined, if `pixels` is not one of the supported input types, or if no
 * 2D canvas context can be created to read the pixels back.
 *
 * Note: fromPixels can be lossy in some cases, same image may result in
 * slightly different tensor values, if rendered by different rendering
 * engines. This means that results from different browsers, or even same
 * browser with CPU and GPU rendering engines can be different. See discussion
 * in details:
 * https://github.com/tensorflow/tfjs/issues/5482
 *
 * @doc {heading: 'Browser', namespace: 'browser', ignoreCI: true}
 */
function fromPixels_(
    pixels: PixelData|ImageData|HTMLImageElement|HTMLCanvasElement|
    HTMLVideoElement|ImageBitmap,
    numChannels = 3): Tensor3D {
  // Sanity checks.
  if (numChannels > 4) {
    throw new Error(
        'Cannot construct Tensor with more than 4 channels from pixels.');
  }
  if (pixels == null) {
    throw new Error('pixels passed to tf.browser.fromPixels() can not be null');
  }
  // Classify the input. The DOM constructors are probed with `typeof` first
  // because they are not defined in every environment (e.g. workers, node).
  let isPixelData = false;
  let isImageData = false;
  let isVideo = false;
  let isImage = false;
  let isCanvasLike = false;
  let isImageBitmap = false;
  if ((pixels as PixelData).data instanceof Uint8Array) {
    isPixelData = true;
  } else if (
      typeof (ImageData) !== 'undefined' && pixels instanceof ImageData) {
    isImageData = true;
  } else if (
      typeof (HTMLVideoElement) !== 'undefined' &&
      pixels instanceof HTMLVideoElement) {
    isVideo = true;
  } else if (
      typeof (HTMLImageElement) !== 'undefined' &&
      pixels instanceof HTMLImageElement) {
    isImage = true;
    // tslint:disable-next-line: no-any
  } else if ((pixels as any).getContext != null) {
    // Duck-typed so that anything exposing `getContext` is treated as a
    // canvas.
    isCanvasLike = true;
  } else if (
      typeof (ImageBitmap) !== 'undefined' && pixels instanceof ImageBitmap) {
    isImageBitmap = true;
  } else {
    // The accepted plain-object form carries a Uint8Array (see the check
    // above), so the message must name that type.
    throw new Error(
        'pixels passed to tf.browser.fromPixels() must be either an ' +
        `HTMLVideoElement, HTMLImageElement, HTMLCanvasElement, ImageData ` +
        `in browser, or OffscreenCanvas, ImageData in webworker` +
        ` or {data: Uint8Array, width: number, height: number}, ` +
        `but was ${(pixels as {}).constructor.name}`);
  }
  // If the current backend has 'FromPixels' registered, it has a more
  // efficient way of handling pixel uploads, so we call that.
  const kernel = getKernel(FromPixels, ENGINE.backendName);
  if (kernel != null) {
    const inputs: FromPixelsInputs = {pixels};
    const attrs: FromPixelsAttrs = {numChannels};
    return ENGINE.runKernel(
        FromPixels, inputs as unknown as NamedTensorMap,
        attrs as unknown as NamedAttrMap);
  }

  // Videos expose their frame size through videoWidth / videoHeight.
  const [width, height] = isVideo ?
      [
        (pixels as HTMLVideoElement).videoWidth,
        (pixels as HTMLVideoElement).videoHeight
      ] :
      [pixels.width, pixels.height];
  let vals: Uint8ClampedArray|Uint8Array;

  if (isCanvasLike) {
    vals =
        // tslint:disable-next-line:no-any
        (pixels as any).getContext('2d').getImageData(0, 0, width, height).data;
  } else if (isImageData || isPixelData) {
    vals = (pixels as PixelData | ImageData).data;
  } else if (isImage || isVideo || isImageBitmap) {
    // These inputs have no directly readable pixel buffer: draw them onto a
    // shared scratch canvas and read the RGBA bytes back from it.
    if (fromPixels2DContext == null) {
      if (typeof document === 'undefined') {
        if (typeof OffscreenCanvas !== 'undefined' &&
            typeof OffscreenCanvasRenderingContext2D !== 'undefined') {
          // @ts-ignore
          fromPixels2DContext = new OffscreenCanvas(1, 1).getContext('2d');
        } else {
          throw new Error(
              'Cannot parse input in current context. ' +
              'Reason: OffscreenCanvas Context2D rendering is not supported.');
        }
      } else {
        fromPixels2DContext = document.createElement('canvas').getContext(
            '2d', {willReadFrequently: true});
      }
    }
    fromPixels2DContext.canvas.width = width;
    fromPixels2DContext.canvas.height = height;
    fromPixels2DContext.drawImage(
        pixels as HTMLVideoElement, 0, 0, width, height);
    vals = fromPixels2DContext.getImageData(0, 0, width, height).data;
  }
  let values: Int32Array;
  if (numChannels === 4) {
    values = new Int32Array(vals);
  } else {
    // The source is read with a stride of 4 values per pixel; copy only the
    // first `numChannels` values of each pixel.
    const numPixels = width * height;
    values = new Int32Array(numPixels * numChannels);
    for (let i = 0; i < numPixels; i++) {
      for (let channel = 0; channel < numChannels; ++channel) {
        values[i * numChannels + channel] = vals[i * 4 + channel];
      }
    }
  }
  const outShape: [number, number, number] = [height, width, numChannels];
  return tensor3d(values, outShape, 'int32');
}

// Helper functions for |fromPixelsAsync| to check whether the input can
// be wrapped into imageBitmap.
/**
 * Type guard: true when `pixels` is non-null and its `data` field is a
 * `Uint8Array`, i.e. it is the plain `PixelData` object form.
 */
function isPixelData(pixels: PixelData|ImageData|HTMLImageElement|
                     HTMLCanvasElement|HTMLVideoElement|
                     ImageBitmap): pixels is PixelData {
  if (pixels == null) {
    return false;
  }
  const {data} = pixels as PixelData;
  return data instanceof Uint8Array;
}

/**
 * Returns true only when a `window` global exists, the `ImageBitmap` type is
 * defined, and `window` has its own `createImageBitmap` property.
 */
function isImageBitmapFullySupported() {
  if (typeof window === 'undefined') {
    return false;
  }
  if (typeof (ImageBitmap) === 'undefined') {
    return false;
  }
  return window.hasOwnProperty('createImageBitmap');
}

/**
 * Returns true when `pixels` is non-null and neither its `width` nor its
 * `height` is exactly 0.
 */
function isNonEmptyPixels(pixels: PixelData|ImageData|HTMLImageElement|
                          HTMLCanvasElement|HTMLVideoElement|ImageBitmap) {
  if (pixels == null) {
    return false;
  }
  const hasZeroSide = pixels.width === 0 || pixels.height === 0;
  return !hasZeroSide;
}

/**
 * Decides whether `pixels` may be wrapped into an ImageBitmap: ImageBitmap
 * must be fully supported, and `pixels` must not already be an ImageBitmap,
 * must be non-empty and must not be a plain `PixelData` object.
 */
function canWrapPixelsToImageBitmap(pixels: PixelData|ImageData|
                                    HTMLImageElement|HTMLCanvasElement|
                                    HTMLVideoElement|ImageBitmap) {
  // The support check runs first so that `ImageBitmap` is only referenced
  // when it is known to be defined.
  if (!isImageBitmapFullySupported()) {
    return false;
  }
  if (pixels instanceof ImageBitmap) {
    return false;
  }
  if (!isNonEmptyPixels(pixels)) {
    return false;
  }
  return !isPixelData(pixels);
}

/**
 * Creates a `tf.Tensor` from an image in async way.
 *
 * ```js
 * const image = new ImageData(1, 1);
 * image.data[0] = 100;
 * image.data[1] = 150;
 * image.data[2] = 200;
 * image.data[3] = 255;
 *
 * (await tf.browser.fromPixelsAsync(image)).print();
 * ```
 * This API is the async version of fromPixels. The API will first
 * check |WRAP_TO_IMAGEBITMAP| flag, and try to wrap the input to
 * imageBitmap if the flag is set to true.
 *
 * @param pixels The input image to construct the tensor from. The
 * supported image types are all 4-channel. You can also pass in an image
 * object with following attributes:
 * `{data: Uint8Array; width: number; height: number}`
 * @param numChannels The number of channels of the output tensor. A
 * numChannels value less than 4 allows you to ignore channels. Defaults to
 * 3 (ignores alpha channel of input image).
 *
 * @returns A promise that resolves to the tensor produced by `fromPixels_`
 * for either the wrapped ImageBitmap or the original `pixels`.
 *
 * @doc {heading: 'Browser', namespace: 'browser', ignoreCI: true}
 */
export async function fromPixelsAsync(
    pixels: PixelData|ImageData|HTMLImageElement|HTMLCanvasElement|
    HTMLVideoElement|ImageBitmap,
    numChannels = 3): Promise<Tensor3D> {
  // Fall back to the original input unless wrapping succeeds below.
  let inputs: PixelData|ImageData|HTMLImageElement|HTMLCanvasElement|
      HTMLVideoElement|ImageBitmap = pixels;

  // Check whether the backend needs to wrap |pixels| to imageBitmap and
  // whether |pixels| can be wrapped to imageBitmap.
  if (env().getBool('WRAP_TO_IMAGEBITMAP') &&
      canWrapPixelsToImageBitmap(pixels)) {
    // Force the imageBitmap creation to not do any premultiply alpha
    // ops.
    let imageBitmap;

    try {
      // wrap in try-catch block, because createImageBitmap may not work
      // properly in some browsers, e.g.
      // https://bugzilla.mozilla.org/show_bug.cgi?id=1335594
      // tslint:disable-next-line: no-any
      imageBitmap = await (createImageBitmap as any)(
          pixels as ImageBitmapSource, {premultiplyAlpha: 'none'});
    } catch (e) {
      // Deliberate best effort: on failure the original input is used.
      imageBitmap = null;
    }

    if (imageBitmap != null) {
      // createImageBitmap will clip the source size.
      // In some cases, the input will have larger size than its content.
      // E.g. new Image(10, 10) but with 1 x 1 content. Using
      // createImageBitmap will clip the size from 10 x 10 to 1 x 1, which
      // is not correct. We should avoid wrapping such resource to
      // imageBitmap.
      if (imageBitmap.width === pixels.width &&
          imageBitmap.height === pixels.height) {
        inputs = imageBitmap;
      } else if (typeof imageBitmap.close === 'function') {
        // The bitmap is not going to be used; release it explicitly
        // instead of leaving it to the garbage collector.
        imageBitmap.close();
      }
    }
  }

  return fromPixels_(inputs, numChannels);
}

/**
 * Checks that `img` can be rendered as an image.
 *
 * Throws an Error when the rank is not 2 or 3, when the depth (1 for rank 2,
 * otherwise `shape[2]`) is 2 or greater than 4, or when the dtype is neither
 * 'float32' nor 'int32'.
 */
function validateImgTensor(img: Tensor2D|Tensor3D) {
  const rankIsSupported = img.rank === 2 || img.rank === 3;
  if (!rankIsSupported) {
    throw new Error(
        `toPixels only supports rank 2 or 3 tensors, got rank ${img.rank}.`);
  }

  // Rank-2 tensors have no channel axis and count as a single channel.
  let depth = 1;
  if (img.rank !== 2) {
    depth = img.shape[2];
  }

  const depthIsUnsupported = depth === 2 || depth > 4;
  if (depthIsUnsupported) {
    throw new Error(
        `toPixels only supports depth of size ` +
        `1, 3 or 4 but got ${depth}`);
  }

  switch (img.dtype) {
    case 'float32':
    case 'int32':
      break;
    default:
      throw new Error(
          `Unsupported type for toPixels: ${img.dtype}.` +
          ` Please use float32 or int32 tensors.`);
  }
}

/**
 * Validates the image options passed to `draw`.
 *
 * @param imageOptions Options to check; may be null or undefined, in which
 * case alpha is treated as 1.
 * @throws Error when `alpha` is greater than 1 or less than 0.
 */
function validateImageOptions(imageOptions: ImageOptions) {
  // Only a missing alpha falls back to 1.
  const alpha = imageOptions?.alpha ?? 1;
  if (alpha > 1 || alpha < 0) {
    throw new Error(`Alpha value ${alpha} is supposed to be in range [0 - 1].`);
  }
}

/**
 * Draws a `tf.Tensor` of pixel values to a byte array or optionally a
 * canvas.
 *
 * When the dtype of the input is 'float32', we assume values in the range
 * [0-1]. Otherwise, when input is 'int32', we assume values in the range
 * [0-255].
 *
 * Returns a promise that resolves when the canvas has been drawn to.
 *
 * @param img A rank-2 tensor with shape `[height, width]`, or a rank-3 tensor
 * of shape `[height, width, numChannels]`. If rank-2, draws grayscale. If
 * rank-3, must have depth of 1, 3 or 4. When depth of 1, draws
 * grayscale. When depth of 3, we draw with the first three components of
 * the depth dimension corresponding to r, g, b and alpha = 1. When depth of
 * 4, all four components of the depth dimension correspond to r, g, b, a.
 * @param canvas The canvas to draw to.
 *
 * @returns A promise resolving to the RGBA bytes (4 per pixel) of the image.
 *
 * @throws Error if the tensor has an unsupported rank, depth or dtype, or if
 * a value lies outside the range expected for its dtype.
 *
 * @doc {heading: 'Browser', namespace: 'browser'}
 */
export async function toPixels(
    img: Tensor2D|Tensor3D|TensorLike,
    canvas?: HTMLCanvasElement): Promise<Uint8ClampedArray> {
  let $img = convertToTensor(img, 'img', 'toPixels');
  if (!(img instanceof Tensor)) {
    // Assume int32 if user passed a native array.
    const originalImgTensor = $img;
    try {
      $img = cast(originalImgTensor, 'int32');
    } finally {
      // Release the pre-cast tensor even when the cast itself fails.
      originalImgTensor.dispose();
    }
  }

  try {
    validateImgTensor($img);

    const [height, width] = $img.shape.slice(0, 2);
    const depth = $img.rank === 2 ? 1 : $img.shape[2];
    const data = await $img.data();
    // float32 inputs are scaled from [0 - 1] up to [0 - 255].
    const multiplier = $img.dtype === 'float32' ? 255 : 1;
    const bytes = new Uint8ClampedArray(width * height * 4);

    for (let i = 0; i < height * width; ++i) {
      // Alpha defaults to fully opaque; it is only overwritten for depth 4.
      const rgba = [0, 0, 0, 255];

      for (let d = 0; d < depth; d++) {
        const value = data[i * depth + d];

        if ($img.dtype === 'float32') {
          if (value < 0 || value > 1) {
            throw new Error(
                `Tensor values for a float32 Tensor must be in the ` +
                `range [0 - 1] but encountered ${value}.`);
          }
        } else if ($img.dtype === 'int32') {
          if (value < 0 || value > 255) {
            throw new Error(
                `Tensor values for a int32 Tensor must be in the ` +
                `range [0 - 255] but encountered ${value}.`);
          }
        }

        if (depth === 1) {
          // Grayscale: replicate the single channel into r, g and b.
          rgba[0] = value * multiplier;
          rgba[1] = value * multiplier;
          rgba[2] = value * multiplier;
        } else {
          rgba[d] = value * multiplier;
        }
      }

      const j = i * 4;
      bytes[j + 0] = Math.round(rgba[0]);
      bytes[j + 1] = Math.round(rgba[1]);
      bytes[j + 2] = Math.round(rgba[2]);
      bytes[j + 3] = Math.round(rgba[3]);
    }

    if (canvas != null) {
      if (!hasToPixelsWarned) {
        // Only warn when the current backend has a Draw kernel registered.
        const kernel = getKernel(Draw, ENGINE.backendName);
        if (kernel != null) {
          console.warn(
              'tf.browser.toPixels is not efficient to draw tensor on canvas. ' +
              'Please try tf.browser.draw instead.');
          hasToPixelsWarned = true;
        }
      }

      canvas.width = width;
      canvas.height = height;
      const ctx = canvas.getContext('2d');
      const imageData = new ImageData(bytes, width, height);
      ctx.putImageData(imageData, 0, 0);
    }
    return bytes;
  } finally {
    // Dispose the tensor created here on every exit path, including the
    // validation and range-check failures above. A tensor supplied by the
    // caller is left untouched.
    if ($img !== img) {
      $img.dispose();
    }
  }
}

/**
 * Draws a `tf.Tensor` to a canvas.
 *
 * When the dtype of the input is 'float32', we assume values in the range
 * [0-1]. Otherwise, when input is 'int32', we assume values in the range
 * [0-255].
 *
 * @param image The tensor to draw on the canvas. Must match one of
 * these shapes:
 *   - Rank-2 with shape `[height, width]`: Drawn as grayscale.
 *   - Rank-3 with shape `[height, width, 1]`: Drawn as grayscale.
 *   - Rank-3 with shape `[height, width, 3]`: Drawn as RGB with alpha set in
 *     `imageOptions` (defaults to 1, which is opaque).
 *   - Rank-3 with shape `[height, width, 4]`: Drawn as RGBA.
 * @param canvas The canvas to draw to.
 * @param options The configuration arguments for image to be drawn and the
 *     canvas to draw to.
 *
 * @throws Error if the tensor has an unsupported rank, depth or dtype, or if
 * `options.imageOptions.alpha` is outside the range [0 - 1].
 *
 * @doc {heading: 'Browser', namespace: 'browser'}
 */
export function draw(
    image: Tensor2D|Tensor3D|TensorLike, canvas: HTMLCanvasElement,
    options?: DrawOptions): void {
  let $img = convertToTensor(image, 'img', 'draw');
  if (!(image instanceof Tensor)) {
    // Assume int32 if user passed a native array.
    const originalImgTensor = $img;
    try {
      $img = cast(originalImgTensor, 'int32');
    } finally {
      // Release the pre-cast tensor even when the cast itself fails.
      originalImgTensor.dispose();
    }
  }

  try {
    validateImgTensor($img);
    validateImageOptions(options?.imageOptions);

    const inputs: DrawInputs = {image: $img};
    const attrs: DrawAttrs = {canvas, options};
    ENGINE.runKernel(
        Draw, inputs as unknown as NamedTensorMap,
        attrs as unknown as NamedAttrMap);
  } finally {
    // Dispose the tensor created here from a non-Tensor input, on success
    // and on failure alike. A tensor supplied by the caller is left
    // untouched.
    if ($img !== image) {
      $img.dispose();
    }
  }
}

// Exported `fromPixels`: the result of passing `fromPixels_` through `op`.
export const fromPixels = /* @__PURE__ */ op({fromPixels_});