How to use the larq.utils.register_keras_custom_object function in larq

To help you get started, we’ve selected a few larq examples based on popular ways this function is used in public projects.
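For context, `larq.utils.register_keras_custom_object` adds the decorated function or class to Keras' global custom-object registry, so it can be referenced by its string name in layer configurations and found again when a saved model is deserialized. Below is a minimal sketch of that workflow; the quantizer `scaled_sign` and the file name `model.h5` are made up for illustration, and real quantizers also define a pseudo-gradient, as the examples further down do.

import tensorflow as tf
import larq as lq

# Hypothetical quantizer: once registered, models that reference "scaled_sign"
# by name can be reloaded without passing `custom_objects` to `load_model`.
@lq.utils.register_keras_custom_object
@lq.utils.set_precision(1)
def scaled_sign(x):
    return 0.1 * lq.math.sign(x)

model = tf.keras.Sequential(
    [lq.layers.QuantDense(10, kernel_quantizer="scaled_sign", input_shape=(32,))]
)
model.save("model.h5")
restored = tf.keras.models.load_model("model.h5")  # no custom_objects needed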


github larq / zoo / larq_zoo / literature / dorefanet.py
@lq.utils.register_keras_custom_object
@lq.utils.set_precision(1)
def magnitude_aware_sign_unclipped(x):
    """
    Scaled sign function with identity pseudo-gradient as used for the weights
    in the DoReFa paper. The scale factor is calculated per layer.
    """
    scale_factor = tf.stop_gradient(tf.reduce_mean(tf.abs(x)))

    @tf.custom_gradient
    def _magnitude_aware_sign(x):
        return lq.math.sign(x) * scale_factor, lambda dy: dy

    return _magnitude_aware_sign(x)
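
To illustrate what the snippet above computes, here is a quick check of the forward values and the identity pseudo-gradient (a sketch, assuming the module path shown above is importable):

import tensorflow as tf
from larq_zoo.literature.dorefanet import magnitude_aware_sign_unclipped

# Sketch: the forward pass scales the sign of each element by the mean absolute
# value of the tensor, while the pseudo-gradient is the identity.
w = tf.constant([-2.0, -0.5, 0.5, 2.0])
with tf.GradientTape() as tape:
    tape.watch(w)
    q = magnitude_aware_sign_unclipped(w)
print(q.numpy())                    # [-1.25 -1.25  1.25  1.25], scale = mean(|w|) = 1.25
print(tape.gradient(q, w).numpy())  # [1. 1. 1. 1.], identity pseudo-gradient
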
github larq / larq / larq / optimizers_v2.py
                if predicate(var):
                    self.var_opt_mapping[var.name] = optimizer_index
                    num_optimizers += 1

            if num_optimizers > 1:
                raise ValueError(f"Variable `{var}` claimed by multiple optimizers.")
            if num_optimizers == 0:
                if self.default is not None:
                    self.var_opt_mapping[var.name] = self.DEFAULT_OPT_INDEX
                else:
                    warnings.warn(
                        f"No `default_optimizer` provided to train variable `{var}`."
                    )


@utils.register_keras_custom_object
class Bop(tf.keras.optimizers.Optimizer):
    """Binary optimizer (Bop).

    Bop is a latent-free optimizer for Binarized Neural Networks (BNNs) and
    Binary Weight Networks (BWN).

    Bop maintains an exponential moving average of the gradients controlled by
    `gamma`. If this average exceeds the `threshold`, a weight is flipped.
    Additionally, Bop accepts a regular optimizer that is applied to the
    non-binary weights in the network.

    The hyperparameter `gamma` is somewhat analogous to the learning rate in
    SGD methods: a high `gamma` results in rapid convergence but also makes
    training noisier.

    Note that the default `threshold` is not optimal for all situations.
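
A hedged usage sketch for the two snippets above, assuming the `CaseOptimizer` API from `larq.optimizers`: `Bop` is paired with a predicate that selects the binary kernels, and `default_optimizer` trains everything else. The `gamma` and `threshold` values are illustrative, not recommendations.

import tensorflow as tf
import larq as lq

optimizer = lq.optimizers.CaseOptimizer(
    (lq.optimizers.Bop.is_binary_variable, lq.optimizers.Bop(threshold=1e-8, gamma=1e-4)),
    default_optimizer=tf.keras.optimizers.Adam(0.01),  # for the non-binary weights
)
# then: model.compile(optimizer=optimizer, loss="sparse_categorical_crossentropy", metrics=["accuracy"])
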
github larq / larq / larq / quantizers.py
@utils.register_keras_custom_object
@utils.set_precision(1)
@tf.custom_gradient
def approx_sign(x):
    r"""
    Sign binarization function.
    \\[
    q(x) = \begin{cases}
      -1 & x < 0 \\\
      1 & x \geq 0
    \end{cases}
    \\]

    The gradient is estimated using the ApproxSign method.
    \\[\frac{\partial q(x)}{\partial x} = \begin{cases}
      (2 - 2 \left|x\right|) & \left|x\right| \leq 1 \\\
      0 & \left|x\right| > 1
    \end{cases}\\]
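
Since `approx_sign` is registered, it can be passed to a quantized layer by its string name; a sketch (not taken from the larq source):

import larq as lq

# Sketch: ApproxSign on the incoming activations, plain sign on the kernel.
layer = lq.layers.QuantConv2D(
    64,
    kernel_size=3,
    padding="same",
    input_quantizer="approx_sign",
    kernel_quantizer="ste_sign",
    kernel_constraint="weight_clip",
)
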
github larq / larq / larq / layers.py
            use_bias=use_bias,
            input_quantizer=input_quantizer,
            kernel_quantizer=kernel_quantizer,
            kernel_initializer=kernel_initializer,
            bias_initializer=bias_initializer,
            kernel_regularizer=kernel_regularizer,
            bias_regularizer=bias_regularizer,
            activity_regularizer=activity_regularizer,
            kernel_constraint=kernel_constraint,
            bias_constraint=bias_constraint,
            metrics=metrics,
            **kwargs,
        )


@utils.register_keras_custom_object
class QuantConv3D(QuantizerBase, tf.keras.layers.Conv3D):
    """3D convolution layer (e.g. spatial convolution over volumes).

    This layer creates a convolution kernel that is convolved
    with the layer input to produce a tensor of
    outputs. `input_quantizer` and `kernel_quantizer` are the element-wise quantization
    functions to use. If both quantization functions are `None` this layer is
    equivalent to `Conv3D`. If `use_bias` is True, a bias vector is created and
    added to the outputs. Finally, if `activation` is not `None`,
    it is applied to the outputs as well.

    When using this layer as the first layer in a model, provide the keyword argument
    `input_shape` (tuple of integers, does not include the sample axis),
    e.g. `input_shape=(128, 128, 128, 1)` for 128x128x128 volumes
    with a single channel, in `data_format="channels_last"`.
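
A usage sketch along the lines of that description, with `ste_sign` quantizers and the `weight_clip` constraint assumed from larq's standard API:

import tensorflow as tf
import larq as lq

model = tf.keras.Sequential([
    # First layer: `input_shape` excludes the sample axis, as described above.
    lq.layers.QuantConv3D(
        32,
        kernel_size=(3, 3, 3),
        input_quantizer="ste_sign",
        kernel_quantizer="ste_sign",
        kernel_constraint="weight_clip",
        input_shape=(128, 128, 128, 1),
    ),
    tf.keras.layers.GlobalAveragePooling3D(),
    tf.keras.layers.Dense(10),
])
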
github larq / larq / larq / quantizers.py
@utils.register_keras_custom_object
@utils.set_precision(1)
def ste_heaviside(x, clip_value=1.0):
    r"""
    Binarization function with output values 0 and 1.

    \\[
    q(x) = \begin{cases}
    +1 & x > 0 \\\
    0 & x \leq 0
    \end{cases}
    \\]

    The gradient is estimated using the Straight-Through Estimator
    (essentially the binarization is replaced by a clipped identity on the
    backward pass).
github larq / larq / larq / quantizers.py
    ```plot-activation
    quantizers.ste_heaviside
    ```

    # Arguments
    clip_value: Threshold for clipping gradients. If `None` gradients are not clipped.

    # Returns
    AND Binarization function
    """

    def __init__(self, clip_value=1.0):
        super().__init__(ste_heaviside, clip_value=clip_value)
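
The `__init__` above belongs to the `SteHeaviside` wrapper class (a `QuantizerFunctionWrapper` around `ste_heaviside`, mirroring `SwishSign` below). Either form can be used to configure a layer; a sketch, not taken from the larq docs:

import larq as lq

# Sketch: the class wrapper and the registered function name configure the same quantizer.
dense_a = lq.layers.QuantDense(
    64, input_quantizer=lq.quantizers.SteHeaviside(clip_value=1.0), kernel_quantizer="ste_sign"
)
dense_b = lq.layers.QuantDense(
    64, input_quantizer="ste_heaviside", kernel_quantizer="ste_sign"
)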


@utils.register_keras_custom_object
class SwishSign(QuantizerFunctionWrapper):
    r"""Sign binarization function.

    \\[
    q(x) = \begin{cases}
      -1 & x < 0 \\\
      1 & x \geq 0
    \end{cases}
    \\]

    The gradient is estimated using the SignSwish method.

    \\[
    \frac{\partial q_{\beta}(x)}{\partial x} = \frac{\beta\left\\{2-\beta x \tanh \left(\frac{\beta x}{2}\right)\right\\}}{1+\cosh (\beta x)}
    \\]
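
A usage sketch, assuming `beta` from the formula above is exposed as a constructor argument of the wrapper (as `clip_value` is for `SteHeaviside`); the value 5.0 is illustrative:

import larq as lq

# Sketch: SwishSign used for both the input and kernel quantization of a dense layer.
layer = lq.layers.QuantDense(
    256,
    input_quantizer=lq.quantizers.SwishSign(beta=5.0),
    kernel_quantizer=lq.quantizers.SwishSign(beta=5.0),
    kernel_constraint="weight_clip",
)
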
github larq / larq / larq / quantizers.py
@utils.register_keras_custom_object
@utils.set_precision(2)
def ste_tern(x, threshold_value=0.05, ternary_weight_networks=False, clip_value=1.0):
    r"""Ternarization function.

    \\[
    q(x) = \begin{cases}
    +1 & x > \Delta \\\
    0 & |x| < \Delta \\\
     -1 & x < - \Delta
    \end{cases}
    \\]

    where \\(\Delta\\) is the threshold, which can either be passed as an argument
    or calculated as in the original Ternary Weight Networks paper, such that

    \\[