How to use the einops.rearrange function in einops

To help you get started, we’ve selected a few einops examples based on popular ways it is used in public projects.
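
A minimal sketch of the core idea first, assuming only NumPy and einops are installed; the axis names and sizes below are illustrative and only have to match between the two sides of the pattern.

import numpy as np
from einops import rearrange

x = np.zeros((4, 100, 257))          # e.g. microphones, time frames, frequency bins
y = rearrange(x, 'd t f -> f t d')   # permute axes by name instead of by position
assert y.shape == (257, 100, 4)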

Example from fgnt/pb_bss: tests/test_distribution/test_spatial_mm.py (view on GitHub)

    Observation = stft(observation)
    num_samples = observation.shape[-1]

    Y_mm = rearrange(Observation, 'd t f -> f t d')

    t = Trainer()
    affiliation = t.fit(
        Y_mm,
        num_classes=3,
        iterations=iterations * 2,
        weight_constant_axis=-1,
    ).predict(Y_mm)
    
    pa = DHTVPermutationAlignment.from_stft_size(512)
    affiliation_pa = pa(rearrange(affiliation, 'f k t -> k f t'))
    affiliation_pa = rearrange(affiliation_pa, 'k f t -> k t f')

    Speech_image_0_est, Speech_image_1_est, Noise_image_est = Observation[reference_channel, :, :] * affiliation_pa

    speech_image_0_est = istft(Speech_image_0_est, num_samples=num_samples)
    speech_image_1_est = istft(Speech_image_1_est, num_samples=num_samples)
    noise_image_est = istft(Noise_image_est, num_samples=num_samples)

    ###########################################################################
    # Calculate the metrics

    speech_image = ex['audio_data']['speech_image']
    noise_image = ex['audio_data']['noise_image']
    speech_source = ex['audio_data']['speech_source']

    Speech_image = stft(speech_image)
    Noise_image = stft(noise_image)
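
The pattern 'd t f -> f t d' used above is simply a transpose with named axes; a small sketch of the equivalence, NumPy assumed and sizes invented:

import numpy as np
from einops import rearrange

Observation = np.random.randn(6, 50, 257)      # d (channels), t (frames), f (bins)
assert np.array_equal(
    rearrange(Observation, 'd t f -> f t d'),
    Observation.transpose(2, 1, 0),            # same result, but the pattern states the intent
)
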
Example from fgnt/sms_wsj: sms_wsj/examples/reference_systems.py (view on GitHub)

        else:
            raise ValueError(postfilter)

    Speech_prediction = apply_beamforming_vector(
        vector=rearrange(beamformers, 'f k d -> k f d', k=K, d=D, f=F),
        mix=rearrange(Observation, 'd t f -> f d t', d=D, t=T, f=F),
    )
    Speech_prediction = postfiler_fn(Speech_prediction)
    speech_prediction = istft(rearrange(Speech_prediction, 'k f t -> k t f', k=K, t=T, f=F), num_samples=N)

    if Speech_image is None:
        speech_contribution = None
    else:
        Speech_contribution = apply_beamforming_vector(
            vector=rearrange(beamformers, 'f k d -> k f d', k=K, d=D, f=F),
            mix=rearrange(Speech_image, '(ksource k) d t f -> ksource k f d t', k=1, d=D, t=T, f=F),
        )
        Speech_contribution = postfiler_fn(Speech_contribution)
        # ksource in [K-1, K]
        speech_contribution = istft(rearrange(Speech_contribution, 'ksource k f t -> ksource k t f', k=K, t=T, f=F), num_samples=N)

    if Noise_image is None:
        noise_contribution = None
    else:
        Noise_contribution = apply_beamforming_vector(
            vector=rearrange(beamformers, 'f k d -> k f d', k=K, d=D, f=F),
            mix=rearrange(Noise_image, '(k d) t f -> k f d t', k=1, d=D, t=T, f=F),
        )
        Noise_contribution = postfiler_fn(Noise_contribution)
        noise_contribution = istft(rearrange(Noise_contribution, 'k f t -> k t f', k=K, t=T, f=F), num_samples=N)

    metric = OutputMetrics(
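
The keyword arguments k=K, d=D, f=F passed to rearrange in this snippet are not needed for the permutation itself; einops uses them as extra shape checks and raises an error if they disagree with the actual tensor. A rough sketch with NumPy and made-up sizes:

import numpy as np
from einops import rearrange

beamformers = np.zeros((257, 2, 6))                              # f, k, d
_ = rearrange(beamformers, 'f k d -> k f d', k=2, d=6, f=257)    # lengths double as assertions
# rearrange(beamformers, 'f k d -> k f d', k=3)                  # would raise: k does not match the axis length 2
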
Example from fgnt/sms_wsj: sms_wsj/examples/reference_systems.py (view on GitHub)

     'mir_eval_sxr_sar': array([149.07223578, 147.06942287]),
     'mir_eval_sxr_selection': array([0, 1]),
     'invasive_sxr_sdr': array([12.32048218,  9.61471296]),
     'invasive_sxr_sir': array([12.41346788,  9.69274082]),
     'invasive_sxr_snr': array([29.06057363, 27.10901422])}

    """
    _, N = speech_source.shape
    K = mask.shape[-2]
    D, T, F = Observation.shape

    assert K < 10, (K, mask.shape, N, D, T, F)
    assert D < 30, (K, N, D, T, F)

    psds = get_power_spectral_density_matrix(
        rearrange(Observation, 'd t f -> f d t', d=D, t=T, f=F),
        rearrange(mask, 't k f -> f k t', k=K, t=T, f=F),
    )  # shape: f, ktarget, d, d

    assert psds.shape == (F, K, D, D), (psds.shape, (F, K, D, D))

    beamformers = list()
    for k_target in range(K):
        target_psd = psds[:, k_target]
        distortion_psd = np.sum(np.delete(psds, k_target, axis=1), axis=1)

        beamformers.append(
            get_single_source_bf_vector(
                bf_algorithm,
                target_psd_matrix=target_psd,
                noise_psd_matrix=distortion_psd,
            )
Example from fgnt/pb_bss: pb_bss/evaluation/wrapper.py (view on GitHub)

    def invasive_sxr(self):
        from pb_bss.evaluation.sxr_module import output_sxr
        invasive_sxr = output_sxr(
            rearrange(
                self.speech_contribution,
                'sources targets samples -> sources targets samples'
            )[:, self.selection, :],
            rearrange(
                self.noise_contribution, 'targets samples -> targets samples'
            )[self.selection, :],
            average_sources=False,
            return_dict=True,
        )
        return invasive_sxr
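
The identity patterns in invasive_sxr ('sources targets samples -> sources targets samples') move no data at all; they document the expected layout and fail if the input does not have exactly three axes. A minimal sketch, NumPy assumed:

import numpy as np
from einops import rearrange

contribution = np.zeros((2, 3, 16000))        # sources, targets, samples
same = rearrange(contribution, 'sources targets samples -> sources targets samples')
assert same.shape == contribution.shape
# a 2-d input would raise here, because the pattern expects exactly three axes
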
Example from fgnt/sms_wsj: sms_wsj/examples/reference_systems.py (view on GitHub)

        speech_contribution = None
    else:
        Speech_contribution = apply_beamforming_vector(
            vector=rearrange(beamformers, 'f k d -> k f d', k=K, d=D, f=F),
            mix=rearrange(Speech_image, '(ksource k) d t f -> ksource k f d t', k=1, d=D, t=T, f=F),
        )
        Speech_contribution = postfiler_fn(Speech_contribution)
        # ksource in [K-1, K]
        speech_contribution = istft(rearrange(Speech_contribution, 'ksource k f t -> ksource k t f', k=K, t=T, f=F), num_samples=N)

    if Noise_image is None:
        noise_contribution = None
    else:
        Noise_contribution = apply_beamforming_vector(
            vector=rearrange(beamformers, 'f k d -> k f d', k=K, d=D, f=F),
            mix=rearrange(Noise_image, '(k d) t f -> k f d t', k=1, d=D, t=T, f=F),
        )
        Noise_contribution = postfiler_fn(Noise_contribution)
        noise_contribution = istft(rearrange(Noise_contribution, 'k f t -> k t f', k=K, t=T, f=F), num_samples=N)

    metric = OutputMetrics(
            speech_prediction=speech_prediction,
            speech_source=speech_source,
            speech_contribution=speech_contribution,
            noise_contribution=noise_contribution,
            sample_rate=8000,
            enable_si_sdr=False,
    )

    return metric
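
The pattern '(k d) t f -> k f d t' with k=1 in the Noise_image branch splits the channel axis into a dummy source axis of length one plus the real channel axis, so the noise contribution gets the same leading dimensions as the speech contribution. Roughly, with NumPy and invented sizes:

import numpy as np
from einops import rearrange

Noise_image = np.zeros((6, 50, 257))                              # d, t, f
stacked = rearrange(Noise_image, '(k d) t f -> k f d t', k=1, d=6)
assert stacked.shape == (1, 257, 6, 50)                           # leading singleton source axis added
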
Example from fgnt/pb_bss: pb_bss/evaluation/wrapper.py (view on GitHub)

    def stoi(self):
        scores = pb_bss.evaluation.stoi(
            reference=rearrange(
                [self.speech_source] * self.channels,
                'channels sources samples -> sources channels samples'
            ),
            estimation=rearrange(
                [self.observation] * self.K_source,
                'sources channels samples -> sources channels samples'
            ),
            sample_rate=self.sample_rate,
        )
        return scores
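
rearrange also accepts a list of arrays, as the stoi wrapper above does: the list is first stacked along a new leading axis (named by the first axis on the left-hand side) and the pattern is then applied to the stacked tensor. A small sketch, NumPy assumed:

import numpy as np
from einops import rearrange

speech_source = np.zeros((2, 16000))                              # sources, samples
channels = 6
stacked = rearrange([speech_source] * channels,
                    'channels sources samples -> sources channels samples')
assert stacked.shape == (2, 6, 16000)
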
Example from moabitcoin/ig65m-pytorch: ig65m/cli/dreamer.py (view on GitHub)

        Rearrange("t h w c -> c t h w"),
        Resize(args.frame_size),
        Normalize(mean=mean, std=std),
    ])

    # Take first clip from video only for now.
    # Could be made to run on the full video.

    dataset = VideoDataset(args.video, clip=32, transform=transform)

    video = next(iter(dataset))
    # video = torch.rand(3, 32, 128, 128)

    assert video.size()[0:2] == (3, 32)

    video = rearrange(video, "c t h w -> () c t h w")
    video = video.data.cpu().numpy()

    # Put video data into a graph leaf node with grads and on device
    video = torch.tensor(video, requires_grad=True, device=device)

    # regularizer term
    variation = TotalVariationLoss()

    denormalize = Denormalize(mean=mean, std=std)

    progress = tqdm(range(args.num_epochs))

    for epoch in progress:
        loss = 0.

        acts = model(video)
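
Rearrange (with a capital R) in the transform pipeline above is the layer form of the same operation, importable from einops.layers.torch, so it can sit inside Compose or an nn.Sequential. A hedged sketch assuming PyTorch:

import torch
from einops.layers.torch import Rearrange

to_channels_first = Rearrange('t h w c -> c t h w')
clip = torch.zeros(32, 128, 128, 3)                               # frames, height, width, channels
assert to_channels_first(clip).shape == (3, 32, 128, 128)
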
Example from harvardnlp/namedtensor: namedtensor/core.py (view on GitHub)

    def _force_order(self, names):
        s = ""
        ex = []
        for d in names:
            if d not in self._schema._names:
                ex.append(d)
                s += " ()"
            else:
                ex.append(d)
                s += " " + d
        tensor = rearrange(self._tensor, "%s -> %s" % (self._to_einops(), s))
        return self.__class__(tensor, ex)
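
The namedtensor helper above assembles the pattern from plain strings at run time; '()' on the right-hand side inserts a new axis of length one for every name the tensor does not have yet. For example, NumPy assumed:

import numpy as np
from einops import rearrange

x = np.zeros((10, 20))                                            # h, w
pattern = '%s -> %s' % ('h w', 'h () w')                          # pattern built as an ordinary string
y = rearrange(x, pattern)
assert y.shape == (10, 1, 20)
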
Example from fgnt/sms_wsj: sms_wsj/examples/reference_systems.py (view on GitHub)

    Example:

        >>> from IPython.lib.pretty import pprint
        >>> ex = get_dataset('cv_dev93')[0]
        >>> mask = get_mask_from_oracle(ex, 'IBM')
        >>> metric, result = get_scores(ex, mask)
        >>> pprint(result)
        {'pesq': array([2.014, 1.78 ]),
         'stoi': array([0.68236465, 0.61319396]),
         'mir_eval_sxr_sdr': array([10.23933413, 10.01566298]),
         'invasive_sxr_sdr': array([15.76439393, 13.86230425])}
    """

    if Observation == 'Observation':
        metric = get_multi_speaker_metrics(
            mask=rearrange(mask, 'k t f -> t k f'),  # T Ktarget F
            Observation=ex['audio_data'][Observation],  # D T F (stft signal)
            speech_source=ex['audio_data']['speech_source'],  # Ksource N (time signal)
            Speech_image=ex['audio_data']['Speech_image'],  # Ksource D T F (stft signal)
            Noise_image=ex['audio_data']['Noise_image'],  # D T F (stft signal)
            istft=istft,  # callable(signal, num_samples=num_samples)
            bf_algorithm=beamformer,
            postfilter=postfilter,  # [None, 'mask_mul']
        )
    else:
        assert mask is None, mask
        assert beamformer == 'ch0', beamformer
        assert postfilter is None, postfilter
        metric = OutputMetrics(
            speech_prediction=ex['audio_data'][Observation][:, 0],
            speech_source=ex['audio_data']['speech_source'],
            # speech_contribution=speech_contribution,
Example from moabitcoin/ig65m-pytorch: ig65m/cli/dreamer.py (view on GitHub)

        video.data += args.lr * grad

        # Force video to [0, 1]; note: we are in normalized space
        for i in range(video.size(1)):
            cmin = (0. - mean[i]) / std[i]
            cmax = (1. - mean[i]) / std[i]
            video.data[0, i].clamp_(cmin, cmax)

        video.grad.data.zero_()

        progress.set_postfix({"loss": loss.item(), "tv": tv.item()})

    # Once we have our dream, denormalize it,
    # and turn it into sequence of PIL images.

    video = rearrange(video, "() c t h w -> c t h w")
    video = denormalize(video)
    video = rearrange(video, "c t h w -> t h w c")
    video.clamp_(0, 1)
    video = video.data.cpu().numpy()

    assert video.shape[0] == 32
    assert video.shape[3] == 3

    assert video.dtype == np.float32
    assert (video >= 0).all()
    assert (video <= 1).all()

    video = (video * 255).astype(np.uint8)

    images = [Image.fromarray(v, mode="RGB") for v in video]
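
The patterns at the end of the dreamer script mirror each other: '() c t h w' adds or strips a singleton batch axis, and 'c t h w -> t h w c' goes back to channels-last so each frame can be handed to PIL. A compact sketch, NumPy assumed:

import numpy as np
from einops import rearrange

video = np.zeros((3, 32, 128, 128), dtype=np.float32)             # c, t, h, w
batched = rearrange(video, 'c t h w -> () c t h w')               # add a singleton batch axis
restored = rearrange(batched, '() c t h w -> c t h w')            # remove it again
frames = rearrange(restored, 'c t h w -> t h w c')                # channels-last, one frame per entry
assert frames.shape == (32, 128, 128, 3)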