# Copyright 2022 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math

import numpy as np
import torch
from torch import nn


def get_timestep_embedding(
    timesteps, embedding_dim, flip_sin_to_cos=False, downscale_freq_shift=1, scale=1, max_period=10000
):
    """
    Create sinusoidal timestep embeddings. This matches the implementation in Denoising Diffusion Probabilistic
    Models.

    :param timesteps: a 1-D Tensor of N indices, one per batch element. These may be fractional.
    :param embedding_dim: the dimension of the output.
    :param flip_sin_to_cos: if True, the cosine half of the embedding comes first instead of the sine half.
    :param downscale_freq_shift: subtracted from half_dim in the frequency denominator, controlling how the
        frequencies are spread.
    :param scale: multiplier applied to the phases before the sin/cos.
    :param max_period: controls the minimum frequency of the embeddings.
    :return: an [N x dim] Tensor of positional embeddings.
    """
    assert len(timesteps.shape) == 1, "Timesteps should be a 1d-array"

    half_dim = embedding_dim // 2

    # log-spaced frequencies, decaying from 1 toward 1 / max_period
    exponent = -math.log(max_period) * torch.arange(start=0, end=half_dim, dtype=torch.float32)
    exponent = exponent / (half_dim - downscale_freq_shift)

    # outer product: one row of phases per timestep
    emb = torch.exp(exponent).to(device=timesteps.device)
    emb = timesteps[:, None].float() * emb[None, :]

    # scale embeddings
    emb = scale * emb

    # concat sine and cosine embeddings
    emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=-1)

    # flip sine and cosine embeddings
    if flip_sin_to_cos:
        emb = torch.cat([emb[:, half_dim:], emb[:, :half_dim]], dim=-1)

    # zero pad odd dimensions so the output always has embedding_dim columns
    if embedding_dim % 2 == 1:
        emb = torch.nn.functional.pad(emb, (0, 1, 0, 0))
    return emb
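

# A minimal usage sketch (illustrative values, not taken from any pipeline); runs
# with only the imports above:
#
#   t = torch.tensor([0.0, 10.0, 999.0])   # one (possibly fractional) timestep per batch element
#   emb = get_timestep_embedding(t, 128)   # -> torch.Size([3, 128]): 64 sines, then 64 cosines
#   emb = get_timestep_embedding(t, 129)   # odd dims gain a zero column -> torch.Size([3, 129])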


class TimestepEmbedding(nn.Module):
    """Two-layer MLP that projects a timestep embedding into the model's time-embedding space."""

    def __init__(self, channel, time_embed_dim, act_fn="silu"):
        super().__init__()

        self.linear_1 = nn.Linear(channel, time_embed_dim)
        self.act = None
        if act_fn == "silu":
            self.act = nn.SiLU()
        self.linear_2 = nn.Linear(time_embed_dim, time_embed_dim)

    def forward(self, sample):
        sample = self.linear_1(sample)

        if self.act is not None:
            sample = self.act(sample)

        sample = self.linear_2(sample)
        return sample
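

# Usage sketch (the 128 -> 512 sizes are made-up, not canonical): the sinusoidal
# embedding is typically passed through this MLP before conditioning the model.
#
#   time_embed = TimestepEmbedding(channel=128, time_embed_dim=512)
#   t_emb = get_timestep_embedding(torch.tensor([0.0, 10.0]), 128)
#   cond = time_embed(t_emb)   # -> torch.Size([2, 512])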


class Timesteps(nn.Module):
    """nn.Module wrapper around `get_timestep_embedding` that stores the embedding hyperparameters."""

    def __init__(self, num_channels, flip_sin_to_cos, downscale_freq_shift):
        super().__init__()
        self.num_channels = num_channels
        self.flip_sin_to_cos = flip_sin_to_cos
        self.downscale_freq_shift = downscale_freq_shift

    def forward(self, timesteps):
        t_emb = get_timestep_embedding(
            timesteps,
            self.num_channels,
            flip_sin_to_cos=self.flip_sin_to_cos,
            downscale_freq_shift=self.downscale_freq_shift,
        )
        return t_emb
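

# Usage sketch (hyperparameter values are illustrative): `Timesteps` lets the
# sinusoidal projection live inside a larger model as a submodule.
#
#   time_proj = Timesteps(num_channels=320, flip_sin_to_cos=True, downscale_freq_shift=0)
#   t_emb = time_proj(torch.tensor([1.0, 2.0, 3.0]))   # -> torch.Size([3, 320])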


class GaussianFourierProjection(nn.Module):
    """Gaussian Fourier embeddings for noise levels."""

    def __init__(self, embedding_size=256, scale=1.0):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(embedding_size) * scale, requires_grad=False)

        # to delete later
        self.W = nn.Parameter(torch.randn(embedding_size) * scale, requires_grad=False)

        # `weight` and `W` end up as two names for the same (frozen) parameter
        self.weight = self.W

    def forward(self, x):
        # noise levels are embedded in log space, so x must be strictly positive
        x = torch.log(x)
        x_proj = x[:, None] * self.weight[None, :] * 2 * np.pi
        out = torch.cat([torch.sin(x_proj), torch.cos(x_proj)], dim=-1)
        return out
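

# Usage sketch (sigma values are illustrative): the random projection doubles the
# embedding size, since sin and cos are concatenated.
#
#   proj = GaussianFourierProjection(embedding_size=256, scale=16.0)
#   sigmas = torch.tensor([0.1, 1.0, 10.0])   # positive noise levels
#   out = proj(sigmas)                        # -> torch.Size([3, 512])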