Delete lib/infer_libs/infer_pack
lib/infer_libs/infer_pack/modules.py
DELETED
@@ -1,517 +0,0 @@
import math
import torch
from torch import nn
from torch.nn import Conv1d
from torch.nn import functional as F
from torch.nn.utils import remove_weight_norm, weight_norm

from lib.infer.infer_libs.infer_pack import commons
from lib.infer.infer_libs.infer_pack.commons import get_padding, init_weights
from lib.infer.infer_libs.infer_pack.transforms import piecewise_rational_quadratic_transform

LRELU_SLOPE = 0.1


class LayerNorm(nn.Module):
    def __init__(self, channels, eps=1e-5):
        super().__init__()
        self.channels = channels
        self.eps = eps

        self.gamma = nn.Parameter(torch.ones(channels))
        self.beta = nn.Parameter(torch.zeros(channels))

    def forward(self, x):
        x = x.transpose(1, -1)
        x = F.layer_norm(x, (self.channels,), self.gamma, self.beta, self.eps)
        return x.transpose(1, -1)


class ConvReluNorm(nn.Module):
    def __init__(
        self,
        in_channels,
        hidden_channels,
        out_channels,
        kernel_size,
        n_layers,
        p_dropout,
    ):
        super().__init__()
        self.in_channels = in_channels
        self.hidden_channels = hidden_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.n_layers = n_layers
        self.p_dropout = p_dropout
        assert n_layers > 1, "Number of layers should be larger than 0."

        self.conv_layers = nn.ModuleList()
        self.norm_layers = nn.ModuleList()
        self.conv_layers.append(
            nn.Conv1d(
                in_channels, hidden_channels, kernel_size, padding=kernel_size // 2
            )
        )
        self.norm_layers.append(LayerNorm(hidden_channels))
        self.relu_drop = nn.Sequential(nn.ReLU(), nn.Dropout(p_dropout))
        for _ in range(n_layers - 1):
            self.conv_layers.append(
                nn.Conv1d(
                    hidden_channels,
                    hidden_channels,
                    kernel_size,
                    padding=kernel_size // 2,
                )
            )
            self.norm_layers.append(LayerNorm(hidden_channels))
        self.proj = nn.Conv1d(hidden_channels, out_channels, 1)
        self.proj.weight.data.zero_()
        self.proj.bias.data.zero_()

    def forward(self, x, x_mask):
        x_org = x
        for i in range(self.n_layers):
            x = self.conv_layers[i](x * x_mask)
            x = self.norm_layers[i](x)
            x = self.relu_drop(x)
        x = x_org + self.proj(x)
        return x * x_mask


class DDSConv(nn.Module):
    """
    Dilated and Depth-Separable Convolution
    """

    def __init__(self, channels, kernel_size, n_layers, p_dropout=0.0):
        super().__init__()
        self.channels = channels
        self.kernel_size = kernel_size
        self.n_layers = n_layers
        self.p_dropout = p_dropout

        self.drop = nn.Dropout(p_dropout)
        self.convs_sep = nn.ModuleList()
        self.convs_1x1 = nn.ModuleList()
        self.norms_1 = nn.ModuleList()
        self.norms_2 = nn.ModuleList()
        for i in range(n_layers):
            dilation = kernel_size**i
            padding = (kernel_size * dilation - dilation) // 2
            self.convs_sep.append(
                nn.Conv1d(
                    channels,
                    channels,
                    kernel_size,
                    groups=channels,
                    dilation=dilation,
                    padding=padding,
                )
            )
            self.convs_1x1.append(nn.Conv1d(channels, channels, 1))
            self.norms_1.append(LayerNorm(channels))
            self.norms_2.append(LayerNorm(channels))

    def forward(self, x, x_mask, g=None):
        if g is not None:
            x = x + g
        for i in range(self.n_layers):
            y = self.convs_sep[i](x * x_mask)
            y = self.norms_1[i](y)
            y = F.gelu(y)
            y = self.convs_1x1[i](y)
            y = self.norms_2[i](y)
            y = F.gelu(y)
            y = self.drop(y)
            x = x + y
        return x * x_mask


class WN(torch.nn.Module):
    def __init__(
        self,
        hidden_channels,
        kernel_size,
        dilation_rate,
        n_layers,
        gin_channels=0,
        p_dropout=0,
    ):
        super(WN, self).__init__()
        assert kernel_size % 2 == 1
        self.hidden_channels = hidden_channels
        self.kernel_size = (kernel_size,)
        self.dilation_rate = dilation_rate
        self.n_layers = n_layers
        self.gin_channels = gin_channels
        self.p_dropout = p_dropout

        self.in_layers = torch.nn.ModuleList()
        self.res_skip_layers = torch.nn.ModuleList()
        self.drop = nn.Dropout(p_dropout)

        if gin_channels != 0:
            cond_layer = torch.nn.Conv1d(
                gin_channels, 2 * hidden_channels * n_layers, 1
            )
            self.cond_layer = torch.nn.utils.weight_norm(cond_layer, name="weight")

        for i in range(n_layers):
            dilation = dilation_rate**i
            padding = int((kernel_size * dilation - dilation) / 2)
            in_layer = torch.nn.Conv1d(
                hidden_channels,
                2 * hidden_channels,
                kernel_size,
                dilation=dilation,
                padding=padding,
            )
            in_layer = torch.nn.utils.weight_norm(in_layer, name="weight")
            self.in_layers.append(in_layer)

            # last one is not necessary
            if i < n_layers - 1:
                res_skip_channels = 2 * hidden_channels
            else:
                res_skip_channels = hidden_channels

            res_skip_layer = torch.nn.Conv1d(hidden_channels, res_skip_channels, 1)
            res_skip_layer = torch.nn.utils.weight_norm(res_skip_layer, name="weight")
            self.res_skip_layers.append(res_skip_layer)

    def forward(self, x, x_mask, g=None, **kwargs):
        output = torch.zeros_like(x)
        n_channels_tensor = torch.IntTensor([self.hidden_channels])

        if g is not None:
            g = self.cond_layer(g)

        for i in range(self.n_layers):
            x_in = self.in_layers[i](x)
            if g is not None:
                cond_offset = i * 2 * self.hidden_channels
                g_l = g[:, cond_offset : cond_offset + 2 * self.hidden_channels, :]
            else:
                g_l = torch.zeros_like(x_in)

            acts = commons.fused_add_tanh_sigmoid_multiply(x_in, g_l, n_channels_tensor)
            acts = self.drop(acts)

            res_skip_acts = self.res_skip_layers[i](acts)
            if i < self.n_layers - 1:
                res_acts = res_skip_acts[:, : self.hidden_channels, :]
                x = (x + res_acts) * x_mask
                output = output + res_skip_acts[:, self.hidden_channels :, :]
            else:
                output = output + res_skip_acts
        return output * x_mask

    def remove_weight_norm(self):
        if self.gin_channels != 0:
            torch.nn.utils.remove_weight_norm(self.cond_layer)
        for l in self.in_layers:
            torch.nn.utils.remove_weight_norm(l)
        for l in self.res_skip_layers:
            torch.nn.utils.remove_weight_norm(l)


class ResBlock1(torch.nn.Module):
    def __init__(self, channels, kernel_size=3, dilation=(1, 3, 5)):
        super(ResBlock1, self).__init__()
        self.convs1 = nn.ModuleList(
            [
                weight_norm(
                    Conv1d(
                        channels,
                        channels,
                        kernel_size,
                        1,
                        dilation=dilation[0],
                        padding=get_padding(kernel_size, dilation[0]),
                    )
                ),
                weight_norm(
                    Conv1d(
                        channels,
                        channels,
                        kernel_size,
                        1,
                        dilation=dilation[1],
                        padding=get_padding(kernel_size, dilation[1]),
                    )
                ),
                weight_norm(
                    Conv1d(
                        channels,
                        channels,
                        kernel_size,
                        1,
                        dilation=dilation[2],
                        padding=get_padding(kernel_size, dilation[2]),
                    )
                ),
            ]
        )
        self.convs1.apply(init_weights)

        self.convs2 = nn.ModuleList(
            [
                weight_norm(
                    Conv1d(
                        channels,
                        channels,
                        kernel_size,
                        1,
                        dilation=1,
                        padding=get_padding(kernel_size, 1),
                    )
                ),
                weight_norm(
                    Conv1d(
                        channels,
                        channels,
                        kernel_size,
                        1,
                        dilation=1,
                        padding=get_padding(kernel_size, 1),
                    )
                ),
                weight_norm(
                    Conv1d(
                        channels,
                        channels,
                        kernel_size,
                        1,
                        dilation=1,
                        padding=get_padding(kernel_size, 1),
                    )
                ),
            ]
        )
        self.convs2.apply(init_weights)

    def forward(self, x, x_mask=None):
        for c1, c2 in zip(self.convs1, self.convs2):
            xt = F.leaky_relu(x, LRELU_SLOPE)
            if x_mask is not None:
                xt = xt * x_mask
            xt = c1(xt)
            xt = F.leaky_relu(xt, LRELU_SLOPE)
            if x_mask is not None:
                xt = xt * x_mask
            xt = c2(xt)
            x = xt + x
        if x_mask is not None:
            x = x * x_mask
        return x

    def remove_weight_norm(self):
        for l in self.convs1:
            remove_weight_norm(l)
        for l in self.convs2:
            remove_weight_norm(l)


class ResBlock2(torch.nn.Module):
    def __init__(self, channels, kernel_size=3, dilation=(1, 3)):
        super(ResBlock2, self).__init__()
        self.convs = nn.ModuleList(
            [
                weight_norm(
                    Conv1d(
                        channels,
                        channels,
                        kernel_size,
                        1,
                        dilation=dilation[0],
                        padding=get_padding(kernel_size, dilation[0]),
                    )
                ),
                weight_norm(
                    Conv1d(
                        channels,
                        channels,
                        kernel_size,
                        1,
                        dilation=dilation[1],
                        padding=get_padding(kernel_size, dilation[1]),
                    )
                ),
            ]
        )
        self.convs.apply(init_weights)

    def forward(self, x, x_mask=None):
        for c in self.convs:
            xt = F.leaky_relu(x, LRELU_SLOPE)
            if x_mask is not None:
                xt = xt * x_mask
            xt = c(xt)
            x = xt + x
        if x_mask is not None:
            x = x * x_mask
        return x

    def remove_weight_norm(self):
        for l in self.convs:
            remove_weight_norm(l)


class Log(nn.Module):
    def forward(self, x, x_mask, reverse=False, **kwargs):
        if not reverse:
            y = torch.log(torch.clamp_min(x, 1e-5)) * x_mask
            logdet = torch.sum(-y, [1, 2])
            return y, logdet
        else:
            x = torch.exp(x) * x_mask
            return x


class Flip(nn.Module):
    def forward(self, x, *args, reverse=False, **kwargs):
        x = torch.flip(x, [1])
        if not reverse:
            logdet = torch.zeros(x.size(0)).to(dtype=x.dtype, device=x.device)
            return x, logdet
        else:
            return x


class ElementwiseAffine(nn.Module):
    def __init__(self, channels):
        super().__init__()
        self.channels = channels
        self.m = nn.Parameter(torch.zeros(channels, 1))
        self.logs = nn.Parameter(torch.zeros(channels, 1))

    def forward(self, x, x_mask, reverse=False, **kwargs):
        if not reverse:
            y = self.m + torch.exp(self.logs) * x
            y = y * x_mask
            logdet = torch.sum(self.logs * x_mask, [1, 2])
            return y, logdet
        else:
            x = (x - self.m) * torch.exp(-self.logs) * x_mask
            return x


class ResidualCouplingLayer(nn.Module):
    def __init__(
        self,
        channels,
        hidden_channels,
        kernel_size,
        dilation_rate,
        n_layers,
        p_dropout=0,
        gin_channels=0,
        mean_only=False,
    ):
        assert channels % 2 == 0, "channels should be divisible by 2"
        super().__init__()
        self.channels = channels
        self.hidden_channels = hidden_channels
        self.kernel_size = kernel_size
        self.dilation_rate = dilation_rate
        self.n_layers = n_layers
        self.half_channels = channels // 2
        self.mean_only = mean_only

        self.pre = nn.Conv1d(self.half_channels, hidden_channels, 1)
        self.enc = WN(
            hidden_channels,
            kernel_size,
            dilation_rate,
            n_layers,
            p_dropout=p_dropout,
            gin_channels=gin_channels,
        )
        self.post = nn.Conv1d(hidden_channels, self.half_channels * (2 - mean_only), 1)
        self.post.weight.data.zero_()
        self.post.bias.data.zero_()

    def forward(self, x, x_mask, g=None, reverse=False):
        x0, x1 = torch.split(x, [self.half_channels] * 2, 1)
        h = self.pre(x0) * x_mask
        h = self.enc(h, x_mask, g=g)
        stats = self.post(h) * x_mask
        if not self.mean_only:
            m, logs = torch.split(stats, [self.half_channels] * 2, 1)
        else:
            m = stats
            logs = torch.zeros_like(m)

        if not reverse:
            x1 = m + x1 * torch.exp(logs) * x_mask
            x = torch.cat([x0, x1], 1)
            logdet = torch.sum(logs, [1, 2])
            return x, logdet
        else:
            x1 = (x1 - m) * torch.exp(-logs) * x_mask
            x = torch.cat([x0, x1], 1)
            return x

    def remove_weight_norm(self):
        self.enc.remove_weight_norm()


class ConvFlow(nn.Module):
    def __init__(
        self,
        in_channels,
        filter_channels,
        kernel_size,
        n_layers,
        num_bins=10,
        tail_bound=5.0,
    ):
        super().__init__()
        self.in_channels = in_channels
        self.filter_channels = filter_channels
        self.kernel_size = kernel_size
        self.n_layers = n_layers
        self.num_bins = num_bins
        self.tail_bound = tail_bound
        self.half_channels = in_channels // 2

        self.pre = nn.Conv1d(self.half_channels, filter_channels, 1)
        self.convs = DDSConv(filter_channels, kernel_size, n_layers, p_dropout=0.0)
        self.proj = nn.Conv1d(
            filter_channels, self.half_channels * (num_bins * 3 - 1), 1
        )
        self.proj.weight.data.zero_()
        self.proj.bias.data.zero_()

    def forward(self, x, x_mask, g=None, reverse=False):
        x0, x1 = torch.split(x, [self.half_channels] * 2, 1)
        h = self.pre(x0)
        h = self.convs(h, x_mask, g=g)
        h = self.proj(h) * x_mask

        b, c, t = x0.shape
        h = h.reshape(b, c, -1, t).permute(0, 1, 3, 2)  # [b, cx?, t] -> [b, c, t, ?]

        unnormalized_widths = h[..., : self.num_bins] / math.sqrt(self.filter_channels)
        unnormalized_heights = h[..., self.num_bins : 2 * self.num_bins] / math.sqrt(
            self.filter_channels
        )
        unnormalized_derivatives = h[..., 2 * self.num_bins :]

        x1, logabsdet = piecewise_rational_quadratic_transform(
            x1,
            unnormalized_widths,
            unnormalized_heights,
            unnormalized_derivatives,
            inverse=reverse,
            tails="linear",
            tail_bound=self.tail_bound,
        )

        x = torch.cat([x0, x1], 1) * x_mask
        logdet = torch.sum(logabsdet * x_mask, [1, 2])
        if not reverse:
            return x, logdet
        else:
            return x
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
lib/infer_libs/infer_pack/transforms.py
DELETED
@@ -1,207 +0,0 @@
import numpy as np
import torch
from torch.nn import functional as F

DEFAULT_MIN_BIN_WIDTH = 1e-3
DEFAULT_MIN_BIN_HEIGHT = 1e-3
DEFAULT_MIN_DERIVATIVE = 1e-3


def piecewise_rational_quadratic_transform(
    inputs,
    unnormalized_widths,
    unnormalized_heights,
    unnormalized_derivatives,
    inverse=False,
    tails=None,
    tail_bound=1.0,
    min_bin_width=DEFAULT_MIN_BIN_WIDTH,
    min_bin_height=DEFAULT_MIN_BIN_HEIGHT,
    min_derivative=DEFAULT_MIN_DERIVATIVE,
):
    if tails is None:
        spline_fn = rational_quadratic_spline
        spline_kwargs = {}
    else:
        spline_fn = unconstrained_rational_quadratic_spline
        spline_kwargs = {"tails": tails, "tail_bound": tail_bound}

    outputs, logabsdet = spline_fn(
        inputs=inputs,
        unnormalized_widths=unnormalized_widths,
        unnormalized_heights=unnormalized_heights,
        unnormalized_derivatives=unnormalized_derivatives,
        inverse=inverse,
        min_bin_width=min_bin_width,
        min_bin_height=min_bin_height,
        min_derivative=min_derivative,
        **spline_kwargs
    )
    return outputs, logabsdet


def searchsorted(bin_locations, inputs, eps=1e-6):
    bin_locations[..., -1] += eps
    return torch.sum(inputs[..., None] >= bin_locations, dim=-1) - 1


def unconstrained_rational_quadratic_spline(
    inputs,
    unnormalized_widths,
    unnormalized_heights,
    unnormalized_derivatives,
    inverse=False,
    tails="linear",
    tail_bound=1.0,
    min_bin_width=DEFAULT_MIN_BIN_WIDTH,
    min_bin_height=DEFAULT_MIN_BIN_HEIGHT,
    min_derivative=DEFAULT_MIN_DERIVATIVE,
):
    inside_interval_mask = (inputs >= -tail_bound) & (inputs <= tail_bound)
    outside_interval_mask = ~inside_interval_mask

    outputs = torch.zeros_like(inputs)
    logabsdet = torch.zeros_like(inputs)

    if tails == "linear":
        unnormalized_derivatives = F.pad(unnormalized_derivatives, pad=(1, 1))
        constant = np.log(np.exp(1 - min_derivative) - 1)
        unnormalized_derivatives[..., 0] = constant
        unnormalized_derivatives[..., -1] = constant

        outputs[outside_interval_mask] = inputs[outside_interval_mask]
        logabsdet[outside_interval_mask] = 0
    else:
        raise RuntimeError("{} tails are not implemented.".format(tails))

    (
        outputs[inside_interval_mask],
        logabsdet[inside_interval_mask],
    ) = rational_quadratic_spline(
        inputs=inputs[inside_interval_mask],
        unnormalized_widths=unnormalized_widths[inside_interval_mask, :],
        unnormalized_heights=unnormalized_heights[inside_interval_mask, :],
        unnormalized_derivatives=unnormalized_derivatives[inside_interval_mask, :],
        inverse=inverse,
        left=-tail_bound,
        right=tail_bound,
        bottom=-tail_bound,
        top=tail_bound,
        min_bin_width=min_bin_width,
        min_bin_height=min_bin_height,
        min_derivative=min_derivative,
    )

    return outputs, logabsdet


def rational_quadratic_spline(
    inputs,
    unnormalized_widths,
    unnormalized_heights,
    unnormalized_derivatives,
    inverse=False,
    left=0.0,
    right=1.0,
    bottom=0.0,
    top=1.0,
    min_bin_width=DEFAULT_MIN_BIN_WIDTH,
    min_bin_height=DEFAULT_MIN_BIN_HEIGHT,
    min_derivative=DEFAULT_MIN_DERIVATIVE,
):
    if torch.min(inputs) < left or torch.max(inputs) > right:
        raise ValueError("Input to a transform is not within its domain")

    num_bins = unnormalized_widths.shape[-1]

    if min_bin_width * num_bins > 1.0:
        raise ValueError("Minimal bin width too large for the number of bins")
    if min_bin_height * num_bins > 1.0:
        raise ValueError("Minimal bin height too large for the number of bins")

    widths = F.softmax(unnormalized_widths, dim=-1)
    widths = min_bin_width + (1 - min_bin_width * num_bins) * widths
    cumwidths = torch.cumsum(widths, dim=-1)
    cumwidths = F.pad(cumwidths, pad=(1, 0), mode="constant", value=0.0)
    cumwidths = (right - left) * cumwidths + left
    cumwidths[..., 0] = left
    cumwidths[..., -1] = right
    widths = cumwidths[..., 1:] - cumwidths[..., :-1]

    derivatives = min_derivative + F.softplus(unnormalized_derivatives)

    heights = F.softmax(unnormalized_heights, dim=-1)
    heights = min_bin_height + (1 - min_bin_height * num_bins) * heights
    cumheights = torch.cumsum(heights, dim=-1)
    cumheights = F.pad(cumheights, pad=(1, 0), mode="constant", value=0.0)
    cumheights = (top - bottom) * cumheights + bottom
    cumheights[..., 0] = bottom
    cumheights[..., -1] = top
    heights = cumheights[..., 1:] - cumheights[..., :-1]

    if inverse:
        bin_idx = searchsorted(cumheights, inputs)[..., None]
    else:
        bin_idx = searchsorted(cumwidths, inputs)[..., None]

    input_cumwidths = cumwidths.gather(-1, bin_idx)[..., 0]
    input_bin_widths = widths.gather(-1, bin_idx)[..., 0]

    input_cumheights = cumheights.gather(-1, bin_idx)[..., 0]
    delta = heights / widths
    input_delta = delta.gather(-1, bin_idx)[..., 0]

    input_derivatives = derivatives.gather(-1, bin_idx)[..., 0]
    input_derivatives_plus_one = derivatives[..., 1:].gather(-1, bin_idx)[..., 0]

    input_heights = heights.gather(-1, bin_idx)[..., 0]

    if inverse:
        a = (inputs - input_cumheights) * (
            input_derivatives + input_derivatives_plus_one - 2 * input_delta
        ) + input_heights * (input_delta - input_derivatives)
        b = input_heights * input_derivatives - (inputs - input_cumheights) * (
            input_derivatives + input_derivatives_plus_one - 2 * input_delta
        )
        c = -input_delta * (inputs - input_cumheights)

        discriminant = b.pow(2) - 4 * a * c
        assert (discriminant >= 0).all()

        root = (2 * c) / (-b - torch.sqrt(discriminant))
        outputs = root * input_bin_widths + input_cumwidths

        theta_one_minus_theta = root * (1 - root)
        denominator = input_delta + (
            (input_derivatives + input_derivatives_plus_one - 2 * input_delta)
            * theta_one_minus_theta
        )
        derivative_numerator = input_delta.pow(2) * (
            input_derivatives_plus_one * root.pow(2)
            + 2 * input_delta * theta_one_minus_theta
            + input_derivatives * (1 - root).pow(2)
        )
        logabsdet = torch.log(derivative_numerator) - 2 * torch.log(denominator)

        return outputs, -logabsdet
    else:
        theta = (inputs - input_cumwidths) / input_bin_widths
        theta_one_minus_theta = theta * (1 - theta)

        numerator = input_heights * (
            input_delta * theta.pow(2) + input_derivatives * theta_one_minus_theta
        )
        denominator = input_delta + (
            (input_derivatives + input_derivatives_plus_one - 2 * input_delta)
            * theta_one_minus_theta
        )
        outputs = input_cumheights + numerator / denominator

        derivative_numerator = input_delta.pow(2) * (
            input_derivatives_plus_one * theta.pow(2)
            + 2 * input_delta * theta_one_minus_theta
            + input_derivatives * (1 - theta).pow(2)
        )
        logabsdet = torch.log(derivative_numerator) - 2 * torch.log(denominator)

        return outputs, logabsdet