innnky committed
Commit 471bf00
1 Parent(s): 04ab017
G_157000.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:87e9b24e8cd7987f493629494018da15d4a4cbb34b1b788f3291a63851b2aaa1
+ size 143295815
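This checkpoint is stored as a Git LFS pointer, so only the oid and size above live in the repository; the 143 MB weights are fetched by LFS. A minimal sketch (the helper and local path are illustrative, not part of this commit) for checking that a downloaded G_157000.pth matches the recorded sha256:

import hashlib

def sha256_of(path, chunk=1 << 20):
    # stream the file so the 143 MB checkpoint is not read into memory at once
    h = hashlib.sha256()
    with open(path, "rb") as f:
        while True:
            block = f.read(chunk)
            if not block:
                break
            h.update(block)
    return h.hexdigest()

expected = "87e9b24e8cd7987f493629494018da15d4a4cbb34b1b788f3291a63851b2aaa1"
print(sha256_of("G_157000.pth") == expected)  # assumes the file was pulled locally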
app.py ADDED
@@ -0,0 +1,99 @@
+ import io
+ import json
+ import os
+ import gradio as gr
+ import librosa
+ import numpy as np
+ import soundfile
+ import torch
+ import logging
+
+ from egs.visinger2.models import SynthesizerTrn
+ from infer import infer_ds
+ from utils import utils
+
+ logging.getLogger('numba').setLevel(logging.WARNING)
+ logging.getLogger('markdown_it').setLevel(logging.WARNING)
+ logging.getLogger('urllib3').setLevel(logging.WARNING)
+ logging.getLogger('matplotlib').setLevel(logging.WARNING)
+
+ config_json = "egs/visinger2/config.json"
+ model_path = "G_157000.pth"
+
+
+ hps = utils.get_hparams_from_file(config_json)
+ net_g = SynthesizerTrn(hps)
+ _ = net_g.eval()
+ _ = utils.load_checkpoint(model_path, net_g, None)
+
+ def vc_fn(speaker, ds, vc_transform):
+     try:
+         ds = json.loads(ds)
+     except:
+         return "Failed to parse the project file as JSON; please paste the complete contents of the .ds file here", None
+
+     dur = 0
+     flag = False
+     try:
+         for inp in ds:
+             f0_seq = inp["f0_seq"]
+             ph_dur = inp["ph_dur"]
+             ph_dur = [float(i) for i in ph_dur.split(" ")]
+             f0_seq = [float(i) for i in f0_seq.split(" ")]
+             dur += sum(ph_dur)
+             if sum(ph_dur) > 30:
+                 flag = True
+     except:
+         return "The ds project must have its f0 and phoneme parameters frozen before this model can synthesize it", None
+     if flag:
+         return "Each slice must be shorter than 30 s; otherwise please use local inference", None
+     if dur > 120:
+         return "Total duration must be under 2 minutes; otherwise please use local inference", None
+     out_audio = infer_ds(net_g, hps, ds, speaker, vc_transform)
+     # return "Please upload audio shorter than 45 s; convert longer audio locally", None
+     # out_audio, out_sr = inference_main.infer(sid, out_wav_path, model_map[model], vc_transform)
+     # _audio = out_audio.cpu().numpy()
+     return "Success", (44100, out_audio.astype(np.float32))
+
+
+ app = gr.Blocks()
+ with app:
+     with gr.Tabs():
+         with gr.TabItem("Basic"):
+             gr.Markdown(value="""
+ This is the online demo of the VISinger2 Taffy (塔菲) / otto (电棍) models; the GitHub repository is [visinger2-nomidi](https://github.com/innnky/VISinger2-nomidi)
+
+ Because the training set was built fully automatically from stream-recording data, its quality is fairly poor; this model is not the upper bound of VISinger2's audio quality. For the best-quality results, see the [official VISinger2 demo](https://zhangyongmao.github.io/VISinger2/)
+
+ The ds project file is a [DiffSinger](https://github.com/openvpi/DiffSinger) project and needs to be produced with the [OpenSVIP](https://openvpi.github.io/) converter: the idea is to author the project in another singing-synthesis editor first and then convert it into an input format the model accepts.
+
+ Because this is a nomidi model, the phoneme and pitch parameters must be frozen when exporting the ds project, otherwise an error will be raised; for detailed questions about building DiffSinger projects, you can join the DiffSinger QQ group 907879266
+
+ Online inference is limited to a total duration under 2 minutes with each slice under 30 s; for anything larger, please download this repository or the GitHub repository and run ds_inference.py for local inference
+             """)
+             sid = gr.Dropdown(label="Speaker", choices=["taffy", "otto"], value="taffy")
+             vc_input3 = gr.TextArea(label="ds project (JSON format)", value='''[
+ {
+ "text": "SP 清 晨 SP",
+ "ph_seq": "SP q ing ch en SP",
+ "note_seq": "rest D4 D4 G4 G4 rest",
+ "note_dur_seq": "0.6 0.273 0.273 0.4089999 0.4089999 0.4",
+ "is_slur_seq": "0 0 0 0 0 0",
+ "ph_dur": "0.469318 0.130682 0.120727 0.152273 0.409 0.4",
+ "f0_timestep": "0.005",
+ "f0_seq": "301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 302.0 302.4 301.9 301.4 300.5 299.4 299.0 298.3 297.9 297.6 297.2 297.2 297.0 296.8 296.9 296.7 296.6 296.8 296.9 296.9 297.4 297.6 297.7 298.2 298.5 298.3 298.6 298.7 298.5 298.6 298.3 297.8 296.4 293.9 291.5 286.7 283.2 279.6 278.5 283.4 288.4 293.5 298.6 303.9 309.3 314.7 320.3 325.9 331.7 337.5 343.5 349.5 355.7 362.0 368.3 374.8 381.5 387.1 388.7 391.3 393.6 396.1 397.7 398.7 399.3 399.6 399.8 399.4 399.0 398.6 397.9 397.7 397.1 396.7 396.1 396.0 395.4 395.6 395.7 395.9 395.9 396.1 396.4 396.8 397.0 397.3 397.5 397.5 397.5 397.7 397.7 397.7 397.7 397.9 397.7 397.7 397.7 397.7 397.7 397.7 397.5 397.5 397.2 397.0 397.0 396.7 396.6 396.6 396.5 396.3 396.3 396.1 396.1 396.3 396.3 396.1 396.3 396.3 396.4 396.6 396.7 396.6 396.9 397.2 396.8 397.4 397.9 398.0 398.5 399.1 399.1 399.1 399.0 398.7 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2",
+ "input_type": "phoneme",
+ "offset": 0.0
+ }
+ ]''')
+             vc_transform = gr.Number(label="Transpose (integer, positive or negative, in semitones; one octave up is 12)", value=0)
+             # model = gr.Dropdown(label="Model", choices=list(model_map.keys()), value="G_34000.pth")
+             vc_submit = gr.Button("Synthesize", variant="primary")
+             vc_output1 = gr.Textbox(label="Output Message")
+             vc_output2 = gr.Audio(label="Output Audio")
+             vc_submit.click(vc_fn, [sid, vc_input3, vc_transform], [vc_output1, vc_output2])
+
+ app.launch()
+
+
+
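The Markdown text above spells out the limits that vc_fn enforces: frozen ph_dur/f0_seq, each slice under 30 s, total under 2 minutes. A minimal sketch of checking a local .ds export against those limits before uploading; the helper name and the project.ds path are assumptions, not part of this commit:

import json

def check_ds_limits(ds_path):
    # a .ds export is a JSON list of segments, each with frozen "ph_dur" (space-separated seconds)
    with open(ds_path) as f:
        segments = json.load(f)
    total = 0.0
    for seg in segments:
        slice_dur = sum(float(x) for x in seg["ph_dur"].split(" "))
        total += slice_dur
        if slice_dur > 30:
            print(f"slice too long for the online demo: {slice_dur:.1f} s")
    print(f"total duration: {total:.1f} s (online limit: 120 s)")

check_ds_limits("project.ds")  # "project.ds" is a placeholder path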
ds_inference.py ADDED
@@ -0,0 +1,33 @@
+ import json
+ import os
+ import time
+ import re
+
+ import numpy as np
+ import soundfile
+ import torch
+ import tqdm
+ from scipy.interpolate import interp1d
+
+ from utils import utils
+ from egs.visinger2.models import SynthesizerTrn
+ from infer import preprocess, cross_fade, infer_ds
+
+ trans = -12
+ speaker = "otto"
+ ds_path = "infer/share.ds"
+ config_json = "egs/visinger2/config.json"
+ checkpoint_path = f"/Volumes/Extend/下载/G_110000.pth"
+ file_name = os.path.splitext(os.path.basename(ds_path))[0]
+ step = re.findall(r'G_(\d+)\.pth', checkpoint_path)[0]
+
+
+ if __name__ == '__main__':
+     ds = json.load(open(ds_path))
+     hps = utils.get_hparams_from_file(config_json)
+     net_g = SynthesizerTrn(hps)
+     _ = net_g.eval()
+     _ = utils.load_checkpoint(checkpoint_path, net_g, None)
+
+     audio = infer_ds(net_g, hps, ds, speaker, trans)
+     soundfile.write(f"{speaker}_{file_name}_{step}step.wav", audio, 44100)
egs/visinger2/__init__.py ADDED
File without changes
egs/visinger2/bash/train.sh ADDED
@@ -0,0 +1,13 @@
+
+ num_gpu=$1
+
+ cd $(dirname $(dirname $0))
+ exp_dir=$(pwd)
+ base_dir=$(dirname $(dirname $exp_dir))
+ config=${exp_dir}/config.json
+
+ export PYTHONPATH=$base_dir
+ export PYTHONIOENCODING=UTF-8
+
+ CUDA_VISIBLE_DEVICES=${num_gpu} python train.py -c config.json
+
egs/visinger2/config.json ADDED
@@ -0,0 +1,68 @@
+ {
+     "train": {
+         "log_interval": 200,
+         "eval_interval": 1000,
+         "seed": 1234,
+         "port": 8001,
+         "epochs": 10000,
+         "learning_rate": 2e-4,
+         "betas": [0.8, 0.99],
+         "eps": 1e-9,
+         "batch_size": 8,
+         "accumulation_steps": 1,
+         "fp16_run": false,
+         "lr_decay": 0.998,
+         "segment_size": 10240,
+         "init_lr_ratio": 1,
+         "warmup_epochs": 0,
+         "c_mel": 45,
+         "save_dir": "logdir/visinger2"
+     },
+     "data": {
+         "data_dir": "../../data",
+         "dataset_type": "SingDataset",
+         "collate_type": "SingCollate",
+         "training_filelist": "train.list",
+         "training_labellist": "transcriptions.txt",
+         "validation_filelist": "test.list",
+         "validation_labellist": "transcriptions.txt",
+         "max_wav_value": 32768.0,
+         "sample_rate": 44100,
+         "n_fft": 2048,
+         "fmin": 0,
+         "fmax": 22050,
+         "hop_size": 512,
+         "win_size": 2048,
+         "acoustic_dim": 80,
+         "min_level_db": -115,
+         "ref_level_db": 20,
+         "min_db": -115,
+         "max_abs_value": 4.0,
+         "n_speakers": 200,
+         "spk2id": {"opencpop": 0, "taffy": 1, "otto": 2, "nanami": 3}
+     },
+     "model": {
+         "hidden_channels": 192,
+         "spk_channels": 192,
+         "filter_channels": 768,
+         "n_heads": 2,
+         "n_layers": 4,
+         "kernel_size": 3,
+         "p_dropout": 0.1,
+         "prior_hidden_channels": 192,
+         "prior_filter_channels": 768,
+         "prior_n_heads": 2,
+         "prior_n_layers": 4,
+         "prior_kernel_size": 3,
+         "prior_p_dropout": 0.1,
+         "resblock": "1",
+         "use_spectral_norm": false,
+         "resblock_kernel_sizes": [3,7,11],
+         "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
+         "upsample_rates": [8,8,4,2],
+         "upsample_initial_channel": 256,
+         "upsample_kernel_sizes": [16,16,8,4],
+         "n_harmonic": 64,
+         "n_bands": 65
+     }
+ }
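For reference, hop_size and sample_rate above define the acoustic frame grid the rest of the code works on: one frame every hop_size / sample_rate = 512 / 44100 ≈ 11.6 ms. A small sketch of how phoneme durations in seconds become frame counts, mirroring the ceil() conversion in egs/visinger2/inference.py (ph_dur values taken from the demo ds example in app.py):

import numpy as np

sample_rate = 44100   # from config.json
hop_size = 512        # from config.json
frame_shift = hop_size / sample_rate          # ~0.0116 s per acoustic frame

ph_dur = [0.469318, 0.130682, 0.120727, 0.152273, 0.409, 0.4]   # seconds, demo ds example
frames = np.ceil(np.asarray(ph_dur) / frame_shift).astype(int)  # frames per phoneme
print(frames, int(frames.sum()))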
egs/visinger2/dataset.py ADDED
@@ -0,0 +1,334 @@
1
+
2
+ import os
3
+ import sys
4
+ import string
5
+ import random
6
+ import numpy as np
7
+ import math
8
+ import json
9
+ from torch.utils.data import DataLoader
10
+ import torch
11
+
12
+ sys.path.append('../..')
13
+ from utils.audio import load_wav
14
+ from text import npu
15
+
16
+ class BaseDataset(torch.utils.data.Dataset):
17
+
18
+ def __init__(self, hparams, fileid_list_path):
19
+ self.hparams = hparams
20
+ self.fileid_list = self.get_fileid_list(fileid_list_path)
21
+ random.seed(hparams.train.seed)
22
+ random.shuffle(self.fileid_list)
23
+ if(hparams.data.n_speakers > 0):
24
+ self.spk2id = hparams.data.spk2id
25
+
26
+ def get_fileid_list(self, fileid_list_path):
27
+ fileid_list = []
28
+ with open(fileid_list_path, 'r') as f:
29
+ for line in f.readlines():
30
+ fileid_list.append(line.strip())
31
+
32
+ return fileid_list
33
+
34
+ def __len__(self):
35
+ return len(self.fileid_list)
36
+
37
+ class SingDataset(BaseDataset):
38
+ def __init__(self, hparams, data_dir, fileid_list_path, label_list_path):
39
+ BaseDataset.__init__(self, hparams, os.path.join(data_dir, fileid_list_path))
40
+ self.hps = hparams
41
+
42
+ with open(os.path.join(data_dir, label_list_path), "r") as in_file:
43
+ self.id2label = {}
44
+ for line in in_file.readlines():
45
+ fileid, txt, phones, pitchid, dur, gtdur, slur = line.split('|')
46
+ self.id2label[fileid] = [phones, pitchid, dur, slur, gtdur]
47
+
48
+ self.data_dir = data_dir
49
+ # self.__filter__()
50
+
51
+ def __filter__(self):
52
+ new_fileid_list = []
53
+ print("before filter: ", len(self.fileid_list))
54
+ for file_id in self.fileid_list:
55
+ _is_qualified = True
56
+ if(not os.path.exists(os.path.join(self.label_dir, self.fileid_list[index] + '.lab')) or
57
+ not os.path.exists(os.path.join(self.dur_dir, self.fileid_list[index] + '.lab')) or
58
+ not os.path.exists(os.path.join(self.mel_dir, self.fileid_list[index] + '.npy')) or
59
+ not os.path.exists(os.path.join(self.pitch_dir, self.fileid_list[index] + '.npy'))):
60
+ _is_qualified = False
61
+ if(_is_qualified):
62
+ new_fileid_list.append(file_id)
63
+ self.fileid_list = new_fileid_list
64
+ print("after filter: ", len(self.fileid_list))
65
+
66
+ def interpolate_f0(self, data):
67
+ '''
68
+ Interpolate the F0 contour over unvoiced frames
69
+ '''
70
+ data = np.reshape(data, (data.size, 1))
71
+
72
+ vuv_vector = np.zeros((data.size, 1),dtype=np.float32)
73
+ vuv_vector[data > 0.0] = 1.0
74
+ vuv_vector[data <= 0.0] = 0.0
75
+
76
+ ip_data = data
77
+
78
+ frame_number = data.size
79
+ last_value = 0.0
80
+ for i in range(frame_number):
81
+ if data[i] <= 0.0:
82
+ j = i + 1
83
+ for j in range(i + 1, frame_number):
84
+ if data[j] > 0.0:
85
+ break
86
+ if j < frame_number - 1:
87
+ if last_value > 0.0:
88
+ step = (data[j] - data[i - 1]) / float(j - i)
89
+ for k in range(i, j):
90
+ ip_data[k] = data[i - 1] + step * (k - i + 1)
91
+ else:
92
+ for k in range(i, j):
93
+ ip_data[k] = data[j]
94
+ else:
95
+ for k in range(i, frame_number):
96
+ ip_data[k] = last_value
97
+ else:
98
+ ip_data[i] = data[i]
99
+ last_value = data[i]
100
+
101
+ return ip_data, vuv_vector
102
+
103
+ def parse_label(self, pho, pitchid, dur, slur, gtdur):
104
+ phos = []
105
+ pitchs = []
106
+ durs = []
107
+ slurs = []
108
+ gtdurs = []
109
+
110
+ for index in range(len(pho.split())):
111
+ phos.append(npu.symbol_converter.ttsing_phone_to_int[pho.strip().split()[index]])
112
+ pitchs.append(0)
113
+ durs.append(0)
114
+ slurs.append(0)
115
+ gtdurs.append(float(gtdur.strip().split()[index]))
116
+
117
+ phos = np.asarray(phos, dtype=np.int32)
118
+ pitchs = np.asarray(pitchs, dtype=np.int32)
119
+ durs = np.asarray(durs, dtype=np.float32)
120
+ slurs = np.asarray(slurs, dtype=np.int32)
121
+ gtdurs = np.asarray(gtdurs, dtype=np.float32)
122
+
123
+ acc_duration = np.cumsum(gtdurs)
124
+ acc_duration = np.pad(acc_duration, (1, 0), 'constant', constant_values=(0,))
125
+ acc_duration_frames = np.ceil(acc_duration / (self.hps.data.hop_size / self.hps.data.sample_rate))
126
+ gtdurs = acc_duration_frames[1:] - acc_duration_frames[:-1]
127
+
128
+ phos = torch.LongTensor(phos)
129
+ pitchs = torch.LongTensor(pitchs)
130
+ durs = torch.FloatTensor(durs)
131
+ slurs = torch.LongTensor(slurs)
132
+ gtdurs = torch.LongTensor(gtdurs)
133
+ return phos, pitchs, durs, slurs, gtdurs
134
+
135
+ def __getitem__(self, index):
136
+
137
+ pho, pitchid, dur, slur, gtdur = self.id2label[self.fileid_list[index]]
138
+ pho, pitchid, dur, slur, gtdur = self.parse_label(pho, pitchid, dur, slur, gtdur)
139
+ sum_dur = gtdur.sum()
140
+ spk, fileid = self.fileid_list[index].split("/")
141
+ spkid = self.spk2id[spk]
142
+ mel = np.load(os.path.join(self.data_dir, spk, "mels", fileid + '.npy'))
143
+ if mel.shape[0] <150:
144
+ print("drop short audio:", self.fileid_list[index])
145
+ return None
146
+ assert mel.shape[1] == 80
147
+ if(mel.shape[0] != sum_dur):
148
+ if(abs(mel.shape[0] - sum_dur) > 3):
149
+ print("dataset error mel: ",mel.shape, sum_dur)
150
+ return None
151
+ if(mel.shape[0] > sum_dur):
152
+ mel = mel[:sum_dur]
153
+ else:
154
+ mel = np.concatenate([mel, mel.min() * np.ones([sum_dur - mel.shape[0], self.hps.data.acoustic_dim])], axis=0)
155
+ mel = torch.FloatTensor(mel).transpose(0, 1)
156
+
157
+ f0 = np.load(os.path.join(self.data_dir, spk, "pitch", fileid + '.npy')).reshape([-1])
158
+ f0, _ = self.interpolate_f0(f0)
159
+ f0 = f0.reshape([-1])
160
+ if(f0.shape[0] != sum_dur):
161
+ if(abs(f0.shape[0] - sum_dur) > 3):
162
+ print("dataset error f0 : ",f0.shape, sum_dur)
163
+ return None
164
+ if(f0.shape[0] > sum_dur):
165
+ f0 = f0[:sum_dur]
166
+ else:
167
+ f0 = np.concatenate([f0, np.zeros([sum_dur - f0.shape[0]])], axis=0)
168
+ f0 = torch.FloatTensor(f0).reshape([1, -1])
169
+
170
+ wav = load_wav(os.path.join(self.data_dir, spk, "wavs", fileid + '.wav'),
171
+ raw_sr=self.hparams.data.sample_rate,
172
+ target_sr=self.hparams.data.sample_rate,
173
+ win_size=self.hparams.data.win_size,
174
+ hop_size=self.hparams.data.hop_size)
175
+ wav = wav.reshape(-1)
176
+ if(wav.shape[0] != sum_dur * self.hparams.data.hop_size):
177
+ if(abs(wav.shape[0] - sum_dur * self.hparams.data.hop_size) > 3 * self.hparams.data.hop_size):
178
+ print("dataset error wav : ", wav.shape, sum_dur)
179
+ return None
180
+ if(wav.shape[0] > sum_dur * self.hparams.data.hop_size):
181
+ wav = wav[:sum_dur * self.hparams.data.hop_size]
182
+ else:
183
+ wav = np.concatenate([wav, np.zeros([sum_dur * self.hparams.data.hop_size - wav.shape[0]])], axis=0)
184
+ wav = torch.FloatTensor(wav).reshape([1, -1])
185
+
186
+ return pho, pitchid, dur, slur, gtdur, mel, f0, wav, spkid
187
+
188
+
189
+ class SingCollate():
190
+
191
+ def __init__(self, hparams):
192
+ self.hparams = hparams
193
+ self.mel_dim = self.hparams.data.acoustic_dim
194
+
195
+ def __call__(self, batch):
196
+
197
+ batch = [b for b in batch if b is not None]
198
+
199
+ input_lengths, ids_sorted_decreasing = torch.sort(
200
+ torch.LongTensor([len(x[0]) for x in batch]),
201
+ dim=0, descending=True)
202
+
203
+ max_phone_len = max([len(x[0]) for x in batch])
204
+ max_pitchid_len = max([len(x[1]) for x in batch])
205
+ max_dur_len = max([len(x[2]) for x in batch])
206
+ max_slur_len = max([len(x[3]) for x in batch])
207
+ max_gtdur_len = max([len(x[4]) for x in batch])
208
+ max_mel_len = max([x[5].size(1) for x in batch])
209
+ max_f0_len = max([x[6].size(1) for x in batch])
210
+ max_wav_len = max([x[7].size(1) for x in batch])
211
+
212
+ phone_lengths = torch.LongTensor(len(batch))
213
+ pitchid_lengths = torch.LongTensor(len(batch))
214
+ dur_lengths = torch.LongTensor(len(batch))
215
+ slur_lengths = torch.LongTensor(len(batch))
216
+ gtdur_lengths = torch.LongTensor(len(batch))
217
+ mel_lengths = torch.LongTensor(len(batch))
218
+ f0_lengths = torch.LongTensor(len(batch))
219
+ wav_lengths = torch.LongTensor(len(batch))
220
+
221
+ phone_padded = torch.LongTensor(len(batch), max_phone_len)
222
+ pitchid_padded = torch.LongTensor(len(batch), max_pitchid_len)
223
+ dur_padded = torch.FloatTensor(len(batch), max_dur_len)
224
+ slur_padded = torch.LongTensor(len(batch), max_slur_len)
225
+ gtdur_padded = torch.LongTensor(len(batch), 1, max_gtdur_len)
226
+ mel_padded = torch.FloatTensor(len(batch), self.hparams.data.acoustic_dim, max_mel_len)
227
+ f0_padded = torch.FloatTensor(len(batch), 1, max_f0_len)
228
+ wav_padded = torch.FloatTensor(len(batch), 1, max_wav_len)
229
+ spkids = torch.LongTensor(len(batch))
230
+
231
+ phone_padded.zero_()
232
+ pitchid_padded.zero_()
233
+ dur_padded.zero_()
234
+ slur_padded.zero_()
235
+ gtdur_padded.zero_()
236
+ mel_padded.zero_()
237
+ f0_padded.zero_()
238
+ wav_padded.zero_()
239
+
240
+ for i in range(len(ids_sorted_decreasing)):
241
+ row = batch[ids_sorted_decreasing[i]]
242
+
243
+ phone = row[0]
244
+ phone_padded[i, :phone.size(0)] = phone
245
+ phone_lengths[i] = phone.size(0)
246
+
247
+ pitchid = row[1]
248
+ pitchid_padded[i, :pitchid.size(0)] = pitchid
249
+ pitchid_lengths[i] = pitchid.size(0)
250
+
251
+ dur = row[2]
252
+ dur_padded[i, :dur.size(0)] = dur
253
+ dur_lengths[i] = dur.size(0)
254
+
255
+ slur = row[3]
256
+ slur_padded[i, :slur.size(0)] = slur
257
+ slur_lengths[i] = slur.size(0)
258
+
259
+ gtdur = row[4]
260
+ gtdur_padded[i, :, :gtdur.size(0)] = gtdur
261
+ gtdur_lengths[i] = gtdur.size(0)
262
+
263
+ mel = row[5]
264
+ mel_padded[i, :, :mel.size(1)] = mel
265
+ mel_lengths[i] = mel.size(1)
266
+
267
+ f0 = row[6]
268
+ f0_padded[i, :, :f0.size(1)] = f0
269
+ f0_lengths[i] = f0.size(1)
270
+
271
+ wav = row[7]
272
+ wav_padded[i, :, :wav.size(1)] = wav
273
+ wav_lengths[i] = wav.size(1)
274
+
275
+ spkids[i] = row[8]
276
+
277
+ data_dict = {}
278
+ data_dict["phone"] = phone_padded
279
+ data_dict["phone_lengths"] = phone_lengths
280
+ data_dict["pitchid"] = pitchid_padded
281
+ data_dict["dur"] = dur_padded
282
+ data_dict["slur"] = slur_padded
283
+ data_dict["gtdur"] = gtdur_padded
284
+ data_dict["mel"] = mel_padded
285
+ data_dict["f0"] = f0_padded
286
+ data_dict["wav"] = wav_padded
287
+
288
+ data_dict["mel_lengths"] = mel_lengths
289
+ data_dict["f0_lengths"] = f0_lengths
290
+ data_dict["wav_lengths"] = wav_lengths
291
+ data_dict["spkid"] = spkids
292
+
293
+ return data_dict
294
+
295
+
296
+ class DatasetConstructor():
297
+
298
+ def __init__(self, hparams, num_replicas=1, rank=1):
299
+ self.hparams = hparams
300
+ self.num_replicas = num_replicas
301
+ self.rank = rank
302
+ self.dataset_function = {"SingDataset": SingDataset}
303
+ self.collate_function = {"SingCollate": SingCollate}
304
+ self._get_components()
305
+
306
+ def _get_components(self):
307
+ self._init_datasets()
308
+ self._init_collate()
309
+ self._init_data_loaders()
310
+
311
+ def _init_datasets(self):
312
+ self._train_dataset = self.dataset_function[self.hparams.data.dataset_type](self.hparams, self.hparams.data.data_dir, self.hparams.data.training_filelist, self.hparams.data.training_labellist)
313
+ self._valid_dataset = self.dataset_function[self.hparams.data.dataset_type](self.hparams, self.hparams.data.data_dir, self.hparams.data.validation_filelist, self.hparams.data.validation_labellist)
314
+
315
+ def _init_collate(self):
316
+ self._collate_fn = self.collate_function[self.hparams.data.collate_type](self.hparams)
317
+
318
+ def _init_data_loaders(self):
319
+ train_sampler = torch.utils.data.distributed.DistributedSampler(self._train_dataset, num_replicas=self.num_replicas, rank=self.rank, shuffle=True)
320
+
321
+ self.train_loader = DataLoader(self._train_dataset, num_workers=4, shuffle=False,
322
+ batch_size=self.hparams.train.batch_size, pin_memory=True,
323
+ drop_last=True, collate_fn=self._collate_fn, sampler=train_sampler)
324
+
325
+ self.valid_loader = DataLoader(self._valid_dataset, num_workers=1, shuffle=False,
326
+ batch_size=1, pin_memory=True,
327
+ drop_last=True, collate_fn=self._collate_fn)
328
+
329
+ def get_train_loader(self):
330
+ return self.train_loader
331
+
332
+ def get_valid_loader(self):
333
+ return self.valid_loader
334
+
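A toy numpy sketch of what SingDataset.interpolate_f0 achieves (a simplified analogue, not the exact routine above): unvoiced frames with f0 == 0 are filled from the neighbouring voiced values, and a separate voiced/unvoiced mask is kept:

import numpy as np

f0 = np.array([0., 220., 0., 0., 230., 0.], dtype=np.float32)
vuv = (f0 > 0).astype(np.float32)   # voiced/unvoiced mask, as in the dataset code

voiced_idx = np.nonzero(f0)[0]
# np.interp clamps leading/trailing gaps to the nearest voiced value and fills the
# middle linearly; interpolate_f0 implements a similar fill by hand
filled = np.interp(np.arange(len(f0)), voiced_idx, f0[voiced_idx])
print(filled, vuv)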
egs/visinger2/inference.py ADDED
@@ -0,0 +1,123 @@
1
+ import matplotlib.pyplot as plt
2
+ import IPython.display as ipd
3
+
4
+ import sys
5
+ import os
6
+ import json
7
+ import math
8
+ import torch
9
+ from torch import nn
10
+ from torch.nn import functional as F
11
+ from torch.utils.data import DataLoader
12
+
13
+ import modules.commons as commons
14
+ import utils.utils as utils
15
+ from models import SynthesizerTrn
16
+ from text import npu
17
+ from scipy.io.wavfile import write
18
+ from tqdm import tqdm
19
+ import numpy as np
20
+ import time
21
+ import argparse
22
+
23
+ def parse_label(hps, pho, pitchid, dur, slur, gtdur):
24
+ phos = []
25
+ pitchs = []
26
+ durs = []
27
+ slurs = []
28
+ gtdurs = []
29
+
30
+ for index in range(len(pho.split())):
31
+ phos.append(npu.symbol_converter.ttsing_phone_to_int[pho.strip().split()[index]])
32
+ pitchs.append(npu.symbol_converter.ttsing_opencpop_pitch_to_int[pitchid.strip().split()[index]])
33
+ durs.append(float(dur.strip().split()[index]))
34
+ slurs.append(int(slur.strip().split()[index]))
35
+ gtdurs.append(float(gtdur.strip().split()[index]))
36
+
37
+ phos = np.asarray(phos, dtype=np.int32)
38
+ pitchs = np.asarray(pitchs, dtype=np.int32)
39
+ durs = np.asarray(durs, dtype=np.float32)
40
+ slurs = np.asarray(slurs, dtype=np.int32)
41
+ gtdurs = np.asarray(gtdurs, dtype=np.float32)
42
+ gtdurs = np.ceil(gtdurs / (hps.data.hop_size / hps.data.sample_rate))
43
+
44
+ phos = torch.LongTensor(phos)
45
+ pitchs = torch.LongTensor(pitchs)
46
+ durs = torch.FloatTensor(durs)
47
+ slurs = torch.LongTensor(slurs)
48
+ gtdurs = torch.LongTensor(gtdurs)
49
+ return phos, pitchs, durs, slurs, gtdurs
50
+
51
+ def load_model(model_dir):
52
+
53
+ # load config and model
54
+ model_path = utils.latest_checkpoint_path(model_dir)
55
+ config_path = os.path.join(model_dir, "config.json")
56
+
57
+ hps = utils.get_hparams_from_file(config_path)
58
+
59
+ print("Load model from : ", model_path)
60
+ print("config: ", config_path)
61
+
62
+ net_g = SynthesizerTrn(hps)
63
+ _ = net_g.eval()
64
+ _ = utils.load_checkpoint(model_path, net_g, None)
65
+ return net_g, hps
66
+
67
+ def inference_label2wav(net_g, label_list_path, output_dir, hps, cuda_id=None):
68
+
69
+ id2label = {}
70
+ with open(label_list_path, "r") as in_file:
71
+ for line in in_file.readlines():
72
+ fileid, txt, phones, pitchid, dur, gtdur, slur = line.split('|')
73
+ id2label[fileid] = [phones, pitchid, dur, slur, gtdur]
74
+
75
+ for file_name in tqdm(id2label.keys()):
76
+ pho, pitchid, dur, slur, gtdur = id2label[file_name]
77
+ pho, pitchid, dur, slur, gtdur = parse_label(hps, pho, pitchid, dur, slur, gtdur)
78
+
79
+ with torch.no_grad():
80
+
81
+ # data
82
+ pho_lengths = torch.LongTensor([pho.size(0)])
83
+ pho = pho.unsqueeze(0)
84
+ pitchid = pitchid.unsqueeze(0)
85
+ dur = dur.unsqueeze(0)
86
+ slur = slur.unsqueeze(0)
87
+
88
+ if(cuda_id != None):
89
+ net_g = net_g.cuda(0)
90
+ pho = pho.cuda(0)
91
+ pho_lengths = pho_lengths.cuda(0)
92
+ pitchid = pitchid.cuda(0)
93
+ dur = dur.cuda(0)
94
+ slur = slur.cuda(0)
95
+
96
+ # infer
97
+ o, _, _ = net_g.infer(pho, pho_lengths, pitchid, dur, slur)
98
+ audio = o[0,0].data.cpu().float().numpy()
99
+ audio = audio * 32768 #hps.data.max_wav_value
100
+ audio = audio.astype(np.int16)
101
+
102
+ # save
103
+ write(os.path.join(output_dir, file_name.split('.')[0] + '.wav' ), hps.data.sample_rate, audio)
104
+
105
+ if __name__ == "__main__":
106
+
107
+ parser = argparse.ArgumentParser()
108
+ parser.add_argument('-model_dir', '--model_dir', type=str, required=True)
109
+ parser.add_argument('-input_dir', '--input_dir', type=str, required=True)
110
+ parser.add_argument('-output_dir', '--output_dir', type=str, required=True)
111
+ args = parser.parse_args()
112
+
113
+ model_dir = args.model_dir
114
+ input_dir = args.input_dir
115
+ output_dir = args.output_dir
116
+
117
+ model, hps = load_model(model_dir)
118
+ if(not os.path.exists(output_dir)):
119
+ os.makedirs(output_dir)
120
+ print("load model end!")
121
+
122
+ inference_label2wav(model, input_dir, output_dir, hps, cuda_id=0)
123
+
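For context, inference_label2wav (and SingDataset in dataset.py) read the transcription file as seven '|'-separated fields per line. A sketch with illustrative values; only the field order comes from the code, the fileid and numbers are made up:

line = "taffy/000001|清晨|SP q ing ch en SP|rest D4 D4 G4 G4 rest|0.6 0.273 0.273 0.409 0.409 0.4|0.469 0.131 0.121 0.152 0.409 0.4|0 0 0 0 0 0"
fileid, txt, phones, pitchid, dur, gtdur, slur = line.split('|')
print(fileid, phones.split(), len(gtdur.split()))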
egs/visinger2/models.py ADDED
@@ -0,0 +1,1023 @@
1
+ import sys
2
+ import copy
3
+ import math
4
+ import torch
5
+ from torch import nn
6
+ from torch.nn import functional as F
7
+ from torch.nn import Conv1d, ConvTranspose1d, AvgPool1d, Conv2d
8
+ from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm
9
+
10
+ sys.path.append('../..')
11
+ import modules.commons as commons
12
+ import modules.modules as modules
13
+ import modules.attentions as attentions
14
+
15
+ from modules.commons import init_weights, get_padding
16
+ from text.npu.symbols import ttsing_phone_set, ttsing_opencpop_pitch_set, ttsing_slur_set
17
+
18
+ from modules.ddsp import mlp, gru, scale_function, remove_above_nyquist, upsample
19
+ from modules.ddsp import harmonic_synth, amp_to_impulse_response, fft_convolve
20
+ from modules.ddsp import resample
21
+
22
+ from modules.stft import TorchSTFT
23
+
24
+ import torch.distributions as D
25
+
26
+ from modules.losses import (
27
+ generator_loss,
28
+ discriminator_loss,
29
+ feature_loss,
30
+ kl_loss
31
+ )
32
+
33
+ LRELU_SLOPE = 0.1
34
+
35
+
36
+ class DurationPredictor(nn.Module):
37
+ def __init__(self, in_channels, filter_channels, kernel_size, p_dropout, n_speakers=0, spk_channels=0):
38
+ super().__init__()
39
+
40
+ self.in_channels = in_channels
41
+ self.filter_channels = filter_channels
42
+ self.kernel_size = kernel_size
43
+ self.p_dropout = p_dropout
44
+ self.spk_channels = spk_channels
45
+
46
+ self.drop = nn.Dropout(p_dropout)
47
+ self.conv_1 = nn.Conv1d(in_channels, filter_channels, kernel_size, padding=kernel_size // 2)
48
+ self.norm_1 = modules.LayerNorm(filter_channels)
49
+ self.conv_2 = nn.Conv1d(filter_channels, filter_channels, kernel_size, padding=kernel_size // 2)
50
+ self.norm_2 = modules.LayerNorm(filter_channels)
51
+ self.conv_3 = nn.Conv1d(filter_channels, filter_channels, kernel_size, padding=kernel_size // 2)
52
+ self.norm_3 = modules.LayerNorm(filter_channels)
53
+ self.proj = nn.Conv1d(filter_channels, 2, 1)
54
+
55
+ if n_speakers != 0:
56
+ self.cond = nn.Conv1d(spk_channels, in_channels, 1)
57
+
58
+ def forward(self, x, x_mask, spk_emb=None):
59
+ # x = torch.detach(x)
60
+ if spk_emb is not None:
61
+ spk_emb = torch.detach(spk_emb)
62
+ x = x + self.cond(spk_emb)
63
+
64
+ x = self.conv_1(x * x_mask)
65
+ x = torch.relu(x)
66
+ x = self.norm_1(x)
67
+ x = self.drop(x)
68
+
69
+ x = self.conv_2(x * x_mask)
70
+ x = torch.relu(x)
71
+ x = self.norm_2(x)
72
+ x = self.drop(x)
73
+
74
+ x = self.conv_3(x * x_mask)
75
+ x = torch.relu(x)
76
+ x = self.norm_3(x)
77
+ x = self.drop(x)
78
+
79
+ x = self.proj(x * x_mask)
80
+ return x * x_mask
81
+
82
+
83
+ class TextEncoder(nn.Module):
84
+ def __init__(self,
85
+ n_vocab,
86
+ out_channels,
87
+ hidden_channels,
88
+ filter_channels,
89
+ n_heads,
90
+ n_layers,
91
+ kernel_size,
92
+ p_dropout):
93
+ super().__init__()
94
+ self.n_vocab = n_vocab
95
+ self.out_channels = out_channels
96
+ self.hidden_channels = hidden_channels
97
+ self.filter_channels = filter_channels
98
+ self.n_heads = n_heads
99
+ self.n_layers = n_layers
100
+ self.kernel_size = kernel_size
101
+ self.p_dropout = p_dropout
102
+
103
+ self.emb_phone = nn.Embedding(len(ttsing_phone_set), 256)
104
+ nn.init.normal_(self.emb_phone.weight, 0.0, 256 ** -0.5)
105
+
106
+ self.pre_net = torch.nn.Linear(256, hidden_channels)
107
+
108
+ self.encoder = attentions.Encoder(
109
+ hidden_channels,
110
+ filter_channels,
111
+ n_heads,
112
+ n_layers,
113
+ kernel_size,
114
+ p_dropout)
115
+ self.proj = nn.Conv1d(hidden_channels, out_channels, 1)
116
+
117
+ def forward(self, phone, phone_lengths, pitchid, dur, slur):
118
+ phone_end = self.emb_phone(phone) * math.sqrt(256)
119
+ x = phone_end
120
+
121
+ x = self.pre_net(x)
122
+ x = torch.transpose(x, 1, -1) # [b, h, t]
123
+
124
+ x_mask = torch.unsqueeze(commons.sequence_mask(phone_lengths, x.size(2)), 1).to(x.dtype)
125
+
126
+ x = self.encoder(x * x_mask, x_mask)
127
+ x = self.proj(x) * x_mask
128
+
129
+ return x, x_mask
130
+
131
+
132
+ def pad_v2(input_ele, mel_max_length=None):
133
+ if mel_max_length:
134
+ max_len = mel_max_length
135
+ else:
136
+ max_len = max([input_ele[i].size(0) for i in range(len(input_ele))])
137
+
138
+ out_list = list()
139
+ for i, batch in enumerate(input_ele):
140
+ if len(batch.shape) == 1:
141
+ one_batch_padded = F.pad(
142
+ batch, (0, max_len - batch.size(0)), "constant", 0.0
143
+ )
144
+ elif len(batch.shape) == 2:
145
+ one_batch_padded = F.pad(
146
+ batch, (0, 0, 0, max_len - batch.size(0)), "constant", 0.0
147
+ )
148
+ out_list.append(one_batch_padded)
149
+ out_padded = torch.stack(out_list)
150
+ return out_padded
151
+
152
+
153
+ class LengthRegulator(nn.Module):
154
+ """ Length Regulator """
155
+
156
+ def __init__(self):
157
+ super(LengthRegulator, self).__init__()
158
+
159
+ def LR(self, x, duration, max_len):
160
+ x = torch.transpose(x, 1, 2)
161
+ output = list()
162
+ mel_len = list()
163
+ for batch, expand_target in zip(x, duration):
164
+ expanded = self.expand(batch, expand_target)
165
+ output.append(expanded)
166
+ mel_len.append(expanded.shape[0])
167
+
168
+ if max_len is not None:
169
+ output = pad_v2(output, max_len)
170
+ else:
171
+ output = pad_v2(output)
172
+ output = torch.transpose(output, 1, 2)
173
+ return output, torch.LongTensor(mel_len)
174
+
175
+ def expand(self, batch, predicted):
176
+ predicted = torch.squeeze(predicted)
177
+ out = list()
178
+
179
+ for i, vec in enumerate(batch):
180
+ expand_size = predicted[i].item()
181
+ state_info_index = torch.unsqueeze(torch.arange(0, expand_size), 1).float()
182
+ state_info_length = torch.unsqueeze(torch.Tensor([expand_size] * expand_size), 1).float()
183
+ state_info = torch.cat([state_info_index, state_info_length], 1).to(vec.device)
184
+ new_vec = vec.expand(max(int(expand_size), 0), -1)
185
+ new_vec = torch.cat([new_vec, state_info], 1)
186
+ out.append(new_vec)
187
+ out = torch.cat(out, 0)
188
+ return out
189
+
190
+ def forward(self, x, duration, max_len):
191
+ output, mel_len = self.LR(x, duration, max_len)
192
+ return output, mel_len
193
+
194
+
195
+ class PriorDecoder(nn.Module):
196
+ def __init__(self,
197
+ out_bn_channels,
198
+ hidden_channels,
199
+ filter_channels,
200
+ n_heads,
201
+ n_layers,
202
+ kernel_size,
203
+ p_dropout,
204
+ n_speakers=0,
205
+ spk_channels=0):
206
+ super().__init__()
207
+ self.out_bn_channels = out_bn_channels
208
+ self.hidden_channels = hidden_channels
209
+ self.filter_channels = filter_channels
210
+ self.n_heads = n_heads
211
+ self.n_layers = n_layers
212
+ self.kernel_size = kernel_size
213
+ self.p_dropout = p_dropout
214
+ self.spk_channels = spk_channels
215
+
216
+ self.prenet = nn.Conv1d(hidden_channels + 2, hidden_channels, 3, padding=1)
217
+ self.decoder = attentions.FFT(
218
+ hidden_channels,
219
+ filter_channels,
220
+ n_heads,
221
+ n_layers,
222
+ kernel_size,
223
+ p_dropout)
224
+ self.proj = nn.Conv1d(hidden_channels, out_bn_channels, 1)
225
+
226
+ if n_speakers != 0:
227
+ self.cond = nn.Conv1d(spk_channels, hidden_channels, 1)
228
+
229
+ def forward(self, x, x_lengths, spk_emb=None):
230
+ x_mask = torch.unsqueeze(commons.sequence_mask(x_lengths, x.size(2)), 1).to(x.dtype)
231
+
232
+ x = self.prenet(x) * x_mask
233
+
234
+ if (spk_emb is not None):
235
+ x = x + self.cond(spk_emb)
236
+
237
+ x = self.decoder(x * x_mask, x_mask)
238
+
239
+ bn = self.proj(x) * x_mask
240
+
241
+ return bn, x_mask
242
+
243
+
244
+ class Decoder(nn.Module):
245
+ def __init__(self,
246
+ out_channels,
247
+ hidden_channels,
248
+ filter_channels,
249
+ n_heads,
250
+ n_layers,
251
+ kernel_size,
252
+ p_dropout,
253
+ n_speakers=0,
254
+ spk_channels=0):
255
+ super().__init__()
256
+ self.out_channels = out_channels
257
+ self.hidden_channels = hidden_channels
258
+ self.filter_channels = filter_channels
259
+ self.n_heads = n_heads
260
+ self.n_layers = n_layers
261
+ self.kernel_size = kernel_size
262
+ self.p_dropout = p_dropout
263
+ self.spk_channels = spk_channels
264
+
265
+ self.prenet = nn.Conv1d(hidden_channels + 2, hidden_channels, 3, padding=1)
266
+ self.decoder = attentions.FFT(
267
+ hidden_channels,
268
+ filter_channels,
269
+ n_heads,
270
+ n_layers,
271
+ kernel_size,
272
+ p_dropout)
273
+ self.proj = nn.Conv1d(hidden_channels, out_channels, 1)
274
+
275
+ if n_speakers != 0:
276
+ self.cond = nn.Conv1d(spk_channels, hidden_channels, 1)
277
+
278
+ def forward(self, x, x_lengths, spk_emb=None):
279
+ x_mask = torch.unsqueeze(commons.sequence_mask(x_lengths, x.size(2)), 1).to(x.dtype)
280
+
281
+ x = self.prenet(x) * x_mask
282
+
283
+ if (spk_emb is not None):
284
+ x = x + self.cond(spk_emb)
285
+
286
+ x = self.decoder(x * x_mask, x_mask)
287
+
288
+ x = self.proj(x) * x_mask
289
+
290
+ return x, x_mask
291
+
292
+
293
+ class ConvReluNorm(nn.Module):
294
+ def __init__(self, in_channels, hidden_channels, out_channels, kernel_size, n_layers, p_dropout):
295
+ super().__init__()
296
+ self.in_channels = in_channels
297
+ self.hidden_channels = hidden_channels
298
+ self.out_channels = out_channels
299
+ self.kernel_size = kernel_size
300
+ self.n_layers = n_layers
301
+ self.p_dropout = p_dropout
302
+ assert n_layers > 1, "Number of layers should be larger than 0."
303
+
304
+ self.conv_layers = nn.ModuleList()
305
+ self.norm_layers = nn.ModuleList()
306
+ self.conv_layers.append(nn.Conv1d(in_channels, hidden_channels, kernel_size, padding=kernel_size // 2))
307
+ self.norm_layers.append(LayerNorm(hidden_channels))
308
+ self.relu_drop = nn.Sequential(
309
+ nn.ReLU(),
310
+ nn.Dropout(p_dropout))
311
+ for _ in range(n_layers - 1):
312
+ self.conv_layers.append(nn.Conv1d(hidden_channels, hidden_channels, kernel_size, padding=kernel_size // 2))
313
+ self.norm_layers.append(LayerNorm(hidden_channels))
314
+ self.proj = nn.Conv1d(hidden_channels, out_channels, 1)
315
+ self.proj.weight.data.zero_()
316
+ self.proj.bias.data.zero_()
317
+
318
+ def forward(self, x):
319
+ x = self.conv_layers[0](x)
320
+ x = self.norm_layers[0](x)
321
+ x = self.relu_drop(x)
322
+
323
+ for i in range(1, self.n_layers):
324
+ x_ = self.conv_layers[i](x)
325
+ x_ = self.norm_layers[i](x_)
326
+ x_ = self.relu_drop(x_)
327
+ x = (x + x_) / 2
328
+ x = self.proj(x)
329
+ return x
330
+
331
+
332
+ class PosteriorEncoder(nn.Module):
333
+ def __init__(self,
334
+ hps,
335
+ in_channels,
336
+ out_channels,
337
+ hidden_channels,
338
+ kernel_size,
339
+ dilation_rate,
340
+ n_layers):
341
+ super().__init__()
342
+ self.in_channels = in_channels
343
+ self.out_channels = out_channels
344
+ self.hidden_channels = hidden_channels
345
+ self.kernel_size = kernel_size
346
+ self.dilation_rate = dilation_rate
347
+ self.n_layers = n_layers
348
+
349
+ self.pre = nn.Conv1d(in_channels, hidden_channels, 1)
350
+ self.enc = modules.WN(hidden_channels, kernel_size, dilation_rate, n_layers, n_speakers=hps.data.n_speakers, spk_channels=hps.model.spk_channels)
351
+ # self.enc = ConvReluNorm(hidden_channels,
352
+ # hidden_channels,
353
+ # hidden_channels,
354
+ # kernel_size,
355
+ # n_layers,
356
+ # 0.1)
357
+ self.proj = nn.Conv1d(hidden_channels, out_channels * 2, 1)
358
+
359
+ def forward(self, x, x_lengths, g=None):
360
+ x_mask = torch.unsqueeze(commons.sequence_mask(x_lengths, x.size(2)), 1).to(x.dtype)
361
+ x = self.pre(x) * x_mask
362
+ x = self.enc(x, x_mask, g=g)
363
+ stats = self.proj(x) * x_mask
364
+ return stats, x_mask
365
+
366
+
367
+ class ResBlock3(torch.nn.Module):
368
+ def __init__(self, channels, kernel_size=3, dilation=(1, 3)):
369
+ super(ResBlock3, self).__init__()
370
+ self.convs = nn.ModuleList([
371
+ weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[0],
372
+ padding=get_padding(kernel_size, dilation[0])))
373
+ ])
374
+ self.convs.apply(init_weights)
375
+
376
+ def forward(self, x, x_mask=None):
377
+ for c in self.convs:
378
+ xt = F.leaky_relu(x, LRELU_SLOPE)
379
+ if x_mask is not None:
380
+ xt = xt * x_mask
381
+ xt = c(xt)
382
+ x = xt + x
383
+ if x_mask is not None:
384
+ x = x * x_mask
385
+ return x
386
+
387
+ def remove_weight_norm(self):
388
+ for l in self.convs:
389
+ remove_weight_norm(l)
390
+
391
+
392
+ class Generator_Harm(torch.nn.Module):
393
+ def __init__(self, hps):
394
+ super(Generator_Harm, self).__init__()
395
+ self.hps = hps
396
+
397
+ self.prenet = Conv1d(hps.model.hidden_channels, hps.model.hidden_channels, 3, padding=1)
398
+
399
+ self.net = ConvReluNorm(hps.model.hidden_channels,
400
+ hps.model.hidden_channels,
401
+ hps.model.hidden_channels,
402
+ hps.model.kernel_size,
403
+ 8,
404
+ hps.model.p_dropout)
405
+
406
+ # self.rnn = nn.LSTM(input_size=hps.model.hidden_channels,
407
+ # hidden_size=hps.model.hidden_channels,
408
+ # num_layers=1,
409
+ # bias=True,
410
+ # batch_first=True,
411
+ # dropout=0.5,
412
+ # bidirectional=True)
413
+ self.postnet = Conv1d(hps.model.hidden_channels, hps.model.n_harmonic + 1, 3, padding=1)
414
+
415
+ def forward(self, f0, harm, mask):
416
+ pitch = f0.transpose(1, 2)
417
+ harm = self.prenet(harm)
418
+
419
+ harm = self.net(harm) * mask
420
+ # harm = harm.transpose(1, 2)
421
+ # harm, (hs, hc) = self.rnn(harm)
422
+ # harm = harm.transpose(1, 2)
423
+
424
+ harm = self.postnet(harm)
425
+ harm = harm.transpose(1, 2)
426
+ param = harm
427
+
428
+ param = scale_function(param)
429
+ total_amp = param[..., :1]
430
+ amplitudes = param[..., 1:]
431
+ amplitudes = remove_above_nyquist(
432
+ amplitudes,
433
+ pitch,
434
+ self.hps.data.sample_rate,
435
+ )
436
+ amplitudes /= amplitudes.sum(-1, keepdim=True)
437
+ amplitudes *= total_amp
438
+
439
+ amplitudes = upsample(amplitudes, self.hps.data.hop_size)
440
+ pitch = upsample(pitch, self.hps.data.hop_size)
441
+
442
+ n_harmonic = amplitudes.shape[-1]
443
+ omega = torch.cumsum(2 * math.pi * pitch / self.hps.data.sample_rate, 1)
444
+ omegas = omega * torch.arange(1, n_harmonic + 1).to(omega)
445
+ signal_harmonics = (torch.sin(omegas) * amplitudes)
446
+ signal_harmonics = signal_harmonics.transpose(1, 2)
447
+ return signal_harmonics
448
+
449
+
450
+ class Generator(torch.nn.Module):
451
+ def __init__(self, hps, initial_channel, resblock, resblock_kernel_sizes, resblock_dilation_sizes, upsample_rates,
452
+ upsample_initial_channel, upsample_kernel_sizes, n_speakers=0, spk_channels=0):
453
+ super(Generator, self).__init__()
454
+ self.num_kernels = len(resblock_kernel_sizes)
455
+ self.num_upsamples = len(upsample_rates)
456
+ self.conv_pre = Conv1d(initial_channel, upsample_initial_channel, 7, 1, padding=3)
457
+ self.upsample_rates = upsample_rates
458
+ self.n_speakers = n_speakers
459
+
460
+ resblock = modules.ResBlock1 if resblock == '1' else modules.ResBlock2
461
+
462
+ self.downs = nn.ModuleList()
463
+ for i, (u, k) in enumerate(zip(upsample_rates, upsample_kernel_sizes)):
464
+ i = len(upsample_rates) - 1 - i
465
+ u = upsample_rates[i]
466
+ k = upsample_kernel_sizes[i]
467
+ # print("down: ",upsample_initial_channel//(2**(i+1))," -> ", upsample_initial_channel//(2**i))
468
+ self.downs.append(weight_norm(
469
+ Conv1d(hps.model.n_harmonic + 2, hps.model.n_harmonic + 2,
470
+ k, u, padding=k // 2)))
471
+
472
+ self.resblocks_downs = nn.ModuleList()
473
+ for i in range(len(self.downs)):
474
+ j = len(upsample_rates) - 1 - i
475
+ self.resblocks_downs.append(ResBlock3(hps.model.n_harmonic + 2, 3, (1, 3)))
476
+
477
+ self.concat_pre = Conv1d(upsample_initial_channel + hps.model.n_harmonic + 2, upsample_initial_channel, 3, 1,
478
+ padding=1)
479
+ self.concat_conv = nn.ModuleList()
480
+ for i in range(len(upsample_rates)):
481
+ ch = upsample_initial_channel // (2 ** (i + 1))
482
+ self.concat_conv.append(Conv1d(ch + hps.model.n_harmonic + 2, ch, 3, 1, padding=1, bias=False))
483
+
484
+ self.ups = nn.ModuleList()
485
+ for i, (u, k) in enumerate(zip(upsample_rates, upsample_kernel_sizes)):
486
+ self.ups.append(weight_norm(
487
+ ConvTranspose1d(upsample_initial_channel // (2 ** i), upsample_initial_channel // (2 ** (i + 1)),
488
+ k, u, padding=(k - u) // 2)))
489
+
490
+ self.resblocks = nn.ModuleList()
491
+ for i in range(len(self.ups)):
492
+ ch = upsample_initial_channel // (2 ** (i + 1))
493
+ for j, (k, d) in enumerate(zip(resblock_kernel_sizes, resblock_dilation_sizes)):
494
+ self.resblocks.append(resblock(ch, k, d))
495
+
496
+ self.conv_post = Conv1d(ch, 1, 7, 1, padding=3, bias=False)
497
+ self.ups.apply(init_weights)
498
+
499
+ if self.n_speakers != 0:
500
+ self.cond = nn.Conv1d(spk_channels, upsample_initial_channel, 1)
501
+
502
+ def forward(self, x, ddsp, g=None):
503
+
504
+ x = self.conv_pre(x)
505
+
506
+ if g is not None:
507
+ x = x + self.cond(g)
508
+
509
+ se = ddsp
510
+ res_features = [se]
511
+ for i in range(self.num_upsamples):
512
+ in_size = se.size(2)
513
+ se = self.downs[i](se)
514
+ se = self.resblocks_downs[i](se)
515
+ up_rate = self.upsample_rates[self.num_upsamples - 1 - i]
516
+ se = se[:, :, : in_size // up_rate]
517
+ res_features.append(se)
518
+
519
+ x = torch.cat([x, se], 1)
520
+ x = self.concat_pre(x)
521
+
522
+ for i in range(self.num_upsamples):
523
+ x = F.leaky_relu(x, modules.LRELU_SLOPE)
524
+ in_size = x.size(2)
525
+ x = self.ups[i](x)
526
+ # keep the output length consistent and drop the extra samples
527
+ x = x[:, :, : in_size * self.upsample_rates[i]]
528
+
529
+ x = torch.cat([x, res_features[self.num_upsamples - 1 - i]], 1)
530
+ x = self.concat_conv[i](x)
531
+
532
+ xs = None
533
+ for j in range(self.num_kernels):
534
+ if xs is None:
535
+ xs = self.resblocks[i * self.num_kernels + j](x)
536
+ else:
537
+ xs += self.resblocks[i * self.num_kernels + j](x)
538
+ x = xs / self.num_kernels
539
+
540
+ x = F.leaky_relu(x)
541
+ x = self.conv_post(x)
542
+ x = torch.tanh(x)
543
+
544
+ return x
545
+
546
+ def remove_weight_norm(self):
547
+ print('Removing weight norm...')
548
+ for l in self.ups:
549
+ remove_weight_norm(l)
550
+ for l in self.resblocks:
551
+ l.remove_weight_norm()
552
+
553
+
554
+ class Generator_Noise(torch.nn.Module):
555
+ def __init__(self, hps):
556
+ super(Generator_Noise, self).__init__()
557
+ self.hps = hps
558
+ self.win_size = hps.data.win_size
559
+ self.hop_size = hps.data.hop_size
560
+ self.fft_size = hps.data.n_fft
561
+ self.istft_pre = Conv1d(hps.model.hidden_channels, hps.model.hidden_channels, 3, padding=1)
562
+
563
+ self.net = ConvReluNorm(hps.model.hidden_channels,
564
+ hps.model.hidden_channels,
565
+ hps.model.hidden_channels,
566
+ hps.model.kernel_size,
567
+ 8,
568
+ hps.model.p_dropout)
569
+
570
+ self.istft_amplitude = torch.nn.Conv1d(hps.model.hidden_channels, self.fft_size // 2 + 1, 1, 1)
571
+ self.window = torch.hann_window(self.win_size)
572
+
573
+ def forward(self, x, mask):
574
+ istft_x = x
575
+ istft_x = self.istft_pre(istft_x)
576
+
577
+ istft_x = self.net(istft_x) * mask
578
+
579
+ amp = self.istft_amplitude(istft_x).unsqueeze(-1)
580
+ phase = (torch.rand(amp.shape) * 2 * 3.14 - 3.14).to(amp)
581
+
582
+ real = amp * torch.cos(phase)
583
+ imag = amp * torch.sin(phase)
584
+ spec = torch.cat([real, imag], 3)
585
+ istft_x = torch.istft(spec, self.fft_size, self.hop_size, self.win_size, self.window.to(amp), True,
586
+ length=x.shape[2] * self.hop_size, return_complex=False)
587
+
588
+ return istft_x.unsqueeze(1)
589
+
590
+
591
+ class LayerNorm(nn.Module):
592
+ def __init__(self, channels, eps=1e-5):
593
+ super().__init__()
594
+ self.channels = channels
595
+ self.eps = eps
596
+
597
+ self.gamma = nn.Parameter(torch.ones(channels))
598
+ self.beta = nn.Parameter(torch.zeros(channels))
599
+
600
+ def forward(self, x):
601
+ x = x.transpose(1, -1)
602
+ x = F.layer_norm(x, (self.channels,), self.gamma, self.beta, self.eps)
603
+ return x.transpose(1, -1)
604
+
605
+
606
+ class DiscriminatorP(torch.nn.Module):
607
+ def __init__(self, period, kernel_size=5, stride=3, use_spectral_norm=False):
608
+ super(DiscriminatorP, self).__init__()
609
+ self.period = period
610
+ self.use_spectral_norm = use_spectral_norm
611
+ norm_f = weight_norm if use_spectral_norm == False else spectral_norm
612
+ self.convs = nn.ModuleList([
613
+ norm_f(Conv2d(1, 32, (kernel_size, 1), (stride, 1), padding=(get_padding(kernel_size, 1), 0))),
614
+ norm_f(Conv2d(32, 128, (kernel_size, 1), (stride, 1), padding=(get_padding(kernel_size, 1), 0))),
615
+ norm_f(Conv2d(128, 512, (kernel_size, 1), (stride, 1), padding=(get_padding(kernel_size, 1), 0))),
616
+ norm_f(Conv2d(512, 1024, (kernel_size, 1), (stride, 1), padding=(get_padding(kernel_size, 1), 0))),
617
+ norm_f(Conv2d(1024, 1024, (kernel_size, 1), 1, padding=(get_padding(kernel_size, 1), 0))),
618
+ ])
619
+ self.conv_post = norm_f(Conv2d(1024, 1, (3, 1), 1, padding=(1, 0)))
620
+
621
+ def forward(self, x):
622
+ fmap = []
623
+
624
+ # 1d to 2d
625
+ b, c, t = x.shape
626
+ if t % self.period != 0: # pad first
627
+ n_pad = self.period - (t % self.period)
628
+ x = F.pad(x, (0, n_pad), "reflect")
629
+ t = t + n_pad
630
+ x = x.view(b, c, t // self.period, self.period)
631
+
632
+ for l in self.convs:
633
+ x = l(x)
634
+ x = F.leaky_relu(x, modules.LRELU_SLOPE)
635
+ fmap.append(x)
636
+ x = self.conv_post(x)
637
+ fmap.append(x)
638
+ x = torch.flatten(x, 1, -1)
639
+
640
+ return x, fmap
641
+
642
+
643
+ class DiscriminatorS(torch.nn.Module):
644
+ def __init__(self, use_spectral_norm=False):
645
+ super(DiscriminatorS, self).__init__()
646
+ norm_f = weight_norm if use_spectral_norm == False else spectral_norm
647
+ self.convs = nn.ModuleList([
648
+ norm_f(Conv1d(1, 16, 15, 1, padding=7)),
649
+ norm_f(Conv1d(16, 64, 41, 4, groups=4, padding=20)),
650
+ norm_f(Conv1d(64, 256, 41, 4, groups=16, padding=20)),
651
+ norm_f(Conv1d(256, 1024, 41, 4, groups=64, padding=20)),
652
+ norm_f(Conv1d(1024, 1024, 41, 4, groups=256, padding=20)),
653
+ norm_f(Conv1d(1024, 1024, 5, 1, padding=2)),
654
+ ])
655
+ self.conv_post = norm_f(Conv1d(1024, 1, 3, 1, padding=1))
656
+
657
+ def forward(self, x):
658
+ fmap = []
659
+
660
+ for l in self.convs:
661
+ x = l(x)
662
+ x = F.leaky_relu(x, modules.LRELU_SLOPE)
663
+ fmap.append(x)
664
+ x = self.conv_post(x)
665
+ fmap.append(x)
666
+ x = torch.flatten(x, 1, -1)
667
+
668
+ return x, fmap
669
+
670
+
671
+ class MultiFrequencyDiscriminator(nn.Module):
672
+ def __init__(self,
673
+ hop_lengths=[128, 256, 512],
674
+ hidden_channels=[256, 512, 512],
675
+ domain='double', mel_scale=True):
676
+ super(MultiFrequencyDiscriminator, self).__init__()
677
+
678
+ self.stfts = nn.ModuleList([
679
+ TorchSTFT(fft_size=x * 4, hop_size=x, win_size=x * 4,
680
+ normalized=True, domain=domain, mel_scale=mel_scale)
681
+ for x in hop_lengths])
682
+
683
+ self.domain = domain
684
+ if domain == 'double':
685
+ self.discriminators = nn.ModuleList([
686
+ BaseFrequenceDiscriminator(2, c)
687
+ for x, c in zip(hop_lengths, hidden_channels)])
688
+ else:
689
+ self.discriminators = nn.ModuleList([
690
+ BaseFrequenceDiscriminator(1, c)
691
+ for x, c in zip(hop_lengths, hidden_channels)])
692
+
693
+ def forward(self, x):
694
+ scores, feats = list(), list()
695
+ for stft, layer in zip(self.stfts, self.discriminators):
696
+ # print(stft)
697
+ mag, phase = stft.transform(x.squeeze())
698
+ if self.domain == 'double':
699
+ mag = torch.stack(torch.chunk(mag, 2, dim=1), dim=1)
700
+ else:
701
+ mag = mag.unsqueeze(1)
702
+
703
+ score, feat = layer(mag)
704
+ scores.append(score)
705
+ feats.append(feat)
706
+ return scores, feats
707
+
708
+
709
+ class BaseFrequenceDiscriminator(nn.Module):
710
+ def __init__(self, in_channels, hidden_channels=512):
711
+ super(BaseFrequenceDiscriminator, self).__init__()
712
+
713
+ self.discriminator = nn.ModuleList()
714
+ self.discriminator += [
715
+ nn.Sequential(
716
+ nn.ReflectionPad2d((1, 1, 1, 1)),
717
+ nn.utils.weight_norm(nn.Conv2d(
718
+ in_channels, hidden_channels // 32,
719
+ kernel_size=(3, 3), stride=(1, 1)))
720
+ ),
721
+ nn.Sequential(
722
+ nn.LeakyReLU(0.2, True),
723
+ nn.ReflectionPad2d((1, 1, 1, 1)),
724
+ nn.utils.weight_norm(nn.Conv2d(
725
+ hidden_channels // 32, hidden_channels // 16,
726
+ kernel_size=(3, 3), stride=(2, 2)))
727
+ ),
728
+ nn.Sequential(
729
+ nn.LeakyReLU(0.2, True),
730
+ nn.ReflectionPad2d((1, 1, 1, 1)),
731
+ nn.utils.weight_norm(nn.Conv2d(
732
+ hidden_channels // 16, hidden_channels // 8,
733
+ kernel_size=(3, 3), stride=(1, 1)))
734
+ ),
735
+ nn.Sequential(
736
+ nn.LeakyReLU(0.2, True),
737
+ nn.ReflectionPad2d((1, 1, 1, 1)),
738
+ nn.utils.weight_norm(nn.Conv2d(
739
+ hidden_channels // 8, hidden_channels // 4,
740
+ kernel_size=(3, 3), stride=(2, 2)))
741
+ ),
742
+ nn.Sequential(
743
+ nn.LeakyReLU(0.2, True),
744
+ nn.ReflectionPad2d((1, 1, 1, 1)),
745
+ nn.utils.weight_norm(nn.Conv2d(
746
+ hidden_channels // 4, hidden_channels // 2,
747
+ kernel_size=(3, 3), stride=(1, 1)))
748
+ ),
749
+ nn.Sequential(
750
+ nn.LeakyReLU(0.2, True),
751
+ nn.ReflectionPad2d((1, 1, 1, 1)),
752
+ nn.utils.weight_norm(nn.Conv2d(
753
+ hidden_channels // 2, hidden_channels,
754
+ kernel_size=(3, 3), stride=(2, 2)))
755
+ ),
756
+ nn.Sequential(
757
+ nn.LeakyReLU(0.2, True),
758
+ nn.ReflectionPad2d((1, 1, 1, 1)),
759
+ nn.utils.weight_norm(nn.Conv2d(
760
+ hidden_channels, 1,
761
+ kernel_size=(3, 3), stride=(1, 1)))
762
+ )
763
+ ]
764
+
765
+ def forward(self, x):
766
+ hiddens = []
767
+ for layer in self.discriminator:
768
+ x = layer(x)
769
+ hiddens.append(x)
770
+ return x, hiddens[-1]
771
+
772
+
773
+ class Discriminator(torch.nn.Module):
774
+ def __init__(self, hps, use_spectral_norm=False):
775
+ super(Discriminator, self).__init__()
776
+ periods = [2, 3, 5, 7, 11]
777
+
778
+ discs = [DiscriminatorS(use_spectral_norm=use_spectral_norm)]
779
+ discs = discs + [DiscriminatorP(i, use_spectral_norm=use_spectral_norm) for i in periods]
780
+ self.discriminators = nn.ModuleList(discs)
781
+ self.disc_multfrequency = MultiFrequencyDiscriminator(hop_lengths=[int(hps.data.sample_rate * 2.5 / 1000),
782
+ int(hps.data.sample_rate * 5 / 1000),
783
+ int(hps.data.sample_rate * 7.5 / 1000),
784
+ int(hps.data.sample_rate * 10 / 1000),
785
+ int(hps.data.sample_rate * 12.5 / 1000),
786
+ int(hps.data.sample_rate * 15 / 1000)],
787
+ hidden_channels=[256, 256, 256, 256, 256])
788
+
789
+ def forward(self, y, y_hat):
790
+ y_d_rs = []
791
+ y_d_gs = []
792
+ fmap_rs = []
793
+ fmap_gs = []
794
+ for i, d in enumerate(self.discriminators):
795
+ y_d_r, fmap_r = d(y)
796
+ y_d_g, fmap_g = d(y_hat)
797
+ y_d_rs.append(y_d_r)
798
+ y_d_gs.append(y_d_g)
799
+ fmap_rs.append(fmap_r)
800
+ fmap_gs.append(fmap_g)
801
+ scores_r, fmaps_r = self.disc_multfrequency(y)
802
+ scores_g, fmaps_g = self.disc_multfrequency(y_hat)
803
+ for i in range(len(scores_r)):
804
+ y_d_rs.append(scores_r[i])
805
+ y_d_gs.append(scores_g[i])
806
+ fmap_rs.append(fmaps_r[i])
807
+ fmap_gs.append(fmaps_g[i])
808
+ return y_d_rs, y_d_gs, fmap_rs, fmap_gs
809
+
810
+
811
+ class SynthesizerTrn(nn.Module):
812
+ """
813
+ Model
814
+ """
815
+
816
+ def __init__(self, hps):
817
+ super().__init__()
818
+ self.hps = hps
819
+
820
+ self.text_encoder = TextEncoder(
821
+ len(ttsing_phone_set),
822
+ hps.model.prior_hidden_channels,
823
+ hps.model.prior_hidden_channels,
824
+ hps.model.prior_filter_channels,
825
+ hps.model.prior_n_heads,
826
+ hps.model.prior_n_layers,
827
+ hps.model.prior_kernel_size,
828
+ hps.model.prior_p_dropout)
829
+
830
+ self.decoder = PriorDecoder(
831
+ hps.model.hidden_channels * 2,
832
+ hps.model.prior_hidden_channels,
833
+ hps.model.prior_filter_channels,
834
+ hps.model.prior_n_heads,
835
+ hps.model.prior_n_layers,
836
+ hps.model.prior_kernel_size,
837
+ hps.model.prior_p_dropout,
838
+ n_speakers=hps.data.n_speakers,
839
+ spk_channels=hps.model.spk_channels
840
+ )
841
+
842
+ self.f0_decoder = Decoder(
843
+ 1,
844
+ hps.model.prior_hidden_channels,
845
+ hps.model.prior_filter_channels,
846
+ hps.model.prior_n_heads,
847
+ hps.model.prior_n_layers,
848
+ hps.model.prior_kernel_size,
849
+ hps.model.prior_p_dropout,
850
+ n_speakers=hps.data.n_speakers,
851
+ spk_channels=hps.model.spk_channels
852
+ )
853
+
854
+ self.mel_decoder = Decoder(
855
+ hps.data.acoustic_dim,
856
+ hps.model.prior_hidden_channels,
857
+ hps.model.prior_filter_channels,
858
+ hps.model.prior_n_heads,
859
+ hps.model.prior_n_layers,
860
+ hps.model.prior_kernel_size,
861
+ hps.model.prior_p_dropout,
862
+ n_speakers=hps.data.n_speakers,
863
+ spk_channels=hps.model.spk_channels
864
+ )
865
+
866
+ self.posterior_encoder = PosteriorEncoder(
867
+ hps,
868
+ hps.data.acoustic_dim,
869
+ hps.model.hidden_channels,
870
+ hps.model.hidden_channels, 3, 1, 8)
871
+
872
+ self.dropout = nn.Dropout(0.2)
873
+
874
+ self.duration_predictor = DurationPredictor(
875
+ hps.model.prior_hidden_channels,
876
+ hps.model.prior_hidden_channels,
877
+ 3,
878
+ 0.5,
879
+ n_speakers=hps.data.n_speakers,
880
+ spk_channels=hps.model.spk_channels)
881
+ self.LR = LengthRegulator()
882
+
883
+ self.dec = Generator(hps,
884
+ hps.model.hidden_channels,
885
+ hps.model.resblock,
886
+ hps.model.resblock_kernel_sizes,
887
+ hps.model.resblock_dilation_sizes,
888
+ hps.model.upsample_rates,
889
+ hps.model.upsample_initial_channel,
890
+ hps.model.upsample_kernel_sizes,
891
+ n_speakers=hps.data.n_speakers,
892
+ spk_channels=hps.model.spk_channels)
893
+
894
+ self.dec_harm = Generator_Harm(hps)
895
+
896
+ self.dec_noise = Generator_Noise(hps)
897
+
898
+ self.f0_prenet = nn.Conv1d(1, hps.model.prior_hidden_channels + 2, 3, padding=1)
899
+ self.energy_prenet = nn.Conv1d(1, hps.model.prior_hidden_channels + 2, 3, padding=1)
900
+ self.mel_prenet = nn.Conv1d(hps.data.acoustic_dim, hps.model.prior_hidden_channels + 2, 3, padding=1)
901
+
902
+ if hps.data.n_speakers > 1:
903
+ self.emb_spk = nn.Embedding(hps.data.n_speakers, hps.model.spk_channels)
904
+ self.flow = modules.ResidualCouplingBlock(hps.model.prior_hidden_channels, hps.model.hidden_channels, 5, 1, 4,n_speakers=hps.data.n_speakers, gin_channels=hps.model.spk_channels)
905
+
906
+ def forward(self, phone, phone_lengths, pitchid, dur, slur, gtdur, F0, mel, bn_lengths, spk_id=None):
907
+ if self.hps.data.n_speakers > 0:
908
+ g = self.emb_spk(spk_id).unsqueeze(-1) # [b, h, 1]
909
+ else:
910
+ g = None
911
+
912
+ # Encoder
913
+ x, x_mask = self.text_encoder(phone, phone_lengths, pitchid, dur, slur)
914
+
915
+ # LR
916
+ decoder_input, mel_len = self.LR(x, gtdur, None)
917
+
918
+ LF0 = 2595. * torch.log10(1. + F0 / 700.)
919
+ LF0 = LF0 / 500
920
+
921
+ # aam
922
+ predict_mel, predict_bn_mask = self.mel_decoder(decoder_input + self.f0_prenet(LF0), bn_lengths, spk_emb=g)
923
+
924
+ predict_energy = predict_mel.detach().sum(1).unsqueeze(1) / self.hps.data.acoustic_dim
925
+
926
+ decoder_input = decoder_input + \
927
+ self.f0_prenet(LF0) + \
928
+ self.energy_prenet(predict_energy) + \
929
+ self.mel_prenet(predict_mel.detach())
930
+ decoder_output, predict_bn_mask = self.decoder(decoder_input, bn_lengths, spk_emb=g)
931
+
932
+ prior_info = decoder_output
933
+ m_p = prior_info[:, :self.hps.model.hidden_channels, :]
934
+ logs_p = prior_info[:, self.hps.model.hidden_channels:, :]
935
+
936
+ # posterior
937
+ posterior, y_mask = self.posterior_encoder(mel, bn_lengths,g=g)
938
+ m_q = posterior[:, :self.hps.model.hidden_channels, :]
939
+ logs_q = posterior[:, self.hps.model.hidden_channels:, :]
940
+ z = (m_q + torch.randn_like(m_q) * torch.exp(logs_q)) * y_mask
941
+ z_p = self.flow(z, y_mask, g=g)
942
+
943
+ # kl loss
944
+ loss_kl = kl_loss(z_p, logs_q, m_p, logs_p, y_mask)
945
+
946
+ p_z = z
947
+ p_z = self.dropout(p_z)
948
+
949
+ pitch = upsample(F0.transpose(1, 2), self.hps.data.hop_size)
950
+ omega = torch.cumsum(2 * math.pi * pitch / self.hps.data.sample_rate, 1)
951
+ sin = torch.sin(omega).transpose(1, 2)
952
+
953
+ # dsp synthesize
954
+ noise_x = self.dec_noise(p_z, y_mask)
955
+ harm_x = self.dec_harm(F0, p_z, y_mask)
956
+
957
+ # dsp waveform
958
+ dsp_o = torch.cat([harm_x, noise_x], axis=1)
959
+
960
+ decoder_condition = torch.cat([harm_x, noise_x, sin], axis=1)
961
+
962
+ # dsp based HiFiGAN vocoder
963
+ x_slice, ids_slice = commons.rand_slice_segments(p_z, bn_lengths,
964
+ self.hps.train.segment_size // self.hps.data.hop_size)
965
+ F0_slice = commons.slice_segments(F0, ids_slice, self.hps.train.segment_size // self.hps.data.hop_size)
966
+ dsp_slice = commons.slice_segments(dsp_o, ids_slice * self.hps.data.hop_size, self.hps.train.segment_size)
967
+ condition_slice = commons.slice_segments(decoder_condition, ids_slice * self.hps.data.hop_size,
968
+ self.hps.train.segment_size)
969
+ o = self.dec(x_slice, condition_slice.detach(), g=g)
970
+
971
+ return o, ids_slice, LF0 * predict_bn_mask, dsp_slice.sum(1), loss_kl, predict_mel, predict_bn_mask
972
+
973
+ def infer(self, phone, phone_lengths, pitchid, dur, slur, gtdur=None, spk_id=None, length_scale=1., F0=None, noise_scale=0.8):
974
+
975
+ if self.hps.data.n_speakers > 0:
976
+ g = self.emb_spk(spk_id).unsqueeze(-1) # [b, h, 1]
977
+ else:
978
+ g = None
979
+
980
+ # Encoder
981
+ x, x_mask = self.text_encoder(phone, phone_lengths, pitchid, dur, slur)
982
+
983
+ # dur
984
+ y_lengths = torch.clamp_min(torch.sum(gtdur.squeeze(1), [1]), 1).long()
985
+ LF0 = 2595. * torch.log10(1. + F0 / 700.)
986
+ LF0 = LF0 / 500
987
+ # LR
988
+ decoder_input, mel_len = self.LR(x, gtdur, None)
989
+
990
+ # aam
991
+ predict_mel, predict_bn_mask = self.mel_decoder(decoder_input + self.f0_prenet(LF0), y_lengths, spk_emb=g)
992
+
993
+ predict_energy = predict_mel.sum(1).unsqueeze(1) / self.hps.data.acoustic_dim
994
+
995
+ decoder_input = decoder_input + \
996
+ self.f0_prenet(LF0) + \
997
+ self.energy_prenet(predict_energy) + \
998
+ self.mel_prenet(predict_mel)
999
+ decoder_output, y_mask = self.decoder(decoder_input, y_lengths, spk_emb=g)
1000
+
1001
+ prior_info = decoder_output
1002
+
1003
+ m_p = prior_info[:, :self.hps.model.hidden_channels, :]
1004
+ logs_p = prior_info[:, self.hps.model.hidden_channels:, :]
1005
+ z_p = m_p + torch.randn_like(m_p) * torch.exp(logs_p) * noise_scale
1006
+ z = self.flow(z_p, y_mask, g=g, reverse=True)
1007
+
1008
+ prior_z = z
1009
+
1010
+ noise_x = self.dec_noise(prior_z, y_mask)
1011
+
1012
+ harm_x = self.dec_harm(F0, prior_z, y_mask)
1013
+
1014
+ pitch = upsample(F0.transpose(1, 2), self.hps.data.hop_size)
1015
+ omega = torch.cumsum(2 * math.pi * pitch / self.hps.data.sample_rate, 1)
1016
+ sin = torch.sin(omega).transpose(1, 2)
1017
+
1018
+ decoder_condition = torch.cat([harm_x, noise_x, sin], axis=1)
1019
+
1020
+ # dsp based HiFiGAN vocoder
1021
+ o = self.dec(prior_z, decoder_condition, g=g)
1022
+
1023
+ return o, harm_x.sum(1).unsqueeze(1), noise_x
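
Both forward and infer above normalize the F0 contour to a mel-scale log-F0 (2595 · log10(1 + F0/700), divided by 500) before it enters the prenets, and derive the sinusoidal excitation by integrating the upsampled pitch. Below is a minimal standalone sketch of those two steps, not the repository's own helpers: the 44.1 kHz sample rate, hop size 512, and the use of repeat_interleave in place of the repo's upsample() are assumptions for illustration.

    import math
    import torch

    def f0_to_lf0(f0: torch.Tensor) -> torch.Tensor:
        # Mel-scale log-F0 normalized by 500, mirroring the LF0 lines in SynthesizerTrn
        return 2595. * torch.log10(1. + f0 / 700.) / 500.

    def sine_excitation(f0: torch.Tensor, hop_size: int = 512, sample_rate: int = 44100) -> torch.Tensor:
        # f0: [B, 1, T_frames]. Frame-rate F0 is held constant over each hop (an
        # assumption standing in for the repo's upsample()), then the instantaneous
        # phase is accumulated and its sine taken, as in the omega/sin lines above.
        pitch = f0.transpose(1, 2).repeat_interleave(hop_size, dim=1)  # [B, T_samples, 1]
        omega = torch.cumsum(2 * math.pi * pitch / sample_rate, dim=1)
        return torch.sin(omega).transpose(1, 2)                        # [B, 1, T_samples]

    f0 = torch.full((1, 1, 10), 440.0)       # ten frames of A4
    print(f0_to_lf0(f0)[0, 0, 0].item())     # ≈ 1.10
    print(sine_excitation(f0).shape)         # torch.Size([1, 1, 5120])
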
egs/visinger2/train.py ADDED
@@ -0,0 +1,456 @@
1
+ import os
2
+ import sys
3
+ import json
4
+ import argparse
5
+ import itertools
6
+ import math
7
+ import time
8
+ import logging
9
+
10
+ import torch
11
+ from torch import nn, optim
12
+ from torch.nn import functional as F
13
+ from torch.utils.data import DataLoader
14
+ from torch.utils.tensorboard import SummaryWriter
15
+ import torch.multiprocessing as mp
16
+ import torch.distributed as dist
17
+ from torch.nn.parallel import DistributedDataParallel as DDP
18
+ from torch.cuda.amp import autocast, GradScaler
19
+
20
+ sys.path.append('../..')
21
+ import modules.commons as commons
22
+ import utils.utils as utils
23
+
24
+ from dataset import DatasetConstructor
25
+
26
+ from models import (
27
+ SynthesizerTrn,
28
+ Discriminator
29
+ )
30
+
31
+ from modules.losses import (
32
+ generator_loss,
33
+ discriminator_loss,
34
+ feature_loss,
35
+ kl_loss,
36
+ )
37
+ from preprocess.mel_processing import mel_spectrogram_torch, spec_to_mel_torch, spectrogram_torch
38
+
39
+ torch.backends.cudnn.benchmark = True
40
+ global_step = 0
41
+ use_cuda = torch.cuda.is_available()
42
+ print("use_cuda, ", use_cuda)
43
+
44
+ numba_logger = logging.getLogger('numba')
45
+ numba_logger.setLevel(logging.WARNING)
46
+
47
+
48
+ def main():
49
+ """Assume Single Node Multi GPUs Training Only"""
50
+
51
+ hps = utils.get_hparams()
52
+ os.environ['MASTER_ADDR'] = 'localhost'
53
+ os.environ['MASTER_PORT'] = str(hps.train.port)
54
+
55
+ if (torch.cuda.is_available()):
56
+ n_gpus = torch.cuda.device_count()
57
+ mp.spawn(run, nprocs=n_gpus, args=(n_gpus, hps,))
58
+ else:
59
+ cpurun(0, 1, hps)
60
+
61
+
62
+ def run(rank, n_gpus, hps):
63
+ global global_step
64
+ if rank == 0:
65
+ logger = utils.get_logger(hps.train.save_dir)
66
+ logger.info(hps.train)
67
+ logger.info(hps.data)
68
+ logger.info(hps.model)
69
+ utils.check_git_hash(hps.train.save_dir)
70
+ writer = SummaryWriter(log_dir=hps.train.save_dir)
71
+ writer_eval = SummaryWriter(log_dir=os.path.join(hps.train.save_dir, "eval"))
72
+
73
+ dist.init_process_group(backend='nccl', init_method='env://', world_size=n_gpus, rank=rank)
74
+ torch.manual_seed(hps.train.seed)
75
+ torch.cuda.set_device(rank)
76
+ dataset_constructor = DatasetConstructor(hps, num_replicas=n_gpus, rank=rank)
77
+
78
+ train_loader = dataset_constructor.get_train_loader()
79
+ if rank == 0:
80
+ valid_loader = dataset_constructor.get_valid_loader()
81
+
82
+ net_g = SynthesizerTrn(hps).cuda(rank)
83
+ net_d = Discriminator(hps, hps.model.use_spectral_norm).cuda(rank)
84
+
85
+ optim_g = torch.optim.AdamW(
86
+ net_g.parameters(),
87
+ hps.train.learning_rate,
88
+ betas=hps.train.betas,
89
+ eps=hps.train.eps)
90
+ optim_d = torch.optim.AdamW(
91
+ net_d.parameters(),
92
+ hps.train.learning_rate,
93
+ betas=hps.train.betas,
94
+ eps=hps.train.eps)
95
+ net_g = DDP(net_g, device_ids=[rank], find_unused_parameters=True)
96
+ net_d = DDP(net_d, device_ids=[rank], find_unused_parameters=True)
97
+ try:
98
+ _, _, _, epoch_str = utils.load_checkpoint(utils.latest_checkpoint_path(hps.train.save_dir, "G_*.pth"), net_g,
99
+ optim_g)
100
+ _, _, _, epoch_str = utils.load_checkpoint(utils.latest_checkpoint_path(hps.train.save_dir, "D_*.pth"), net_d,
101
+ optim_d)
102
+ global_step = (epoch_str - 1) * len(train_loader)
103
+ except:
104
+ epoch_str = 1
105
+ global_step = 0
106
+
107
+ scheduler_g = torch.optim.lr_scheduler.ExponentialLR(optim_g, gamma=hps.train.lr_decay, last_epoch=epoch_str - 2)
108
+ scheduler_d = torch.optim.lr_scheduler.ExponentialLR(optim_d, gamma=hps.train.lr_decay, last_epoch=epoch_str - 2)
109
+
110
+ for epoch in range(epoch_str, hps.train.epochs + 1):
111
+ if rank == 0:
112
+ train_and_evaluate(rank, epoch, hps, [net_g, net_d], [optim_g, optim_d], [scheduler_g, scheduler_d],
113
+ [train_loader, valid_loader], logger, [writer, writer_eval])
114
+ else:
115
+ train_and_evaluate(rank, epoch, hps, [net_g, net_d], [optim_g, optim_d], [scheduler_g, scheduler_d],
116
+ [train_loader, None], None, None)
117
+ scheduler_g.step()
118
+ scheduler_d.step()
119
+
120
+
121
+ def cpurun(rank, n_gpus, hps):
122
+ global global_step
123
+ if rank == 0:
124
+ logger = utils.get_logger(hps.train.save_dir)
125
+ logger.info(hps.train)
126
+ logger.info(hps.data)
127
+ logger.info(hps.model)
128
+ utils.check_git_hash(hps.train.save_dir)
129
+ writer = SummaryWriter(log_dir=hps.train.save_dir)
130
+ writer_eval = SummaryWriter(log_dir=os.path.join(hps.train.save_dir, "eval"))
131
+ torch.manual_seed(hps.train.seed)
132
+ dataset_constructor = DatasetConstructor(hps, num_replicas=n_gpus, rank=rank)
133
+
134
+ train_loader = dataset_constructor.get_train_loader()
135
+ if rank == 0:
136
+ valid_loader = dataset_constructor.get_valid_loader()
137
+
138
+ net_g = SynthesizerTrn(hps)
139
+ net_d = Discriminator(hps, hps.model.use_spectral_norm)
140
+
141
+ optim_g = torch.optim.AdamW(
142
+ net_g.parameters(),
143
+ hps.train.learning_rate,
144
+ betas=hps.train.betas,
145
+ eps=hps.train.eps)
146
+ optim_d = torch.optim.AdamW(
147
+ net_d.parameters(),
148
+ hps.train.learning_rate,
149
+ betas=hps.train.betas,
150
+ eps=hps.train.eps)
151
+ try:
152
+ _, _, _, epoch_str = utils.load_checkpoint(utils.latest_checkpoint_path(hps.train.save_dir, "G_*.pth"), net_g,
153
+ optim_g)
154
+ _, _, _, epoch_str = utils.load_checkpoint(utils.latest_checkpoint_path(hps.train.save_dir, "D_*.pth"), net_g,
155
+ optim_g)
156
+ global_step = (epoch_str - 1) * len(train_loader)
157
+ except:
158
+ epoch_str = 1
159
+ global_step = 0
160
+
161
+ scheduler_g = torch.optim.lr_scheduler.ExponentialLR(optim_g, gamma=hps.train.lr_decay, last_epoch=epoch_str - 2)
162
+ scheduler_d = torch.optim.lr_scheduler.ExponentialLR(optim_d, gamma=hps.train.lr_decay, last_epoch=epoch_str - 2)
163
+
164
+ for epoch in range(epoch_str, hps.train.epochs + 1):
165
+ train_and_evaluate(rank, epoch, hps, [net_g, net_d], [optim_g, optim_d], [scheduler_g, scheduler_d],
166
+ [train_loader, valid_loader], logger, [writer, writer_eval])
167
+
168
+ scheduler_g.step()
169
+ scheduler_d.step()
170
+
171
+
172
+ def train_and_evaluate(rank, epoch, hps, nets, optims, schedulers, loaders, logger, writers):
173
+ net_g, net_d = nets
174
+ optim_g, optim_d = optims
175
+ scheduler_g, scheduler_d = schedulers
176
+ train_loader, eval_loader = loaders
177
+ if writers is not None:
178
+ writer, writer_eval = writers
179
+
180
+ train_loader.sampler.set_epoch(epoch)
181
+ global global_step
182
+
183
+ net_g.train()
184
+ net_d.train()
185
+ for batch_idx, data_dict in enumerate(train_loader):
186
+
187
+ phone = data_dict["phone"]
188
+ pitchid = data_dict["pitchid"]
189
+ dur = data_dict["dur"]
190
+ slur = data_dict["slur"]
191
+ gtdur = data_dict["gtdur"]
192
+ mel = data_dict["mel"]
193
+ f0 = data_dict["f0"]
194
+ wav = data_dict["wav"]
195
+ spkid = data_dict["spkid"]
196
+
197
+ phone_lengths = data_dict["phone_lengths"]
198
+ mel_lengths = data_dict["mel_lengths"]
199
+ wav_lengths = data_dict["wav_lengths"]
200
+ f0_lengths = data_dict["f0_lengths"]
201
+
202
+ # data
203
+ if (use_cuda):
204
+ phone, phone_lengths = phone.cuda(rank, non_blocking=True), phone_lengths.cuda(rank, non_blocking=True)
205
+ pitchid = pitchid.cuda(rank, non_blocking=True)
206
+ dur = dur.cuda(rank, non_blocking=True)
207
+ slur = slur.cuda(rank, non_blocking=True)
208
+ gtdur = gtdur.cuda(rank, non_blocking=True)
209
+ mel, mel_lengths = mel.cuda(rank, non_blocking=True), mel_lengths.cuda(rank, non_blocking=True)
210
+ wav, wav_lengths = wav.cuda(rank, non_blocking=True), wav_lengths.cuda(rank, non_blocking=True)
211
+ f0, f0_lengths = f0.cuda(rank, non_blocking=True), f0_lengths.cuda(rank, non_blocking=True)
212
+ spkid = spkid.cuda(rank, non_blocking=True)
213
+
214
+ # forward
215
+ y_hat, ids_slice, LF0, y_ddsp, kl_div, predict_mel, mask = net_g(phone, phone_lengths, pitchid, dur, slur,
216
+ gtdur, f0, mel, mel_lengths, spk_id=spkid)
217
+ y_ddsp = y_ddsp.unsqueeze(1)
218
+
219
+ # Discriminator
220
+ y = commons.slice_segments(wav, ids_slice * hps.data.hop_size, hps.train.segment_size) # slice
221
+ y_ddsp_mel = mel_spectrogram_torch(
222
+ y_ddsp.squeeze(1),
223
+ hps.data.n_fft,
224
+ hps.data.acoustic_dim,
225
+ hps.data.sample_rate,
226
+ hps.data.hop_size,
227
+ hps.data.win_size,
228
+ hps.data.fmin,
229
+ hps.data.fmax
230
+ )
231
+
232
+ y_logspec = torch.log(spectrogram_torch(
233
+ y.squeeze(1),
234
+ hps.data.n_fft,
235
+ hps.data.sample_rate,
236
+ hps.data.hop_size,
237
+ hps.data.win_size
238
+ ) + 1e-7)
239
+
240
+ y_ddsp_logspec = torch.log(spectrogram_torch(
241
+ y_ddsp.squeeze(1),
242
+ hps.data.n_fft,
243
+ hps.data.sample_rate,
244
+ hps.data.hop_size,
245
+ hps.data.win_size
246
+ ) + 1e-7)
247
+
248
+ y_mel = mel_spectrogram_torch(
249
+ y.squeeze(1),
250
+ hps.data.n_fft,
251
+ hps.data.acoustic_dim,
252
+ hps.data.sample_rate,
253
+ hps.data.hop_size,
254
+ hps.data.win_size,
255
+ hps.data.fmin,
256
+ hps.data.fmax
257
+ )
258
+ y_hat_mel = mel_spectrogram_torch(
259
+ y_hat.squeeze(1),
260
+ hps.data.n_fft,
261
+ hps.data.acoustic_dim,
262
+ hps.data.sample_rate,
263
+ hps.data.hop_size,
264
+ hps.data.win_size,
265
+ hps.data.fmin,
266
+ hps.data.fmax
267
+ )
268
+
269
+ y_d_hat_r, y_d_hat_g, _, _ = net_d(y, y_hat.detach())
270
+ loss_disc, losses_disc_r, losses_disc_g = discriminator_loss(y_d_hat_r, y_d_hat_g)
271
+ loss_disc_all = loss_disc
272
+
273
+ optim_d.zero_grad()
274
+ loss_disc_all.backward()
275
+ grad_norm_d = commons.clip_grad_value_(net_d.parameters(), None)
276
+ optim_d.step()
277
+
278
+ # loss
279
+ y_d_hat_r, y_d_hat_g, fmap_r, fmap_g = net_d(y, y_hat)
280
+
281
+ loss_mel = F.l1_loss(y_mel, y_hat_mel) * 45
282
+ loss_mel_dsp = F.l1_loss(y_mel, y_ddsp_mel) * 45
283
+ loss_spec_dsp = F.l1_loss(y_logspec, y_ddsp_logspec) * 45
284
+
285
+ loss_mel_am = F.mse_loss(mel * mask, predict_mel * mask) # * 10
286
+
287
+ loss_fm = feature_loss(fmap_r, fmap_g)
288
+ loss_gen, losses_gen = generator_loss(y_d_hat_g)
289
+
290
+ loss_fm = loss_fm / 2
291
+ loss_gen = loss_gen / 2
292
+ loss_gen_all = loss_gen + loss_fm + loss_mel + loss_mel_dsp + kl_div + loss_mel_am + loss_spec_dsp
293
+
294
+ loss_gen_all = loss_gen_all / hps.train.accumulation_steps
295
+
296
+ loss_gen_all.backward()
297
+ if ((global_step + 1) % hps.train.accumulation_steps == 0):
298
+ grad_norm_g = commons.clip_grad_value_(net_g.parameters(), None)
299
+ optim_g.step()
300
+ optim_g.zero_grad()
301
+
302
+ if rank == 0:
303
+ if (global_step + 1) % (hps.train.accumulation_steps * 10) == 0:
304
+ logger.info(["step&time", global_step, time.asctime(time.localtime(time.time()))])
305
+ logger.info(["mel&mel_dsp&spec_dsp: ", loss_mel, loss_mel_dsp, loss_spec_dsp])
306
+ logger.info(["adv&fm: ", loss_gen, loss_fm])
307
+ logger.info(["kl: ", kl_div])
308
+ logger.info(["am&dur: ", loss_mel_am])
309
+
310
+ if global_step % hps.train.log_interval == 0:
311
+ lr = optim_g.param_groups[0]['lr']
312
+ losses = [loss_gen_all, loss_mel]
313
+ logger.info('Train Epoch: {} [{:.0f}%]'.format(
314
+ epoch,
315
+ 100. * batch_idx / len(train_loader)))
316
+ logger.info([x.item() for x in losses] + [global_step, lr])
317
+
318
+ scalar_dict = {"loss/total": loss_gen_all,
319
+ "loss/mel": loss_mel,
320
+ "loss/adv": loss_gen,
321
+ "loss/fm": loss_fm,
322
+ "loss/mel_ddsp": loss_mel_dsp,
323
+ "loss/spec_ddsp": loss_spec_dsp,
324
+ "loss/mel_am": loss_mel_am,
325
+ "loss/kl_div": kl_div,
326
+ "learning_rate": lr}
327
+
328
+ utils.summarize(
329
+ writer=writer,
330
+ global_step=global_step,
331
+ scalars=scalar_dict)
332
+
333
+ if global_step % hps.train.eval_interval == 0:
334
+ logger.info(['All training params(G): ', utils.count_parameters(net_g), ' M'])
335
+ # print('Sub training params(G): ', \
336
+ # 'text_encoder: ', utils.count_parameters(net_g.module.text_encoder), ' M, ', \
337
+ # 'decoder: ', utils.count_parameters(net_g.module.decoder), ' M, ', \
338
+ # 'mel_decoder: ', utils.count_parameters(net_g.module.mel_decoder), ' M, ', \
339
+ # 'dec: ', utils.count_parameters(net_g.module.dec), ' M, ', \
340
+ # 'dec_harm: ', utils.count_parameters(net_g.module.dec_harm), ' M, ', \
341
+ # 'dec_noise: ', utils.count_parameters(net_g.module.dec_noise), ' M, ', \
342
+ # 'posterior: ', utils.count_parameters(net_g.module.posterior_encoder), ' M, ', \
343
+ # )
344
+
345
+ evaluate(hps, net_g, eval_loader, writer_eval)
346
+ utils.save_checkpoint(net_g, optim_g, hps.train.learning_rate, epoch,
347
+ os.path.join(hps.train.save_dir, "G_{}.pth".format(global_step)), hps.train.eval_interval)
348
+ utils.save_checkpoint(net_d, optim_d, hps.train.learning_rate, epoch,
349
+ os.path.join(hps.train.save_dir, "D_{}.pth".format(global_step)), hps.train.eval_interval)
350
+ net_g.train()
351
+ global_step += 1
352
+
353
+ if rank == 0:
354
+ logger.info('====> Epoch: {}'.format(epoch))
355
+
356
+
357
+ def evaluate(hps, generator, eval_loader, writer_eval):
358
+ generator.eval()
359
+ image_dict = {}
360
+ audio_dict = {}
361
+ with torch.no_grad():
362
+ for batch_idx, data_dict in enumerate(eval_loader):
363
+ if batch_idx == 4:
364
+ break
365
+ phone = data_dict["phone"]
366
+ pitchid = data_dict["pitchid"]
367
+ dur = data_dict["dur"]
368
+ slur = data_dict["slur"]
369
+ gtdur = data_dict["gtdur"]
370
+ mel = data_dict["mel"]
371
+ f0 = data_dict["f0"]
372
+ wav = data_dict["wav"]
373
+ spkid = data_dict["spkid"]
374
+
375
+ phone_lengths = data_dict["phone_lengths"]
376
+ mel_lengths = data_dict["mel_lengths"]
377
+ wav_lengths = data_dict["wav_lengths"]
378
+ f0_lengths = data_dict["f0_lengths"]
379
+
380
+ # data
381
+ if (use_cuda):
382
+ phone, phone_lengths = phone.cuda(0), phone_lengths.cuda(0)
383
+ pitchid = pitchid.cuda(0)
384
+ dur = dur.cuda(0)
385
+ slur = slur.cuda(0)
386
+ wav = wav.cuda(0)
387
+ mel = mel.cuda(0)
388
+ f0 = f0.cuda(0)
389
+ gtdur = gtdur.cuda(0)
390
+ spkid = spkid.cuda(0)
391
+ # remove else
392
+ phone = phone[:1]
393
+ phone_lengths = phone_lengths[:1]
394
+ pitchid = pitchid[:1]
395
+ dur = dur[:1]
396
+ slur = slur[:1]
397
+ wav = wav[:1]
398
+ mel = mel[:1]
399
+ f0 = f0[:1]
400
+ gtdur = gtdur[:1]
401
+ spkid = spkid[:1]
402
+
403
+ y_hat, y_harm, y_noise = generator.module.infer(phone, phone_lengths, pitchid, dur, slur, gtdur=gtdur, F0=f0,
404
+ spk_id=spkid)
405
+ spec = spectrogram_torch(
406
+ wav.squeeze(1),
407
+ hps.data.n_fft,
408
+ hps.data.sample_rate,
409
+ hps.data.hop_size,
410
+ hps.data.win_size
411
+ )
412
+
413
+ y_mel = mel_spectrogram_torch(
414
+ wav.squeeze(1),
415
+ hps.data.n_fft,
416
+ hps.data.acoustic_dim,
417
+ hps.data.sample_rate,
418
+ hps.data.hop_size,
419
+ hps.data.win_size,
420
+ hps.data.fmin,
421
+ hps.data.fmax
422
+ )
423
+ y_hat_mel = mel_spectrogram_torch(
424
+ y_hat.squeeze(1),
425
+ hps.data.n_fft,
426
+ hps.data.acoustic_dim,
427
+ hps.data.sample_rate,
428
+ hps.data.hop_size,
429
+ hps.data.win_size,
430
+ hps.data.fmin,
431
+ hps.data.fmax
432
+ )
433
+ image_dict.update({
434
+ f"gen/mel_{batch_idx}": utils.plot_spectrogram_to_numpy(y_hat_mel[0].cpu().numpy()),
435
+ })
436
+ audio_dict.update( {
437
+ f"gen/audio_{batch_idx}": y_hat[0, :, :],
438
+ f"gen/harm_{batch_idx}": y_harm[0, :, :],
439
+ "gen/noise": y_noise[0, :, :]
440
+ })
441
+ # if global_step == 0:
442
+ image_dict.update({f"gt/mel_{batch_idx}": utils.plot_spectrogram_to_numpy(mel[0].cpu().numpy())})
443
+ audio_dict.update({f"gt/audio_{batch_idx}": wav[0, :, :wav_lengths[0]]})
444
+
445
+ utils.summarize(
446
+ writer=writer_eval,
447
+ global_step=global_step,
448
+ images=image_dict,
449
+ audios=audio_dict,
450
+ audio_sampling_rate=hps.data.sample_rate
451
+ )
452
+ generator.train()
453
+
454
+
455
+ if __name__ == "__main__":
456
+ main()
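
The generator update in train_and_evaluate above uses gradient accumulation: loss_gen_all is divided by hps.train.accumulation_steps, and optim_g only steps (and is zeroed) once every accumulation_steps iterations, while the discriminator is stepped on every batch. The following is a minimal sketch of that accumulation pattern in isolation; the model, data, and clip value are placeholders, not anything from this repository.

    import torch
    from torch import nn

    model = nn.Linear(16, 1)                      # stand-in for net_g
    optim = torch.optim.AdamW(model.parameters(), lr=2e-4)
    accumulation_steps = 4                        # plays the role of hps.train.accumulation_steps

    for step in range(100):
        x, y = torch.randn(8, 16), torch.randn(8, 1)
        loss = nn.functional.mse_loss(model(x), y)
        (loss / accumulation_steps).backward()    # gradients accumulate across iterations
        if (step + 1) % accumulation_steps == 0:  # mirror the (global_step + 1) % ... check above
            nn.utils.clip_grad_value_(model.parameters(), 1.0)
            optim.step()
            optim.zero_grad()
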
infer/__init__.py ADDED
@@ -0,0 +1,122 @@
1
+ import time
2
+
3
+ import librosa
4
+ import numpy as np
5
+ import torch
6
+ import tqdm
7
+ from text import npu
8
+
9
+ def resize2d_f0(x, target_len):
10
+ source = np.array(x)
11
+ source[source < 0.001] = np.nan
12
+ target = np.interp(np.arange(0, len(source) * target_len, len(source)) / target_len, np.arange(0, len(source)),
13
+ source)
14
+ res = np.nan_to_num(target)
15
+ return res
16
+
17
+
18
+ def preprocess(ds):
19
+ note_list = ds["note_seq"]
20
+ midis = [librosa.note_to_midi(x.split("/")[0]) if x != 'rest' else 0
21
+ for x in note_list.split(" ")]
22
+ f0_seq = None
23
+ if ds["f0_seq"] is not None:
24
+ f0_seq = [float(i.strip()) for i in ds["f0_seq"].split(" ")]
25
+ f0_seq = np.array(f0_seq)
26
+ phseq = ds["ph_seq"].split(" ")
27
+ newphseq = []
28
+ for ph in phseq:
29
+ newphseq.append(npu.ttsing_phone_to_int[ph])
30
+ phseq = newphseq
31
+ phseq = np.array(phseq)
32
+ pitch = 440 * (2 ** ((np.array(midis) - 69) / 12))
33
+ durations = [float(i) for i in ds["ph_dur"].split(" ")]
34
+ accu_dur = 0
35
+ accu_durs = []
36
+ for dur in durations:
37
+ accu_dur += dur
38
+ accu_durs.append(accu_dur)
39
+ accu_durs = np.array(accu_durs)
40
+ accu_durs = (accu_durs * 44100 // 512).astype(int)
41
+ sub_durs = np.zeros_like(accu_durs)
42
+ sub_durs[1:accu_durs.shape[0]] = accu_durs[:accu_durs.shape[0]-1]
43
+ durations = accu_durs-sub_durs
44
+ f0_seq = resize2d_f0(f0_seq, sum(durations))
45
+ pos = 0
46
+ for i, d in enumerate(durations):
47
+ if phseq[i] == 0:
48
+ f0_seq[pos:pos + d] = 0
49
+ pos += d
50
+
51
+ return f0_seq,pitch, phseq, durations
52
+
53
+ if __name__ == '__main__':
54
+ inp = {
55
+ "text": "SP 啊 啊 啊 啊 啊 啊 啊 啊 啊 啊 啊 啊 SP 啊 啊 啊 啊 啊 啊 啊 啊 啊 啊 啊 SP 啊 啊 啊 啊 啊 啊 啊 啊 啊 啊 啊 啊 啊 啊 啊 啊 啊 啊 啊 啊 SP",
56
+ "ph_seq": "SP x ing z ou z ai w ei x ian b ian y van s i0 y i d e g uai d ao SP z i0 y ou d e t iao zh e zh ir j ian sh ang d e w u d ao SP q ing y ing d e x iang an y ing zh ong c ang f u d e b o s i0 m ao d eng d ai x ia y i g e m u u b iao SP",
57
+ "note_seq": "rest D5 D5 B4 B4 D5 D5 G5 G5 D5 D5 C5 C5 B4 B4 A#4 A#4 A4 A4 G4 G4 D4 D4 G4 G4 rest D5 D5 B4 B4 D5 D5 G5 G5 D5 D5 C5 C5 B4 B4 C5 C5 C5 C5 G5 G5 C5 C5 rest D5 D5 B4 B4 D5 D5 G5 G5 D5 C5 C5 B4 B4 A#4 A#4 A#4 A#4 A#4 A#4 A#4 A#4 A#4 A#4 G4 G4 D4 D4 G4 G4 F4 F4 G4 G4 A#4 A#4 C5 C5 C#5 D5 D5 rest",
58
+ "note_dur_seq": "0.6 0.136 0.136 0.137 0.137 0.545 0.545 0.546 0.546 0.2720001 0.2720001 0.273 0.273 0.273 0.273 0.2719998 0.2719998 0.546 0.546 0.5450001 0.5450001 0.2730002 0.2730002 0.4089999 0.4089999 0.1370001 0.1359997 0.1359997 0.1360002 0.1360002 0.546 0.546 0.5450001 0.5450001 0.2729998 0.2729998 0.2730002 0.2730002 0.2719998 0.2719998 0.546 0.546 0.2730002 0.2730002 0.5449996 0.5449996 0.6820002 0.6820002 0.1359997 0.1370001 0.1370001 0.1360006 0.1360006 0.5450001 0.5450001 0.5459995 0.5459995 0.2729998 0.2720003 0.2720003 0.2729998 0.2729998 0.3640003 0.3640003 0.1809998 0.1809998 0.3640003 0.3640003 0.1820002 0.1820002 0.3639994 0.3639994 0.1810007 0.1810007 0.3639994 0.3639994 0.1820002 0.1820002 0.4090004 0.4090004 0.4089994 0.4089994 0.2729998 0.2729998 0.2720003 0.2720003 0.5460005 0.8179989 0.8179989 0.5",
59
+ "is_slur_seq": "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0",
60
+ "ph_dur": "0.3875 0.2125 0.070091 0.065909 0.082455 0.054545 0.474545 0.070455 0.339182 0.206818 0.244727 0.027273 0.207091 0.065909 0.163909 0.109091 0.272 0 0.442591 0.103409 0.447273 0.097727 0.224137 0.048864 0.409 0.088136 0.048864 0.070091 0.065909 0.081455 0.054545 0.452818 0.093182 0.37 0.175 0.103682 0.169318 0.115046 0.157955 0.1845 0.0875 0.475545 0.070455 0.273 0 0.506363 0.038636 0.682 0.054182 0.081818 0.076773 0.060227 0.097364 0.038636 0.354091 0.190909 0.546 0.202545 0.070455 0.168591 0.103409 0.218454 0.054545 0.2765 0.0875 0.148045 0.032955 0.325364 0.038636 0.067227 0.114773 0.270818 0.093182 0.148046 0.032955 0.286727 0.077273 0.057 0.125 0.409 0 0.381727 0.027273 0.152545 0.120455 0.272 0.441653 0.104348 0.817999 0.5",
61
+ "f0_timestep": "0.005",
62
+ "f0_seq": "587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.2 587.0 586.9 586.7 586.1 585.4 584.8 584.1 583.4 582.9 582.5 582.3 582.5 582.9 583.4 584.1 584.9 585.5 586.1 586.7 587.0 587.3 587.6 587.9 588.0 588.1 588.4 588.7 588.7 588.7 588.0 586.4 584.1 580.8 575.8 568.7 560.8 552.0 540.9 531.0 522.2 513.8 506.6 501.7 497.9 495.0 493.8 493.0 492.6 492.6 492.7 492.7 492.7 492.7 492.7 492.5 492.6 493.2 494.1 495.6 498.7 502.5 507.6 515.5 523.9 532.9 543.2 553.7 562.4 570.3 577.2 581.7 584.6 586.9 588.2 588.7 588.7 588.6 588.3 588.1 588.0 587.8 587.5 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.2 586.9 586.7 587.0 587.0 587.0 587.0 587.5 588.7 590.8 594.1 599.0 607.7 617.7 630.6 647.9 667.1 686.3 706.4 727.1 743.0 755.2 765.1 773.3 778.6 781.6 783.4 784.4 784.4 784.4 784.4 784.7 784.7 784.3 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.1 784.5 784.9 784.4 784.4 784.4 784.4 783.8 782.3 779.9 775.1 768.7 759.5 747.9 731.5 712.9 694.2 674.0 652.5 636.1 622.4 610.1 601.9 596.0 591.8 589.1 587.8 587.0 587.0 587.0 587.0 586.8 586.8 587.1 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.6 587.9 588.0 588.1 588.5 589.1 589.4 589.4 589.1 588.4 586.8 584.5 581.2 575.9 570.6 564.1 556.0 548.8 542.3 536.2 531.1 527.3 524.8 522.6 521.9 521.5 521.4 521.6 521.9 522.4 522.6 522.6 522.9 523.2 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.6 523.9 524.1 524.4 524.8 525.4 525.8 526.0 526.2 525.7 524.9 523.3 521.1 518.6 515.3 511.3 507.6 504.0 499.9 497.3 495.0 493.1 492.0 491.4 491.1 491.4 491.6 492.1 492.6 492.9 493.2 493.4 493.7 493.9 493.9 493.9 493.9 493.9 493.9 493.9 493.9 493.9 493.9 493.9 493.9 493.9 493.9 493.9 493.9 493.9 493.9 493.9 493.9 493.9 494.1 494.3 494.5 494.8 495.1 495.6 496.1 496.4 496.6 496.5 495.8 494.7 493.2 491.0 487.9 484.7 481.2 477.3 473.8 470.9 468.4 466.2 464.8 464.1 463.6 463.7 463.9 464.2 464.7 465.1 465.4 465.6 465.8 466.1 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 
466.2 466.2 466.2 466.2 466.2 466.4 466.7 466.9 467.2 467.5 468.0 468.4 468.6 468.9 468.3 467.6 466.4 464.4 462.0 459.3 456.0 452.2 449.0 446.0 443.1 441.0 439.5 438.5 437.9 437.5 437.7 437.9 438.4 438.8 439.1 439.3 439.6 439.8 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.0 440.3 440.5 440.5 440.7 441.0 441.4 441.5 441.5 441.3 440.6 439.1 437.0 434.2 430.6 426.3 420.5 415.3 410.1 404.6 400.5 397.2 394.5 392.6 391.4 390.9 390.6 390.6 390.8 391.1 391.4 391.5 391.6 391.8 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.2 392.4 392.3 392.2 392.2 392.2 392.1 391.5 390.6 388.6 385.6 381.6 375.9 368.3 360.1 351.0 339.3 329.8 321.3 313.1 306.8 302.4 298.9 296.3 294.9 294.1 293.7 293.5 293.5 293.5 293.5 293.4 293.5 293.6 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.6 293.5 293.4 293.5 293.5 293.5 293.5 293.7 294.3 295.4 297.0 299.5 303.8 308.9 315.3 323.9 333.6 343.2 353.2 363.5 371.5 377.6 382.5 386.6 389.3 390.8 391.7 392.2 392.2 392.2 392.2 392.4 392.3 392.1 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 391.8 391.8 391.6 391.3 390.9 390.5 390.1 389.5 389.1 388.9 389.1 389.6 390.3 391.2 392.8 394.5 397.0 400.6 405.3 411.1 419.5 431.0 443.7 458.9 479.8 497.9 515.2 532.6 546.7 557.1 565.4 571.7 575.6 577.8 579.1 580.0 580.4 580.8 581.5 582.7 582.9 583.5 584.4 585.1 585.6 586.2 586.8 587.0 587.3 587.7 588.0 588.0 588.2 588.5 588.7 588.7 588.5 587.7 586.3 583.3 579.0 573.7 567.1 558.7 548.3 538.6 529.1 519.2 511.5 505.6 500.7 496.9 494.8 493.6 492.7 492.5 492.6 492.7 492.7 492.7 492.7 492.7 492.5 492.7 493.3 494.5 496.5 499.4 503.7 510.1 517.2 525.5 536.3 546.3 555.5 564.6 572.6 578.1 582.6 585.6 587.3 588.3 588.7 588.7 588.6 588.3 588.0 588.0 587.7 587.4 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.1 586.8 586.8 587.0 587.0 587.0 587.0 587.8 589.1 591.4 595.5 601.9 609.7 619.7 636.1 652.5 670.9 692.6 712.9 730.2 745.9 759.5 768.7 775.1 779.9 782.3 783.8 784.4 784.4 784.4 784.4 784.8 784.5 784.1 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 
784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.2 784.6 784.7 784.4 784.4 784.4 784.4 783.5 781.7 778.6 773.8 766.5 755.2 743.0 727.1 706.4 686.3 667.1 649.2 632.8 617.7 607.7 600.4 594.3 590.8 588.9 587.6 587.0 587.0 587.0 587.0 586.7 586.9 587.2 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.7 588.0 588.0 588.2 588.7 589.1 589.4 589.3 589.0 588.2 586.1 583.4 579.6 574.8 569.0 561.3 554.4 547.5 540.1 534.7 530.2 526.6 524.1 522.5 521.7 521.4 521.4 521.6 522.1 522.5 522.6 522.7 523.0 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.4 523.7 523.9 524.2 524.5 525.0 525.6 525.9 526.1 526.1 525.5 524.5 522.9 520.7 517.6 514.2 510.6 506.6 502.6 499.4 496.7 494.2 492.7 491.9 491.3 491.2 491.4 491.7 492.3 492.7 493.0 493.2 493.5 493.7 493.9 493.9 493.9 493.9 493.9 493.9 493.9 493.9 493.9 493.9 493.9 493.9 493.9 493.9 493.9 493.9 493.9 493.9 493.9 493.9 493.9 493.6 493.4 493.1 492.9 492.6 492.1 491.6 491.3 491.1 491.5 492.2 493.3 495.2 497.8 500.6 504.0 508.4 512.0 515.6 518.9 521.6 523.6 524.9 525.8 526.3 526.0 525.8 525.3 524.8 524.4 524.1 523.8 523.6 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.5 523.8 524.0 524.4 525.0 525.5 526.0 526.9 527.4 527.7 527.8 527.5 527.0 526.4 525.5 524.5 523.5 522.4 521.3 520.4 519.7 519.2 518.7 518.7 519.0 519.5 520.2 520.8 521.4 521.9 522.4 522.6 522.9 523.2 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.2 522.9 522.6 523.3 523.3 523.5 523.9 524.8 526.3 529.0 533.6 539.5 548.4 560.5 577.8 598.5 620.8 646.5 675.9 700.1 720.9 741.4 755.4 765.4 773.0 778.1 781.1 782.6 783.5 783.9 784.0 784.5 784.7 784.3 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.1 784.5 784.9 784.1 784.0 783.7 783.1 782.0 780.0 775.7 770.5 762.0 748.5 731.9 712.5 688.4 660.8 635.5 611.2 586.0 569.0 555.3 543.8 535.9 531.0 527.6 525.2 524.2 523.7 523.3 523.3 522.9 522.7 523.0 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 
523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.2 522.9 522.9 522.7 522.2 521.7 521.1 520.5 519.8 519.3 519.0 519.0 519.2 519.7 520.5 521.5 522.7 524.5 526.0 528.0 530.9 534.3 538.4 543.6 549.7 555.5 561.5 568.0 572.4 575.9 578.8 580.8 581.9 582.6 582.6 582.6 582.3 582.0 581.9 582.3 582.7 583.1 583.8 584.6 585.2 585.8 586.4 586.8 587.1 587.4 587.7 588.0 588.0 588.3 588.6 588.7 588.7 588.3 587.3 585.1 582.6 578.1 572.6 564.6 555.5 546.3 536.3 525.5 517.2 510.1 503.7 499.4 496.5 494.5 493.3 492.7 492.5 492.7 492.7 492.7 492.7 492.7 492.6 492.5 492.9 493.6 494.8 497.3 501.0 505.6 511.5 519.2 529.1 538.6 548.3 558.7 567.1 573.7 579.4 583.6 586.0 587.7 588.7 588.7 588.7 588.5 588.2 588.0 587.9 587.6 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.0 586.7 586.9 587.0 587.0 587.0 587.2 588.0 589.4 592.4 597.0 603.3 612.5 625.1 639.3 655.8 678.6 698.1 716.6 735.3 750.3 761.4 770.3 777.1 780.8 782.7 784.0 784.4 784.4 784.4 784.5 784.8 784.4 784.1 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.0 784.4 784.7 784.7 784.4 784.4 784.4 784.4 783.1 781.2 777.8 771.8 763.2 752.8 739.1 720.2 702.1 682.4 663.3 643.9 627.9 615.7 605.9 598.0 593.1 590.3 588.3 587.3 587.0 587.0 587.0 586.9 586.7 587.0 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.4 587.7 588.0 588.0 588.3 588.8 589.2 589.4 589.3 588.8 587.6 585.6 582.8 578.7 573.1 566.7 559.9 552.7 544.8 538.6 533.7 528.8 525.8 523.7 522.3 521.6 521.4 521.4 521.7 522.2 522.5 522.6 522.8 523.0 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.5 523.7 524.0 524.3 524.6 525.2 525.7 525.9 526.2 525.9 525.2 524.2 522.4 519.7 516.9 513.5 509.2 505.4 501.9 498.6 495.9 493.9 492.5 491.6 491.1 491.2 491.5 491.9 492.4 492.8 493.1 493.3 493.6 493.8 493.9 493.9 493.9 493.9 493.9 493.9 493.9 493.9 493.9 493.9 493.9 493.9 493.9 493.9 493.9 493.9 493.9 493.9 493.9 493.9 493.9 494.2 494.5 494.7 494.9 495.4 495.9 496.3 496.5 496.6 496.2 495.5 493.9 491.9 489.5 486.4 482.6 479.1 475.7 471.9 469.4 467.2 465.5 464.4 463.8 463.5 463.8 464.0 464.5 465.0 465.3 465.5 465.7 465.9 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.3 466.5 466.8 467.1 467.5 468.0 468.5 469.1 
469.7 470.0 470.2 470.1 469.7 469.2 468.5 467.6 466.7 465.7 464.7 463.9 463.2 462.7 462.2 462.1 462.3 462.7 463.2 463.9 464.4 464.8 465.3 465.5 465.8 466.0 466.2 466.2 466.2 466.4 466.7 466.9 467.3 467.8 468.3 468.9 469.5 469.9 470.2 470.2 469.9 469.4 468.7 468.0 467.0 466.0 465.1 464.2 463.4 462.9 462.4 462.1 462.2 462.5 462.9 463.6 464.2 464.6 465.1 465.5 465.7 465.9 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.3 466.5 466.7 467.0 467.4 467.9 468.4 469.0 469.6 470.0 470.2 470.2 469.7 469.2 468.6 467.7 466.7 465.8 464.9 463.9 463.2 462.8 462.3 462.1 462.3 462.6 463.1 463.8 464.3 464.7 465.2 465.5 465.8 466.0 466.2 466.2 466.2 466.4 466.7 466.9 467.3 467.8 468.2 468.8 469.5 469.9 470.2 470.2 469.9 469.4 468.9 468.1 467.1 466.2 465.3 464.2 463.5 462.9 462.5 462.1 462.2 462.4 462.9 463.6 464.1 464.6 465.1 465.4 465.7 465.9 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.3 466.5 466.7 466.7 466.9 467.2 467.2 467.2 467.0 466.1 464.4 462.4 458.9 454.5 448.1 440.9 433.6 425.7 417.1 410.5 404.8 399.8 396.4 394.0 392.4 391.5 391.1 391.1 391.1 391.3 391.5 391.5 391.7 391.8 392.0 392.0 392.0 392.2 392.4 392.3 392.2 392.2 392.2 392.1 391.5 390.6 388.6 385.6 381.6 375.9 368.3 360.1 351.0 339.3 329.8 321.3 313.1 306.8 302.4 298.9 296.3 294.9 294.1 293.7 293.5 293.5 293.5 293.5 293.4 293.5 293.6 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.6 293.5 293.3 293.5 293.5 293.5 293.5 293.7 294.2 295.1 296.6 299.0 303.0 307.8 313.9 322.0 331.7 341.2 351.0 362.0 369.9 376.4 382.2 386.3 388.9 390.6 391.7 392.2 392.2 392.2 392.2 392.3 392.4 392.2 392.2 392.0 392.2 392.2 392.4 392.4 392.5 392.8 393.2 393.4 393.4 393.2 392.7 391.5 389.7 387.4 384.2 380.0 375.6 370.9 366.3 361.5 357.5 354.4 351.9 350.0 348.8 348.3 348.1 348.0 348.1 348.4 348.7 348.8 348.8 349.0 349.2 349.2 349.2 349.2 349.2 349.2 349.2 349.2 349.2 349.2 349.2 349.2 349.2 349.2 349.2 349.2 349.2 349.2 349.2 349.2 349.2 349.2 349.2 349.2 349.2 349.2 349.2 349.2 349.2 349.2 349.2 349.2 349.2 349.2 349.2 349.2 349.2 349.2 349.2 349.2 349.2 349.2 349.2 349.2 349.2 349.2 349.2 349.2 349.2 349.0 348.9 348.8 348.7 348.4 348.2 348.0 348.1 348.2 348.7 350.0 351.6 353.9 356.8 360.5 365.4 370.0 374.7 379.8 383.6 386.8 389.5 391.4 392.6 393.1 393.4 393.4 393.2 392.9 392.6 392.4 392.4 392.2 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 392.0 391.7 391.5 391.5 391.4 391.2 391.1 391.1 391.2 392.0 393.1 394.8 398.1 402.1 407.1 413.7 421.4 429.0 436.7 445.1 451.4 456.3 460.7 463.6 465.4 466.6 467.2 467.2 467.2 467.0 466.8 466.7 466.6 466.4 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.2 466.0 465.8 465.6 465.6 465.3 464.8 464.6 464.6 464.7 465.2 466.4 468.4 470.9 474.6 479.8 485.4 491.4 498.4 
504.8 510.2 514.9 519.2 521.7 523.5 524.6 525.0 525.1 525.0 524.6 524.1 523.9 523.9 523.6 523.4 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.3 523.2 522.9 522.7 522.4 522.1 521.7 521.2 520.7 520.5 520.2 520.8 521.6 522.9 525.2 528.0 531.1 534.9 539.4 543.3 546.9 550.5 553.1 555.0 556.3 557.1 557.5 557.3 557.0 556.4 555.9 555.5 555.2 554.9 554.6 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.4 554.0 553.8 553.5 553.2 552.8 552.2 551.7 551.4 551.2 551.7 552.6 554.0 556.1 558.8 562.0 566.5 570.6 574.7 579.3 582.7 585.5 587.8 589.4 590.2 590.7 590.4 590.1 589.6 589.0 588.5 588.2 587.9 587.6 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.3 587.1 587.0 586.9 586.4 585.9 585.3 584.6 583.8 583.2 582.7 582.4 582.4 582.7 583.0 583.6 584.5 585.1 585.7 586.3 586.8 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0 587.0",
63
+ "input_type": "phoneme",
64
+ "offset": 72.491
65
+ }
66
+ res = preprocess(inp)
67
+ print(res)
68
+ print([float(i) for i in res[0]])
69
+
70
+ def cross_fade(a: np.ndarray, b: np.ndarray, idx: int):
71
+ result = np.zeros(idx + b.shape[0])
72
+ fade_len = a.shape[0] - idx
73
+ np.copyto(dst=result[:idx], src=a[:idx])
74
+ k = np.linspace(0, 1.0, num=fade_len, endpoint=True)
75
+ result[idx: a.shape[0]] = (1 - k) * a[idx:] + k * b[: fade_len]
76
+ np.copyto(dst=result[a.shape[0]:], src=b[fade_len:])
77
+ return result
78
+
79
+
80
+ def infer_ds(model, hps, ds, speaker, trans):
81
+
82
+ sample_rate = 44100
83
+
84
+ result = np.zeros(0)
85
+ current_length = 0
86
+ for inp in tqdm.tqdm(ds):
87
+ spkid = hps.data.spk2id[speaker]
88
+ f0_seq, pitch, phseq, durations = preprocess(inp)
89
+
90
+ f0 = torch.FloatTensor(f0_seq).unsqueeze(0)
91
+
92
+ text_norm = torch.LongTensor(phseq)
93
+ x_tst = text_norm.unsqueeze(0)
94
+ x_tst_lengths = torch.LongTensor([text_norm.size(0)])
95
+ spk = torch.LongTensor([spkid])
96
+ manual_f0 = torch.FloatTensor(f0).unsqueeze(0)
97
+ manual_dur = torch.LongTensor(durations).unsqueeze(0)
98
+ t1 = time.time()
99
+ infer_res = model.infer(x_tst, x_tst_lengths, None, None,
100
+ None, gtdur=manual_dur, spk_id=spk,
101
+ F0=manual_f0 * 2 ** (trans / 12))
102
+ seg_audio = infer_res[0][0, 0].data.float().numpy()
103
+ try:
104
+ offset_ = inp['offset']
105
+ except:
106
+ offset_ = 0
107
+ silent_length = round(offset_ * sample_rate) - current_length
108
+ if silent_length >= 0:
109
+ result = np.append(result, np.zeros(silent_length))
110
+ result = np.append(result, seg_audio)
111
+ else:
112
+ result = cross_fade(result, seg_audio, current_length + silent_length)
113
+ current_length = current_length + silent_length + seg_audio.shape[0]
114
+ print("infer time:", time.time() - t1)
115
+ return result
116
+
117
+
118
+
119
+
120
+ #
121
+ # midis = [librosa.note_to_midi(x.split("/")[0]) if x != 'rest' else 0
122
+ # for x in note_lst]
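
infer_ds above assembles the per-segment audio on one 44.1 kHz timeline: when a segment's offset starts after the audio produced so far it is padded in with silence, otherwise its head is overlapped with the existing tail via cross_fade. A small usage sketch of cross_fade on dummy arrays follows; the lengths and values are made up for illustration.

    import numpy as np

    # cross_fade as defined above: keeps a[:idx], linearly blends the overlap
    # a[idx:] with the start of b, then appends the rest of b.
    a = np.ones(1000)        # tail of the audio assembled so far
    b = np.full(800, 0.5)    # next synthesized segment, overlapping a's last 200 samples
    mixed = cross_fade(a, b, idx=800)
    print(mixed.shape)       # (1600,) == idx + len(b)
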
infer/share.ds ADDED
@@ -0,0 +1,62 @@
1
+ [
2
+ {
3
+ "text": "SP 清 晨 SP",
4
+ "ph_seq": "SP q ing ch en SP",
5
+ "note_seq": "rest D4 D4 G4 G4 rest",
6
+ "note_dur_seq": "0.6 0.273 0.273 0.4089999 0.4089999 0.4",
7
+ "is_slur_seq": "0 0 0 0 0 0",
8
+ "ph_dur": "0.469318 0.130682 0.120727 0.152273 0.409 0.4",
9
+ "f0_timestep": "0.005",
10
+ "f0_seq": "301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 301.9 302.0 302.4 301.9 301.4 300.5 299.4 299.0 298.3 297.9 297.6 297.2 297.2 297.0 296.8 296.9 296.7 296.6 296.8 296.9 296.9 297.4 297.6 297.7 298.2 298.5 298.3 298.6 298.7 298.5 298.6 298.3 297.8 296.4 293.9 291.5 286.7 283.2 279.6 278.5 283.4 288.4 293.5 298.6 303.9 309.3 314.7 320.3 325.9 331.7 337.5 343.5 349.5 355.7 362.0 368.3 374.8 381.5 387.1 388.7 391.3 393.6 396.1 397.7 398.7 399.3 399.6 399.8 399.4 399.0 398.6 397.9 397.7 397.1 396.7 396.1 396.0 395.4 395.6 395.7 395.9 395.9 396.1 396.4 396.8 397.0 397.3 397.5 397.5 397.5 397.7 397.7 397.7 397.7 397.9 397.7 397.7 397.7 397.7 397.7 397.7 397.5 397.5 397.2 397.0 397.0 396.7 396.6 396.6 396.5 396.3 396.3 396.1 396.1 396.3 396.3 396.1 396.3 396.3 396.4 396.6 396.7 396.6 396.9 397.2 396.8 397.4 397.9 398.0 398.5 399.1 399.1 399.1 399.0 398.7 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2 398.2",
11
+ "input_type": "phoneme",
12
+ "offset": 16.582
13
+ },
14
+ {
15
+ "text": "SP 阳 光 攀 过 窗 沿 隙 缝 SP 温 暖 了 我 的 被 窝 搅 醒 没 做 完 的 好 梦 SP 起 身 SP 手 忙 脚 乱 匆 匆 出 门 SP 怕 赶 不 上 整 点 开 走 的 巴 士 一 路 狂 奔 SP 铃 声 SP 唤 不 动 我 疲 惫 眼 神 SP 怪 自 己 昨 夜 睡 的 太 沉 闹 钟 不 够 大 声 SP",
16
+ "ph_seq": "SP y ang g uang p an g uo ch uang y En x i f eng SP w en n uan l e w o d e b ei w o j iao x ing m ei z uo w an d e h ao m eng SP q i sh en SP sh ou m ang j iao l uan c ong c ong ch u m en SP p a g an b u sh ang zh eng d ian k ai z ou d e b a sh ir y i l u k uang b en SP l ing sh eng SP h uan b u d ong w o p i b ei y En sh en SP g uai z i0 j i z uo y E sh ui d e t ai ch en n ao zh ong b u g ou d a sh eng SP",
17
+ "note_seq": "rest G4 G4 G4 G4 G4 G4 G4 G4 F#4 F#4 G4 G4 A4 A4 G4 G4 rest F#4 F#4 F#4 F#4 F#4 F#4 E4 E4 E4 E4 E4 E4 E4 E4 E4 E4 E4 E4 D4 D4 E4 E4 G4 G4 D4 D4 D4 D4 D4 D4 rest D4 D4 G4 G4 rest G4 G4 G4 G4 G4 G4 G4 G4 F#4 F#4 G4 G4 A4 A4 G4 G4 rest F#4 F#4 F#4 F#4 F#4 F#4 E4 E4 E4 E4 E4 E4 E4 E4 E4 E4 E4 E4 D4 D4 E4 E4 G4 G4 A4 A4 G4 G4 F#4 F#4 rest D4 D4 G4 G4 rest G4 G4 G4 G4 G4 G4 G4 G4 F#4 F#4 G4 G4 A4 A4 G4 G4 rest F#4 F#4 F#4 F#4 F#4 F#4 E4 E4 E4 E4 E4 E4 E4 E4 E4 E4 E4 E4 D4 D4 E4 E4 G4 G4 D4 D4 D4 D4 D4 D4 rest",
18
+ "note_dur_seq": "0.327 0.545 0.545 0.273 0.273 0.273 0.273 0.2720001 0.2720001 0.273 0.273 0.273 0.273 0.273 0.273 0.4089999 0.4089999 0.1359999 0.273 0.273 0.2720001 0.2720001 0.273 0.273 0.273 0.273 0.2730002 0.2730002 0.2719998 0.2719998 0.2729998 0.2729998 0.2730002 0.2730002 0.2729998 0.2729998 0.2720003 0.2720003 0.2729998 0.2729998 0.4089999 0.4089999 0.4090004 0.4090004 0.2729998 0.2729998 0.5450001 0.5450001 0.2729998 0.2730002 0.2730002 0.5450001 0.5450001 0.2729998 0.5450001 0.5450001 0.2729998 0.2729998 0.2730007 0.2730007 0.2729998 0.2729998 0.2719994 0.2719994 0.2730007 0.2730007 0.2729998 0.2729998 0.4090004 0.4090004 0.1359997 0.2729998 0.2729998 0.2729998 0.2729998 0.2720003 0.2720003 0.2729998 0.2729998 0.2730007 0.2730007 0.2729998 0.2729998 0.2719994 0.2719994 0.2730007 0.2730007 0.2729998 0.2729998 0.2729998 0.2729998 0.2720003 0.2720003 0.4089994 0.4089994 0.4090004 0.4090004 0.2729998 0.2729998 0.5459995 0.5459995 0.2720013 0.2729988 0.2729988 0.5460014 0.5460014 0.2719994 0.5459995 0.5459995 0.2720013 0.2720013 0.2729988 0.2729988 0.2730007 0.2730007 0.2729988 0.2729988 0.2720013 0.2720013 0.2729988 0.2729988 0.2730007 0.2730007 0.2730007 0.2719994 0.2719994 0.2730007 0.2730007 0.2729988 0.2729988 0.2720013 0.2720013 0.2729988 0.2729988 0.2730007 0.2730007 0.2729988 0.2729988 0.2720013 0.2720013 0.2729988 0.2729988 0.2730007 0.2730007 0.2730007 0.2730007 0.4089985 0.4089985 0.4090004 0.4090004 0.2720013 0.2720013 0.4099998 0.4099998 0.081",
19
+ "is_slur_seq": "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0",
20
+ "ph_dur": "0.245182 0.081818 0.490454 0.054546 0.202546 0.070454 0.218454 0.054546 0.151545 0.120455 0.202546 0.070454 0.136636 0.136364 0.103683 0.169317 0.409 0.087136 0.048864 0.1855 0.0875 0.178818 0.093182 0.1855 0.0875 0.234363 0.038637 0.247999 0.025002 0.228817 0.043182 0.148 0.125 0.082092 0.190908 0.218453 0.054546 0.151546 0.120455 0.229817 0.043182 0.348773 0.060226 0.245363 0.163637 0.202546 0.070454 0.545 0.175273 0.097727 0.070728 0.202272 0.545 0.109363 0.163637 0.496136 0.048864 0.1855 0.0875 0.207092 0.065908 0.169591 0.103409 0.146999 0.125 0.136637 0.136364 0.115045 0.157955 0.409 0.087135 0.048864 0.245728 0.027271 0.234365 0.038635 0.119729 0.152271 0.196862 0.076138 0.247999 0.025002 0.185498 0.087502 0.195862 0.076138 0.234365 0.038635 0.234365 0.038635 0.115045 0.157955 0.168591 0.103409 0.343089 0.06591 0.327183 0.081818 0.218453 0.054546 0.546 0.206091 0.06591 0.10368 0.169319 0.546001 0.190182 0.081818 0.497137 0.048862 0.233366 0.038635 0.202545 0.070454 0.224138 0.048862 0.218452 0.054546 0.217455 0.054546 0.120728 0.152271 0.273001 0.240046 0.032955 0.217453 0.054546 0.202547 0.070454 0.179817 0.093182 0.24473 0.027271 0.130955 0.142044 0.234365 0.038635 0.147999 0.125 0.114046 0.157955 0.196861 0.076138 0.218454 0.054546 0.202547 0.070454 0.278318 0.13068 0.376045 0.032955 0.086775 0.185226 0.41 0.081",
21
+ "f0_timestep": "0.005",
22
+ "f0_seq": "371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.9 371.5 372.2 373.2 374.9 376.0 378.0 378.4 379.3 378.6 378.6 378.0 377.3 374.9 374.5 373.1 373.2 373.6 374.3 373.6 374.5 374.3 375.2 376.7 377.6 378.1 380.4 380.8 382.8 385.5 387.2 389.1 391.3 392.7 394.3 395.4 396.8 397.2 397.9 398.4 398.6 398.6 399.2 399.3 399.3 399.5 399.3 399.1 399.3 399.3 398.9 399.1 399.1 398.8 399.1 399.1 398.6 398.8 399.1 398.6 398.6 398.6 398.2 397.9 397.7 397.5 397.5 397.7 397.2 397.2 397.0 397.0 397.0 397.2 397.2 397.2 397.2 397.2 397.0 397.2 397.2 396.8 397.0 397.2 396.6 396.7 397.0 396.2 395.6 395.0 393.6 390.4 388.0 383.9 380.2 377.8 378.3 379.5 380.7 381.9 383.1 384.3 385.5 386.7 388.0 389.2 390.4 391.7 392.9 394.2 395.4 396.6 397.9 398.2 397.9 398.3 398.2 398.0 397.9 397.7 397.7 397.2 397.4 397.5 396.8 397.0 397.0 396.8 397.0 397.2 397.2 397.5 397.7 397.7 398.2 398.2 398.6 398.6 398.6 398.2 398.0 396.8 395.6 395.0 393.4 392.0 390.4 388.7 385.7 383.0 380.8 377.6 375.6 375.3 375.6 375.8 376.0 376.3 376.5 376.7 376.9 377.2 377.4 377.6 377.9 378.1 378.3 378.6 378.8 379.0 379.3 381.5 384.3 387.3 390.9 393.3 395.2 397.3 398.6 398.6 399.0 399.1 398.5 398.2 398.2 397.8 397.5 397.3 397.2 397.0 397.0 396.6 396.4 395.9 395.4 395.0 395.0 395.0 395.2 395.9 396.2 396.6 397.3 397.7 397.2 397.2 396.6 395.2 392.7 390.0 386.8 382.2 379.2 376.0 374.3 376.8 379.3 381.8 384.3 386.9 389.4 392.0 394.6 397.2 398.2 398.0 398.4 398.4 398.2 398.4 398.6 398.4 398.4 398.6 398.4 398.4 398.6 398.4 398.4 398.4 398.4 398.2 398.2 398.2 397.9 397.9 398.2 398.0 397.7 397.5 397.3 395.2 393.7 389.3 383.9 378.4 371.9 367.7 363.4 362.4 362.7 362.9 363.2 363.5 363.8 364.0 364.3 364.6 364.9 365.1 365.4 365.7 366.0 366.3 366.5 366.8 367.1 367.4 367.7 368.1 370.7 372.6 374.4 375.4 376.0 376.2 376.0 376.2 376.2 375.6 375.8 375.8 375.6 375.8 375.8 375.6 375.8 375.8 375.6 376.0 376.2 376.2 376.4 376.7 376.7 376.9 376.5 376.6 376.7 376.2 376.4 376.2 375.7 375.8 375.6 374.9 374.5 375.4 375.6 375.8 376.4 376.5 376.1 376.7 377.1 377.1 378.4 379.5 381.5 383.9 386.5 390.0 392.2 394.3 395.4 396.2 397.2 397.6 397.9 397.9 398.3 398.4 398.2 398.2 398.2 398.0 398.2 398.0 397.9 398.2 398.0 397.7 397.9 397.9 397.5 397.5 397.5 397.1 397.0 396.6 396.0 394.3 391.8 387.9 384.2 382.1 379.5 382.2 384.8 387.5 390.3 393.0 395.7 398.5 401.3 404.1 406.9 409.8 412.7 415.5 418.4 421.4 424.3 427.3 430.3 433.3 436.3 439.4 441.0 441.0 441.7 442.8 443.2 443.3 443.6 444.0 444.1 443.9 444.1 444.1 444.1 444.3 444.3 444.6 444.6 444.6 444.9 444.9 444.6 445.1 444.9 444.9 445.3 444.6 444.3 443.5 441.0 439.7 435.7 431.4 427.7 422.3 418.7 415.3 414.6 413.8 413.1 412.3 411.6 410.9 410.1 409.4 408.7 407.9 407.2 406.5 405.7 405.0 404.3 403.6 402.9 402.1 401.4 400.7 400.2 399.7 398.6 398.2 398.2 397.9 397.9 397.7 397.9 397.9 397.5 397.7 397.5 397.3 397.5 397.5 397.1 397.2 397.2 397.0 397.2 397.2 397.0 397.0 397.2 397.0 397.4 397.7 397.5 397.7 397.9 397.7 397.9 398.1 397.9 397.9 397.9 397.9 397.9 397.9 397.9 397.7 397.7 397.9 397.9 397.9 398.1 398.4 398.4 398.8 399.5 399.5 400.5 401.2 401.4 402.3 402.9 403.0 402.8 402.8 402.1 401.2 400.3 399.4 398.5 397.6 396.7 395.8 394.9 394.1 393.2 392.3 391.4 390.5 389.7 388.8 387.9 387.1 386.2 385.3 384.5 383.6 382.7 381.9 381.0 380.2 
379.3 378.5 377.6 376.8 375.9 375.1 374.3 373.4 372.6 371.7 370.9 370.1 369.3 368.4 367.6 366.8 366.0 365.8 366.6 367.4 368.5 369.1 370.2 371.1 371.7 372.2 372.8 373.3 373.4 373.4 373.6 373.2 373.4 373.4 373.2 373.0 373.2 372.9 373.0 373.2 373.2 373.0 373.2 373.0 372.8 372.8 372.1 372.0 372.1 371.3 371.3 371.9 371.6 371.7 372.1 371.6 371.7 372.1 371.3 371.3 371.5 370.6 370.5 370.9 370.2 370.0 370.6 370.3 370.0 370.9 370.9 370.9 371.4 372.1 373.0 373.7 374.9 375.1 376.0 376.0 376.0 376.2 376.4 376.0 376.0 375.8 375.6 375.4 375.2 374.9 374.9 374.9 374.9 374.9 374.9 374.9 374.9 374.8 374.7 374.5 374.5 374.3 374.1 374.1 373.9 374.0 374.3 374.1 374.5 374.5 374.3 374.5 374.5 374.3 374.5 374.3 373.6 373.6 373.2 372.6 372.7 372.6 372.4 372.8 373.0 373.2 373.9 374.6 374.9 375.4 367.2 368.1 369.3 370.9 371.5 371.5 371.7 371.9 371.7 371.9 371.9 371.9 372.3 372.4 372.1 372.5 372.6 372.6 372.8 373.0 373.0 373.0 373.2 373.2 373.2 373.2 373.4 373.4 373.2 372.6 372.0 371.5 370.2 368.1 366.6 364.5 360.5 358.4 354.7 351.3 349.1 346.2 342.2 337.3 334.0 332.4 330.8 330.3 329.8 329.8 329.3 329.8 329.7 330.0 329.6 331.5 333.5 334.3 335.0 335.2 335.4 335.2 335.4 335.4 335.2 335.0 334.8 334.6 334.2 334.2 334.1 333.8 333.8 334.0 333.8 334.0 334.2 334.2 334.4 334.4 334.6 334.6 334.6 334.6 334.6 334.5 334.4 333.9 333.3 332.7 329.8 326.2 321.4 314.7 308.1 304.4 300.0 298.1 300.0 302.8 305.6 308.5 311.3 314.3 317.2 320.1 323.1 326.2 329.2 332.3 335.4 338.5 338.7 338.1 337.1 336.8 336.0 335.6 335.2 334.8 334.5 334.2 334.1 333.8 333.8 333.8 334.0 334.0 334.0 334.2 334.2 334.2 334.2 334.2 334.0 334.0 334.0 333.9 333.3 333.3 332.9 330.4 329.0 324.9 320.2 315.8 310.9 308.7 306.0 307.9 309.8 311.8 313.7 315.7 317.7 319.7 321.7 323.8 325.8 327.8 329.9 332.0 334.1 336.2 338.3 337.8 337.5 336.9 335.8 335.6 335.3 334.6 334.6 334.6 334.0 334.2 334.0 333.7 333.7 333.7 333.2 333.1 333.1 332.7 332.7 332.8 332.7 332.8 333.1 333.1 333.1 333.7 333.5 333.5 333.7 333.3 332.9 332.9 332.7 332.3 332.5 332.3 332.0 331.9 331.9 331.6 331.2 331.0 330.6 330.2 330.0 329.8 329.7 329.8 330.0 330.3 331.0 331.6 332.9 333.8 334.6 335.4 335.7 336.0 336.0 336.0 336.2 336.0 335.6 335.8 335.5 334.8 335.0 335.0 334.6 334.8 334.8 334.6 334.6 334.6 334.3 334.0 334.2 333.7 333.5 333.3 331.0 329.7 325.1 319.7 314.2 307.4 303.6 299.1 297.9 299.9 301.9 304.0 306.0 308.1 310.1 312.2 314.3 316.4 318.5 320.7 322.8 325.0 327.2 329.4 331.6 333.8 336.1 338.3 339.1 338.8 338.7 338.7 338.3 338.1 337.8 336.9 336.6 336.0 336.0 335.5 335.4 335.4 335.2 335.2 335.4 335.0 335.2 335.4 335.4 335.4 335.6 335.6 335.8 335.8 335.0 334.0 332.9 329.8 327.2 323.7 320.1 317.8 318.1 319.0 319.9 320.9 321.9 322.8 323.8 324.7 325.7 326.7 327.6 328.6 329.6 330.6 331.6 332.5 333.5 334.5 335.5 336.5 337.5 337.1 337.1 336.7 336.4 336.9 336.4 335.9 335.6 335.1 334.4 334.0 333.7 333.5 333.3 332.9 332.7 332.4 331.9 331.6 331.5 331.7 331.7 332.1 332.3 332.9 332.9 333.0 333.3 333.1 332.9 332.7 332.5 332.3 332.0 331.7 331.5 331.1 330.6 330.4 329.6 326.6 324.6 321.2 318.7 315.7 312.6 310.6 307.9 305.7 303.8 301.9 300.3 299.5 298.8 298.5 297.8 299.1 299.7 299.8 299.5 299.3 299.2 299.3 299.3 299.1 299.0 299.1 298.8 298.7 298.8 298.6 298.5 298.5 298.5 298.1 298.3 298.1 297.9 298.1 298.1 297.9 297.6 295.9 294.3 290.1 285.6 281.5 276.9 273.9 271.6 270.9 274.0 277.2 280.5 283.7 287.0 290.4 293.8 297.2 300.7 304.2 307.8 311.4 315.0 318.7 322.4 326.2 330.0 333.8 337.7 338.9 338.4 338.3 338.0 337.3 337.3 337.0 336.4 336.4 336.0 335.6 335.6 335.0 334.8 335.0 334.8 334.5 334.6 334.3 334.0 334.2 334.1 333.8 
333.8 333.7 333.7 333.7 333.7 333.5 333.5 333.3 333.3 333.1 333.1 332.7 332.1 332.0 331.0 329.7 328.7 326.8 328.2 330.8 332.4 333.7 336.0 337.9 340.1 342.4 345.8 348.8 352.0 357.8 362.0 363.2 364.5 365.4 365.5 366.4 367.2 367.0 366.8 367.9 367.5 368.5 369.8 370.1 370.6 372.4 373.0 374.9 376.9 378.9 379.6 382.8 383.5 385.1 388.2 390.0 391.8 394.0 394.8 395.9 396.4 397.0 397.2 397.2 396.6 396.4 395.9 395.2 394.4 394.3 393.3 392.4 392.0 391.1 389.5 388.8 387.3 385.3 383.8 381.9 379.9 376.7 374.3 371.8 367.2 365.1 362.5 359.9 357.2 354.6 351.9 349.3 346.8 344.2 341.7 339.1 336.6 334.2 331.7 329.2 326.8 324.4 322.0 319.6 317.3 314.9 312.6 310.3 308.0 305.7 303.5 301.2 302.1 301.9 301.2 300.5 299.7 299.1 298.7 298.3 298.0 297.8 297.6 297.6 297.4 297.6 297.4 297.6 297.6 297.6 297.6 297.6 297.6 297.6 297.6 297.6 297.8 297.8 297.8 297.8 297.8 297.8 297.8 297.8 297.8 297.8 297.6 297.8 297.8 297.8 297.8 297.9 298.1 298.1 298.5 298.6 298.6 299.0 299.0 298.3 298.4 297.4 295.5 294.1 291.6 289.4 287.0 285.6 286.0 286.4 286.8 287.2 287.6 288.0 288.4 288.8 289.2 289.6 290.0 290.4 290.8 291.2 291.6 292.0 292.4 292.9 293.3 293.7 294.1 294.5 294.9 295.3 295.7 296.6 297.9 298.4 299.1 299.7 300.0 300.0 300.0 300.0 299.5 299.1 298.8 298.4 298.1 298.1 297.8 297.4 297.3 297.1 297.1 297.1 296.9 297.0 296.9 297.1 297.2 297.6 297.4 297.4 297.4 297.3 297.4 297.2 297.1 297.2 296.8 296.4 296.0 295.2 294.3 293.8 292.8 292.4 292.1 291.3 291.2 291.0 290.7 290.3 290.6 290.8 291.0 291.0 291.8 292.3 293.4 295.5 296.9 298.1 299.1 299.1 298.8 298.5 298.5 298.3 298.1 298.1 298.0 297.8 297.8 297.8 297.6 297.6 297.4 297.4 297.3 297.2 297.2 297.1 296.9 297.0 296.9 296.7 296.9 296.7 296.7 296.9 296.9 297.1 297.2 297.2 297.2 297.2 297.2 297.4 297.6 297.6 297.6 297.4 297.4 297.6 297.6 297.6 297.6 297.6 297.4 297.6 297.4 297.2 297.6 297.4 297.6 297.6 297.6 297.4 297.6 297.6 297.6 297.6 297.6 297.6 297.8 297.8 297.8 297.8 297.6 297.4 297.4 297.2 297.1 297.1 297.1 296.9 296.9 296.9 296.8 296.9 296.9 296.9 297.1 297.2 297.2 297.4 297.8 297.8 298.1 298.5 298.6 298.8 299.1 299.4 299.7 299.8 300.0 300.0 299.7 299.3 298.8 298.3 298.1 298.1 298.1 298.1 298.1 298.1 298.1 298.1 298.1 298.1 298.1 298.0 298.0 298.0 298.0 298.0 298.0 298.0 298.0 298.0 298.0 298.0 298.0 298.0 298.0 298.0 298.0 298.0 297.9 297.9 297.9 297.9 297.9 297.9 297.9 297.9 297.9 297.9 297.9 297.9 297.9 297.9 297.9 297.9 297.9 297.9 297.8 297.8 297.8 297.8 297.8 297.8 297.8 297.8 297.8 297.8 297.8 297.8 297.8 297.8 297.8 297.6 297.8 297.8 297.4 298.1 297.8 297.4 297.6 297.6 297.4 297.6 297.4 297.2 297.4 297.4 297.4 297.6 297.4 297.8 297.8 297.8 298.1 298.1 298.1 298.3 298.0 297.2 296.0 293.8 290.8 288.6 285.1 284.0 286.7 290.4 294.1 297.9 301.7 305.6 309.5 313.4 317.4 321.5 325.6 329.8 334.0 338.3 342.6 347.0 351.4 355.9 360.5 365.1 369.8 368.3 367.9 368.5 369.9 370.6 371.9 371.9 373.0 373.2 374.1 374.7 375.4 375.6 375.8 375.6 376.5 376.6 377.8 378.1 379.1 379.7 381.0 382.4 384.2 385.7 387.5 388.6 390.4 391.2 392.7 393.4 393.7 394.3 395.4 395.6 396.3 396.7 396.8 396.8 396.8 396.8 396.8 396.8 396.8 396.8 397.0 397.0 396.8 397.0 397.0 397.0 397.0 397.0 396.8 396.8 396.6 396.6 396.6 396.3 396.3 396.6 396.4 396.3 396.5 396.6 396.3 396.7 396.6 396.6 396.8 396.6 396.6 396.8 396.6 396.3 396.3 396.3 396.1 396.1 396.1 396.1 396.1 396.3 396.3 396.6 396.8 396.8 397.0 397.2 397.5 397.5 397.9 398.2 398.0 398.6 398.6 398.6 399.5 399.5 399.5 400.7 400.7 400.5 401.0 400.7 400.3 400.0 399.6 399.2 398.9 398.5 398.2 397.8 397.4 397.1 396.7 396.4 396.0 395.6 395.3 394.9 394.6 394.2 393.8 393.5 
393.1 392.8 392.4 392.1 391.7 391.4 391.0 390.6 390.3 389.9 389.6 389.2 388.9 388.5 388.2 387.8 387.5 387.1 386.8 386.4 386.1 385.7 385.4 385.0 384.7 384.3 384.0 383.6 383.3 382.9 382.6 382.2 381.9 381.5 381.2 380.8 380.5 380.2 379.8 380.8 380.6 380.4 379.7 380.3 379.7 380.0 379.0 378.9 377.8 378.0 377.8 378.1 377.6 377.6 377.1 377.3 377.7 378.6 379.1 380.6 381.5 383.0 385.3 387.5 389.7 391.8 392.9 394.0 394.6 395.6 395.9 396.2 396.3 396.5 396.6 396.8 396.8 396.8 396.8 396.8 396.8 396.8 396.6 396.8 396.8 396.6 396.7 396.8 396.6 396.8 396.8 396.6 396.8 396.6 396.6 396.8 396.6 396.6 396.7 396.6 396.8 396.8 396.6 396.7 397.0 396.8 397.0 397.2 397.2 397.5 397.7 397.7 397.7 397.7 397.7 397.5 397.7 397.5 397.5 397.7 397.7 397.5 397.5 397.5 397.0 396.3 396.1 394.3 393.1 391.3 389.1 387.6 385.7 384.4 383.9 383.5 383.3 384.4 384.6 384.6 386.4 386.9 387.5 388.8 390.6 392.4 394.2 396.3 396.9 397.5 397.7 397.7 397.7 397.7 397.5 397.5 397.5 397.5 397.5 397.5 397.5 397.7 397.5 397.5 397.2 397.0 396.8 396.3 395.9 395.0 394.7 393.8 392.2 391.3 389.3 386.8 385.0 381.7 379.0 375.8 373.4 372.5 373.4 374.8 376.1 377.5 378.8 380.2 381.6 383.0 384.3 385.7 387.1 388.5 389.9 391.3 392.7 394.2 395.6 397.0 397.2 397.4 397.7 397.7 397.7 397.7 397.7 397.5 397.2 397.0 396.6 396.6 396.1 396.1 395.9 395.6 395.6 395.9 395.7 395.6 395.9 395.9 395.6 395.8 396.1 396.1 396.3 396.6 396.4 396.6 396.8 396.8 396.8 397.2 397.2 397.2 397.4 397.5 397.5 397.2 397.2 397.0 396.3 395.9 395.0 394.3 393.3 392.4 392.1 391.5 390.9 391.2 391.5 391.9 393.1 394.3 395.2 396.1 397.5 397.7 397.9 397.9 397.9 397.7 397.5 397.5 397.1 397.0 396.8 396.8 396.8 397.0 397.0 397.0 397.2 397.0 396.8 396.6 396.4 395.4 394.0 393.8 392.7 391.3 391.3 390.0 388.7 387.5 385.8 383.0 380.6 379.0 376.7 375.4 374.1 372.9 371.6 370.3 369.1 367.8 366.6 365.3 364.1 362.9 361.6 360.4 359.2 358.0 356.7 355.5 356.6 359.7 363.4 366.9 370.2 372.4 373.8 374.7 375.4 375.8 375.8 375.8 375.8 376.0 376.2 376.5 376.6 376.9 377.1 377.3 377.3 377.3 377.3 376.9 376.2 375.8 375.6 374.3 373.4 373.0 370.4 369.0 367.0 364.4 360.3 357.0 354.9 352.9 353.0 354.9 357.2 359.5 361.8 364.1 366.4 368.8 371.1 373.5 375.9 378.3 380.7 383.2 385.6 388.1 389.5 392.7 393.8 396.6 397.9 398.8 399.8 400.2 400.7 400.5 400.5 400.2 400.0 399.8 399.8 399.8 399.8 399.6 399.1 398.4 397.7 397.2 396.9 396.1 395.5 395.0 394.3 393.1 391.1 389.8 387.0 383.7 381.5 378.4 376.4 377.8 380.9 384.1 387.3 390.6 393.9 397.2 400.5 403.8 407.2 410.6 414.1 417.5 421.0 424.6 428.1 431.7 435.3 439.0 439.6 442.5 443.1 444.3 445.6 445.6 445.4 445.6 445.4 444.9 444.6 444.3 443.8 444.0 444.1 444.3 444.6 444.9 445.1 445.6 445.8 446.1 446.4 446.4 446.7 446.7 446.4 446.1 445.9 444.9 444.4 442.3 440.5 438.4 434.7 430.7 425.0 421.1 418.5 415.3 412.2 410.3 407.7 404.8 402.3 399.6 395.6 395.0 393.1 390.6 391.0 391.3 392.0 392.8 395.6 396.4 397.2 397.5 397.9 397.7 397.7 397.5 397.2 397.0 396.8 396.6 396.3 396.1 395.9 395.6 395.6 395.6 395.9 395.9 395.9 395.9 396.1 395.9 396.1 396.3 396.3 396.6 396.8 396.8 397.0 397.2 397.2 397.2 397.4 397.5 397.5 397.7 397.7 397.7 397.7 397.5 397.5 397.5 397.5 397.2 397.4 397.5 397.2 397.4 397.5 397.5 397.7 398.2 398.2 398.6 399.4 399.8 400.2 400.8 400.7 399.8 400.2 399.3 399.1 398.2 397.2 396.2 395.1 394.1 393.1 392.1 391.0 390.0 389.0 388.0 387.0 386.0 385.0 384.0 383.0 382.0 381.0 380.0 379.0 378.0 377.0 376.0 375.0 374.1 373.1 372.1 371.1 370.2 369.2 368.2 367.3 366.3 365.4 364.4 363.5 362.5 361.6 360.6 359.7 358.8 357.8 356.9 356.0 355.0 354.1 355.7 359.0 362.0 365.1 369.1 370.7 373.9 375.2 375.2 
375.8 376.2 375.4 375.6 375.4 374.5 373.8 373.2 372.1 372.0 372.1 372.3 372.8 373.9 374.0 375.2 375.9 376.0 376.0 376.0 374.7 374.1 373.2 370.0 367.7 363.2 359.7 354.5 350.8 348.8 349.7 351.3 353.0 354.6 356.3 357.9 359.6 361.3 363.0 364.7 366.4 368.1 369.8 371.5 371.7 371.9 371.9 371.5 371.3 371.1 370.9 370.9 371.3 371.7 372.2 372.8 373.0 373.2 373.6 373.6 373.9 374.0 374.1 374.3 374.5 374.3 373.8 373.0 372.6 371.9 371.7 371.9 371.7 371.7 372.2 372.1 372.0 372.1 371.9 370.9 369.1 367.4 364.3 361.8 359.2 356.2 353.9 352.1 350.8 353.5 356.2 358.8 361.5 364.3 367.0 369.8 372.6 375.4 378.2 378.8 379.1 378.4 377.3 377.3 377.2 376.7 376.7 376.9 376.5 376.1 376.2 375.6 375.6 375.8 375.4 375.4 375.6 375.4 375.4 375.6 375.4 375.8 375.8 375.4 375.4 374.7 374.0 371.3 367.7 363.4 357.2 353.4 349.4 348.4 348.1 347.8 347.4 347.1 346.8 346.4 346.1 345.8 345.4 345.1 344.8 344.5 344.1 343.8 343.5 343.1 342.8 342.5 342.2 341.8 342.0 341.3 340.9 339.9 339.1 338.3 337.1 336.4 335.8 335.4 335.1 335.2 335.0 335.0 335.0 335.0 334.8 334.6 334.8 334.6 334.3 334.2 333.7 333.1 332.7 332.2 331.3 331.5 331.5 331.0 331.0 330.4 329.8 328.5 325.3 322.6 317.3 313.4 309.2 306.8 309.0 311.1 313.3 315.5 317.7 319.9 322.2 324.4 326.7 329.0 331.2 333.6 335.9 337.3 337.5 337.2 336.9 336.6 336.6 336.2 335.8 335.4 334.9 334.6 334.4 334.1 334.2 334.2 334.0 334.2 334.4 334.2 334.2 334.4 334.0 333.9 333.8 333.4 333.5 333.3 332.8 332.5 332.3 331.7 331.7 331.6 331.0 331.1 330.8 330.4 330.4 328.9 327.4 324.7 321.2 317.9 314.0 311.2 308.3 306.7 309.2 311.8 314.4 317.0 319.7 322.4 325.1 327.8 330.5 333.3 333.7 334.2 334.7 335.0 335.0 335.5 335.6 335.4 335.6 335.6 335.4 335.4 335.4 335.4 335.2 335.0 334.8 334.5 334.2 333.8 333.7 333.1 332.3 332.3 331.3 331.8 331.9 332.1 332.3 331.7 331.2 330.2 327.4 324.7 320.4 316.9 312.4 310.1 310.1 310.6 311.1 311.6 312.1 312.5 313.0 313.5 314.0 314.5 315.0 315.5 316.0 316.5 317.0 317.5 318.0 320.5 323.8 327.0 330.0 332.3 334.4 335.6 335.9 336.6 336.6 336.2 336.4 336.0 335.8 335.6 335.3 334.8 334.8 334.8 334.4 334.6 334.6 334.5 335.0 334.6 334.0 332.7 329.2 324.5 318.0 312.9 306.7 303.6 301.2 303.1 304.9 306.8 308.7 310.6 312.5 314.5 316.4 318.3 320.3 322.3 324.3 326.3 328.3 330.3 332.3 334.4 336.4 338.5 339.7 339.1 338.6 337.7 337.1 336.4 336.2 335.8 335.2 335.4 335.0 334.8 334.8 334.6 334.4 334.6 334.4 334.6 334.8 334.6 334.8 335.0 334.8 334.8 334.8 334.8 334.8 334.6 334.4 334.1 333.7 333.3 332.0 330.6 328.4 324.0 319.7 315.4 310.1 305.4 302.8 300.7 299.8 302.6 306.4 310.2 314.1 318.0 321.9 325.9 330.0 334.1 338.3 340.1 339.6 338.5 336.9 336.3 335.2 334.9 334.4 334.0 333.9 333.8 333.7 333.7 333.7 333.8 333.8 334.0 334.2 334.2 334.2 334.4 334.2 334.2 334.2 333.7 333.5 333.0 330.4 328.3 325.6 319.9 315.6 309.2 304.4 300.4 296.6 296.9 297.3 297.6 298.0 298.3 298.7 299.0 299.4 299.7 300.1 300.5 300.8 301.2 301.5 301.9 302.3 302.6 303.0 301.5 301.6 301.0 300.4 300.0 299.4 298.6 298.8 298.3 297.9 298.1 297.8 297.6 297.6 297.4 297.4 297.6 297.4 297.4 297.6 297.4 297.4 297.4 297.2 297.2 297.4 297.6 298.0 298.6 299.0 299.0 299.1 298.3 296.4 294.2 291.2 286.8 284.3 281.7 284.1 286.6 289.1 291.6 294.1 296.6 299.2 301.8 304.4 307.0 309.7 312.4 315.1 317.8 320.5 323.3 326.1 328.9 331.8 334.6 337.5 337.9 338.9 339.1 338.5 338.3 338.1 337.9 337.9 337.1 336.6 335.9 335.6 335.3 335.0 334.6 334.6 334.2 334.2 334.2 334.2 334.2 334.0 333.8 333.1 331.9 331.1 328.5 326.8 324.9 322.3 320.4 319.0 320.7 322.5 324.4 326.2 328.0 329.9 331.7 333.6 335.5 337.3 339.2 341.1 343.1 345.0 346.9 348.9 350.8 357.2 363.1 371.9 374.3 
376.9 380.2 382.2 385.3 387.7 390.0 390.6 391.8 392.7 393.2 394.0 394.7 394.9 395.0 395.1 395.4 395.9 396.2 396.8 397.0 397.5 397.7 397.9 397.9 398.1 398.2 398.2 398.0 398.2 397.9 397.9 397.9 397.9 397.9 397.9 397.9 397.9 397.7 397.9 397.9 397.5 397.2 397.2 397.0 396.6 396.8 396.8 396.4 396.1 396.1 395.4 395.4 395.4 395.0 394.3 394.3 393.6 393.6 393.8 394.2 393.8 395.9 396.4 396.9 397.4 397.9 398.4 397.4 396.8 395.6 394.7 395.4 395.6 394.7 395.5 395.6 396.6 398.3 402.1 405.8 407.9 407.9 409.1 410.3 411.3 412.2 413.9 413.9 414.3 414.5 414.8 414.6 415.0 414.1 414.3 414.1 414.8 415.6 417.5 418.3 420.6 422.6 425.0 428.2 431.4 433.4 435.7 437.0 439.2 440.3 442.0 442.8 443.0 443.1 443.5 443.6 443.8 444.5 444.9 444.9 445.4 445.4 445.2 445.4 444.5 443.3 440.8 438.4 435.2 431.4 428.7 425.7 424.6 423.5 422.4 421.2 420.1 419.0 417.9 416.8 415.6 414.5 413.4 412.3 411.2 410.1 409.0 408.0 406.9 405.8 404.7 403.6 402.5 401.5 400.4 399.3 398.3 397.2 396.1 395.1 394.0 394.7 396.2 397.0 398.2 398.8 399.1 399.1 398.8 398.8 398.4 398.2 398.0 397.9 397.7 397.7 397.7 397.5 397.7 397.7 397.2 397.0 396.6 395.2 394.8 393.1 392.0 391.1 389.3 387.6 384.8 381.9 379.2 375.4 372.1 369.6 370.1 370.6 371.1 371.6 372.1 372.6 373.2 373.7 374.2 374.7 375.2 375.7 376.3 376.8 377.3 377.8 378.3 378.9 377.6 377.1 377.3 376.6 375.8 375.6 375.2 374.7 374.9 374.7 374.7 374.7 374.7 374.4 374.5 374.5 374.5 374.5 374.5 374.3 374.3 374.3 374.3 374.1 373.9 373.9 373.6 373.6 373.6 373.6 373.9 374.3 374.5 374.7 374.9 375.1 375.4 375.6 375.6 375.6 375.8 375.8 375.8 375.8 375.8 375.8 375.8 375.8 375.8 375.8 375.8 375.8 375.8 375.8 375.8 375.8 375.8 375.8 375.8 375.8 375.8 375.8 375.8 375.8 375.8 375.8 376.0 375.8 376.0 376.0 375.8 376.0 375.8 375.8 375.6 375.6 375.2 375.2 375.2 374.9 374.7 374.7 374.5 374.5 374.7 374.7 374.9 375.2 375.2 375.2 375.6 375.6 375.6 376.0 376.0 376.0 376.7 376.7 377.0 377.8 377.8 378.2 378.6 378.5 377.8 378.0 376.9 376.0 374.0 371.9 369.9 367.9 365.9 363.9 361.9 360.0 358.0 356.0 354.1 352.2 350.3 348.3 346.5 344.6 342.7 340.8 339.0 337.1 335.3 333.5 331.6 329.8 328.0 326.3 324.5 322.7 320.9 319.2 317.5 315.7 314.0 312.3 310.6 308.9 307.2 305.6 303.9 302.2 300.6 299.0 297.3 295.7 294.1 292.5 290.9 289.3 287.7 286.2 284.6 283.1 281.5 283.2 285.6 287.6 290.6 292.5 295.0 296.2 296.9 296.9 297.0 297.4 297.2 297.5 297.8 297.6 297.8 298.1 297.8 297.8 298.1 297.6 297.8 297.6 297.4 297.1 297.2 296.9 297.2 297.9 297.9 298.8 299.1 298.9 298.1 297.0 295.0 291.6 289.5 286.6 285.0 287.6 291.3 295.0 298.8 302.6 306.5 310.5 314.4 318.5 322.6 326.7 330.9 335.2 339.5 343.8 348.3 352.7 357.3 361.9 366.5 366.4 366.2 366.3 368.5 368.9 370.2 371.3 372.2 372.4 374.1 374.5 374.7 374.7 374.7 373.8 373.9 373.9 374.2 374.9 376.0 376.7 378.2 379.5 381.9 383.4 385.7 387.3 388.6 390.0 391.5 392.4 393.4 393.9 395.0 395.3 396.6 396.8 397.2 397.2 397.2 397.2 397.2 397.2 397.2 397.2 397.2 397.2 397.4 397.2 397.4 397.5 397.5 397.5 397.7 397.5 397.7 397.9 397.7 397.7 397.9 397.9 397.9 398.1 398.2 398.0 397.9 397.9 397.7 397.7 397.7 397.5 397.5 397.3 397.2 397.0 397.0 397.0 396.8 396.8 396.8 396.3 396.3 396.3 395.9 396.1 396.3 396.3 396.5 397.2 397.4 397.7 398.6 398.6 398.8 399.5 399.3 399.8 400.5 401.4 402.2 403.7 404.6 404.6 405.6 404.8 404.1 403.4 402.7 401.9 401.2 400.5 399.7 399.0 398.3 397.6 396.8 396.1 395.4 394.7 394.0 393.3 392.5 391.8 391.1 390.4 389.7 389.0 388.3 387.6 386.9 386.2 385.5 384.8 384.1 383.4 382.7 382.0 381.3 380.6 379.9 379.2 378.6 377.9 377.2 376.5 375.8 375.1 374.5 373.8 373.1 372.4 371.7 371.1 370.4 369.7 369.1 368.4 
367.7 367.1 366.4 365.7 365.1 364.4 364.3 365.5 367.4 368.5 368.7 369.8 369.6 368.9 368.5 368.3 367.2 366.9 366.2 365.3 364.1 364.1 363.4 363.8 364.9 365.6 366.0 367.9 368.5 370.4 372.6 374.9 376.4 379.5 380.8 382.1 384.8 386.4 387.5 389.7 390.5 392.2 393.2 395.2 395.9 396.6 396.8 397.0 396.8 396.8 397.0 396.8 396.8 397.0 396.8 396.8 397.0 396.8 397.0 397.2 397.2 397.5 397.7 397.7 397.9 398.1 398.2 398.2 398.2 398.2 398.0 397.7 397.7 397.0 396.3 395.9 395.2 394.6 394.3 393.8 393.2 393.6 393.0 392.7 393.1 393.3 392.7 393.4 393.4 392.7 393.1 392.9 391.8 390.4 389.3 387.0 384.4 382.3 379.5 377.6 379.0 380.5 381.9 383.4 384.9 386.4 387.9 389.4 390.9 392.4 393.9 395.4 396.9 398.5 400.0 400.2 400.5 399.9 399.1 399.5 399.7 399.3 399.3 399.5 398.8 398.7 398.8 398.5 398.4 398.6 398.4 398.2 398.3 398.2 398.2 398.3 398.2 398.2 398.2 397.9 397.7 397.7 397.5 397.0 396.8 396.6 395.9 394.5 392.7 389.3 385.4 379.1 374.9 368.5 364.5 367.2 370.0 372.8 375.6 378.4 381.2 384.1 387.0 389.9 392.8 395.7 398.7 401.7 402.3 401.6 401.4 400.0 399.2 398.8 398.4 398.0 397.9 398.1 397.9 397.9 398.1 398.4 398.4 398.6 398.6 398.4 398.6 398.6 398.4 398.6 398.6 398.4 398.4 398.4 398.2 398.3 398.2 398.0 398.2 397.9 397.9 398.2 398.0 397.9 397.7 397.3 397.2 397.0 396.3 396.6 396.2 395.4 395.9 396.1 395.9 397.0 397.7 397.9 398.8 399.3 399.5 399.5 399.7 399.5 399.5 399.5 399.3 399.3 399.1 398.8 398.7 398.4 398.2 397.7 397.5 397.5 397.2 397.0 397.2 397.0 397.2 397.2 397.2 397.2 397.5 397.3 397.2 397.2 396.6 395.9 395.5 393.6 390.8 387.3 382.6 378.6 373.4 371.1 370.8 371.0 371.2 371.5 371.7 371.9 372.1 372.3 372.5 372.7 373.0 373.2 373.4 373.6 373.8 374.0 374.3 374.5 374.7 374.9 375.1 375.3 375.5 375.6 375.8 375.8 375.4 375.9 375.4 375.2 375.6 375.2 375.2 375.4 374.8 374.7 374.7 374.5 374.5 374.7 374.5 374.7 374.7 374.7 374.9 374.9 374.7 374.0 372.4 370.7 366.6 363.0 358.7 353.9 350.2 349.0 351.4 353.9 356.3 358.8 361.3 363.8 366.3 368.8 371.4 373.9 376.5 379.1 381.7 384.4 387.0 389.7 392.4 395.1 397.8 399.1 399.3 399.1 398.8 398.6 398.8 398.6 398.6 399.1 398.9 398.6 398.8 398.7 398.2 398.3 398.2 397.9 397.9 397.7 397.5 397.5 397.5 397.3 397.2 397.2 396.8 397.0 397.0 396.8 397.0 397.0 397.0 397.2 397.2 397.0 397.2 397.0 396.6 396.3 395.4 394.0 392.2 393.2 394.7 395.1 395.6 396.8 397.7 398.8 401.0 404.4 407.9 411.7 417.7 424.0 426.2 428.7 431.4 434.7 436.2 438.7 440.3 440.9 441.5 442.2 442.8 443.3 443.5 444.1 444.3 444.3 444.9 445.1 444.9 445.1 445.1 444.3 444.6 444.1 443.3 442.0 440.8 438.6 434.7 432.4 429.0 426.0 425.0 423.9 422.8 421.7 420.7 419.6 418.5 417.5 416.4 415.4 414.3 413.2 412.2 411.1 410.1 409.1 408.0 407.0 405.9 404.9 403.9 402.9 401.8 400.7 400.0 399.6 399.5 399.7 399.3 399.5 399.3 398.4 398.4 397.9 397.2 397.4 397.0 396.8 396.8 396.8 396.4 396.8 396.6 396.6 396.8 396.6 396.6 396.7 396.8 396.6 396.7 396.8 396.6 396.8 396.8 396.6 396.8 396.6 396.3 396.6 396.6 396.3 396.5 397.0 397.0 397.0 397.2 396.8 396.3 396.3 395.6 395.2 393.8 393.4 393.1 392.9 392.7 392.4 392.2 391.9 391.7 391.5 391.2 391.0 390.7 390.5 390.2 390.0 389.8 389.5 389.3 389.0 388.8 388.6 388.3 388.1 387.8 387.6 387.4 387.1 386.9 386.6 386.4 386.2 385.9 385.7 385.5 385.2 385.0 384.7 384.5 384.3 384.0 383.8 383.6 383.3 383.1 382.8 382.6 382.4 382.1 381.9 381.7 381.4 381.2 381.0 380.7 380.5 380.3 380.0 379.8 379.3 378.8 378.0 378.2 378.2 378.0 377.8 377.8 377.8 377.2 376.9 376.7 376.0 376.0 375.8 375.6 375.6 375.4 375.4 375.4 375.4 375.4 375.4 375.2 375.2 375.2 375.0 374.3 373.0 371.6 367.2 362.8 355.5 350.4 346.4 343.2 344.7 346.1 347.6 349.1 350.5 
352.0 353.5 355.0 356.5 358.0 359.5 361.0 362.6 364.1 365.6 367.2 368.7 370.3 371.9 373.4 374.7 375.2 375.4 374.8 375.2 375.8 375.4 375.2 375.5 374.9 374.8 375.2 374.5 374.5 374.7 374.5 374.3 374.5 374.3 374.3 374.3 373.9 373.6 372.7 371.3 369.1 364.7 360.3 355.1 347.4 341.1 336.4 331.7 329.2 330.4 332.6 334.8 337.1 339.3 341.6 343.9 346.2 348.5 350.8 353.1 355.5 357.8 360.2 362.6 365.0 367.5 369.9 372.4 374.9 375.4 375.2 375.2 374.7 375.1 375.6 375.6 376.0 376.2 376.1 376.2 376.4 376.0 376.2 376.2 376.0 376.2 376.2 376.0 376.2 376.0 375.8 376.0 375.4 374.5 373.0 368.7 364.1 357.2 351.6 343.8 339.9 334.2 332.1 332.3 332.5 332.7 333.0 333.2 333.4 333.6 333.8 334.0 334.2 334.4 334.7 334.9 335.1 335.3 335.5 335.7 335.9 336.1 336.4 336.8 337.1 337.0 336.6 336.6 336.6 336.6 336.4 336.6 336.2 336.0 336.0 335.6 335.6 335.6 335.6 335.2 335.4 335.4 335.2 335.2 335.4 335.0 335.0 334.8 334.8 334.6 334.5 334.4 334.4 334.3 334.4 334.6 334.8 335.0 335.5 335.4 335.4 335.6 334.6 333.8 333.1 331.8 329.8 329.4 328.8 328.1 328.7 329.4 329.8 330.8 331.9 332.7 333.8 334.6 335.1 335.6 335.7 336.2 336.4 336.2 336.4 336.2 336.0 335.8 335.5 335.2 335.2 335.0 335.0 334.8 334.8 334.6 334.8 334.8 334.8 335.0 335.0 334.8 334.6 334.1 333.7 331.9 328.7 324.3 320.8 317.1 315.7 316.4 317.5 318.6 319.8 320.9 322.0 323.2 324.3 325.5 326.7 327.8 329.0 330.2 331.3 332.5 333.7 334.9 336.1 337.3 338.5 339.7 340.1 339.7 340.1 339.6 339.5 339.3 338.8 337.9 337.3 336.4 336.0 335.5 335.6 335.8 335.8 335.8 336.1 335.8 335.8 336.0 336.0 335.8 336.0 335.8 336.2 336.2 336.2 336.2 336.2 336.0 335.8 335.6 335.0 334.8 332.7 331.0 326.8 320.8 314.7 310.4 306.5 305.0 306.8 309.5 312.2 314.9 317.6 320.3 323.1 325.9 328.7 331.6 334.5 337.3 340.3 339.3 338.6 337.9 337.1 336.4 335.8 335.6 335.2 335.2 335.0 335.0 335.0 334.8 334.6 335.0 334.6 334.8 334.8 334.5 334.4 334.8 334.5 334.4 334.6 334.0 333.8 333.8 332.3 331.3 327.5 322.8 318.4 312.2 308.6 309.1 309.6 310.1 310.5 311.0 311.5 312.0 312.4 312.9 313.4 313.9 314.4 314.9 315.3 315.8 316.3 316.8 317.3 317.8 318.3 318.8 320.8 323.3 326.2 329.4 331.7 333.1 334.8 335.8 336.1 336.6 336.4 336.2 336.2 336.0 335.8 335.4 335.2 335.0 334.8 334.8 334.8 334.8 334.6 334.5 333.8 333.3 330.9 327.5 322.1 317.1 310.9 307.2 302.6 304.0 305.3 306.7 308.1 309.4 310.8 312.2 313.6 315.0 316.4 317.8 319.2 320.7 322.1 323.5 325.0 326.4 327.9 329.4 330.8 332.3 333.7 335.4 336.5 338.3 339.0 339.5 339.5 339.0 338.5 337.9 337.1 336.6 336.2 336.0 335.8 335.6 335.4 335.4 335.0 335.0 334.8 334.4 334.1 333.7 332.9 332.1 331.9 331.1 331.0 331.0 330.8 330.4 330.7 330.6 330.4 330.4 330.0 329.7 329.4 329.0 328.7 325.5 322.8 319.5 317.5 314.4 312.0 310.5 307.9 305.6 304.0 302.1 300.6 298.3 299.3 299.7 299.8 299.5 299.0 298.5 298.8 298.5 298.5 298.5 298.3 297.9 298.1 298.0 297.6 297.7 297.6 297.6 297.7 297.6 298.1 298.1 298.3 298.4 298.5 298.6 298.5 298.5 298.2 295.9 294.7 291.0 286.8 282.4 277.5 274.2 271.5 270.1 273.9 277.8 281.8 285.8 289.8 294.0 298.2 302.4 306.7 311.1 315.5 320.0 324.5 329.2 333.8 336.4 337.7 338.0 337.5 337.1 337.0 336.7 336.4 336.0 336.0 335.5 335.0 335.0 335.0 334.8 335.1 335.4 335.4 335.4 335.2 335.2 335.0 334.7 334.8 334.8 334.6 334.8 334.8 334.6 334.8 334.6 334.2 334.4 334.4 333.8 334.0 333.3 332.3 329.8 327.4 324.4 321.2 319.5 318.2 321.7 325.2 328.8 332.3 336.0 339.6 343.4 347.1 350.9 354.7 358.6 362.5 366.5 370.5 372.1 371.9 372.8 373.9 374.9 374.9 376.5 376.1 375.8 375.6 375.6 374.9 375.7 375.4 375.6 376.5 377.8 378.3 381.1 381.6 383.0 384.8 386.5 388.2 390.2 392.0 393.1 393.7 395.0 395.0 395.6 
395.9 395.7 395.4 395.6 395.4 395.4 395.4 395.2 395.2 395.2 395.0 395.0 394.7 394.2 394.0 393.1 391.2 389.5 386.5 381.1 376.9 372.6 366.4 362.7 359.0 356.5 353.9 351.3 348.8 346.3 343.8 341.3 338.9 336.4 334.0 331.6 329.2 326.8 324.5 322.1 319.8 317.5 315.2 313.0 310.7 308.5 306.2 304.0 301.8 299.7 299.3 299.2 299.1 298.5 298.1 298.3 298.0 297.6 297.6 297.1 296.9 296.6 296.4 296.1 296.2 296.0 295.9 296.0 296.0 295.9 296.2 296.1 296.0 296.2 296.2 296.0 296.3 296.4 296.4 296.6 296.6 296.6 296.7 296.7 296.7 296.7 296.9 296.9 297.0 297.1 297.1 297.2 297.4 297.3 297.4 297.2 297.2 297.2 297.2 297.2 297.1 296.9 296.9 296.7 296.2 295.9 294.3 292.3 289.2 284.5 279.8 276.8 273.4 271.8 272.8 274.5 276.2 277.9 279.6 281.3 283.0 284.8 286.5 288.3 290.0 291.8 293.6 295.4 297.2 299.0 300.9 301.0 300.9 300.6 300.5 300.2 300.0 299.7 299.5 299.0 299.0 298.7 298.5 298.3 298.1 297.9 297.9 298.1 298.0 298.1 298.1 298.1 298.1 298.5 297.9 297.9 297.9 297.1 296.4 293.8 290.8 287.7 284.0 281.9 281.1 282.2 283.3 284.4 285.6 286.7 287.8 289.0 290.1 291.3 292.5 293.6 294.8 296.0 297.2 298.3 299.5 300.7 301.9 303.1 304.3 305.6 306.8 306.5 306.6 305.8 305.4 304.4 303.1 302.3 301.2 300.2 299.3 298.8 298.2 297.6 297.6 297.2 296.9 296.9 296.6 296.6 296.6 296.4 296.5 296.6 296.6 296.7 296.7 296.6 296.7 296.9 296.8 296.7 296.9 296.9 296.9 297.3 297.2 297.4 297.6 297.6 297.6 297.9 297.8 297.8 297.6 297.6 297.6 297.6 297.4 297.4 297.3 297.1 296.9 296.6 296.4 296.2 295.9 295.7 295.9 295.5 295.4 295.7 295.6 295.7 296.2 296.4 296.6 297.2 297.4 297.8 298.5 299.0 299.1 299.7 299.3 298.5 297.2 296.2 294.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7 293.7",
23
+ "input_type": "phoneme",
24
+ "offset": 17.946
25
+ },
26
+ {
27
+ "text": "SP 体 温 SP 伴 着 喘 息 有 点 上 升 SP 窗 外 吹 来 了 夏 天 的 风 SP",
28
+ "ph_seq": "SP t i w en SP b an zh e ch uan x i y ou d ian sh ang sh eng SP ch uang w ai ch ui l ai l e x ia t ian d e f eng SP",
29
+ "note_seq": "rest D4 D4 G4 G4 rest G4 G4 G4 G4 G4 G4 G4 G4 F#4 F#4 G4 G4 A4 A4 G4 G4 rest F#4 F#4 F#4 F#4 E4 E4 E4 E4 E4 E4 D#4 D#4 D#4 D#4 D#4 D#4 E4 E4 rest",
30
+ "note_dur_seq": "0.327 0.272 0.272 0.546 0.546 0.273 0.5450001 0.5450001 0.273 0.273 0.2719998 0.2719998 0.273 0.273 0.273 0.273 0.273 0.273 0.2720001 0.2720001 0.546 0.546 0.2730002 0.2719998 0.2719998 0.2729998 0.2729998 0.4090004 0.4090004 0.4089999 0.4089999 0.2729998 0.2729998 0.4089999 0.4089999 0.4090004 0.4090004 0.2729998 0.2729998 1.091 1.091 0.081",
31
+ "is_slur_seq": "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0",
32
+ "ph_dur": "0.179272 0.147728 0.168591 0.103409 0.546 0.224138 0.048862 0.47909 0.06591 0.14232 0.13068 0.119729 0.152271 0.202546 0.070454 0.218454 0.054546 0.136636 0.136364 0.119729 0.152271 0.546 0.158227 0.114773 0.184498 0.087502 0.103681 0.169319 0.327183 0.081818 0.34309 0.06591 0.109365 0.163635 0.27832 0.13068 0.321499 0.087502 0.087773 0.185226 1.091 0.081",
33
+ "f0_timestep": "0.005",
34
+ "f0_seq": "294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.3 294.5 295.2 295.3 295.8 295.8 296.0 295.7 295.7 295.9 295.9 295.9 295.9 295.9 296.0 295.9 295.9 296.0 295.9 295.9 296.4 296.2 296.4 296.9 296.7 296.7 297.0 296.7 296.6 296.8 296.5 296.4 296.4 296.1 295.7 295.3 297.7 300.0 301.5 302.6 304.9 306.3 307.4 310.6 312.4 315.5 319.3 322.7 327.9 332.5 338.3 344.3 349.6 358.1 364.5 366.4 367.8 369.6 370.6 372.4 373.3 373.6 373.4 373.9 373.2 374.6 374.8 375.2 374.5 374.7 373.9 374.9 374.9 376.4 376.2 377.3 377.7 379.3 380.9 383.0 384.1 385.6 386.2 387.9 388.6 390.5 391.1 392.0 392.6 393.4 394.0 394.8 395.0 395.0 395.0 395.1 395.0 395.0 395.2 395.2 395.2 395.2 395.2 395.2 395.4 395.2 395.2 395.4 395.2 395.2 395.4 395.2 395.2 395.4 395.2 395.2 395.4 395.2 395.2 395.4 395.4 395.4 395.6 395.4 395.6 395.8 395.6 395.6 395.6 395.6 395.6 395.6 395.6 395.6 395.6 395.7 395.9 395.9 395.9 395.9 395.9 396.1 395.9 395.9 396.1 396.1 396.2 396.6 396.6 396.8 397.5 397.7 397.9 398.6 398.5 398.3 399.0 398.8 398.3 397.9 397.5 397.1 396.8 396.4 396.0 395.6 395.2 394.8 394.5 394.1 393.7 393.3 392.9 392.5 392.2 391.8 391.4 391.0 390.6 390.3 389.9 389.5 389.1 388.7 388.4 388.0 387.6 387.2 386.9 386.5 386.1 385.7 385.4 385.0 384.6 384.2 383.9 383.5 383.1 382.7 382.4 382.0 381.6 381.3 380.9 380.5 380.1 379.8 379.4 379.0 378.7 378.3 377.9 377.6 377.2 376.8 376.5 376.1 375.6 374.7 376.1 376.3 376.5 375.0 375.2 374.5 374.0 372.4 372.3 371.2 371.5 371.8 372.1 371.2 371.7 370.7 371.2 372.1 372.8 373.0 375.0 375.4 376.9 379.8 381.5 383.7 386.4 387.5 389.6 391.3 393.0 393.8 394.7 395.1 395.9 396.2 396.8 397.0 397.0 397.2 397.0 397.0 397.0 397.0 396.8 397.0 396.8 396.8 396.7 396.6 396.6 396.5 396.3 396.6 396.6 396.6 396.6 396.8 396.8 397.0 397.1 397.2 397.2 397.0 396.8 396.5 396.2 395.9 395.3 394.7 393.5 393.2 392.2 391.8 392.0 391.7 391.4 391.8 391.7 391.5 392.1 391.6 390.9 391.1 390.1 388.8 387.3 385.5 381.7 379.3 376.5 375.2 376.5 377.8 379.1 380.4 381.7 383.0 384.3 385.6 386.9 388.3 389.6 390.9 392.3 393.6 395.0 396.3 396.6 396.6 396.6 396.6 396.8 397.4 397.2 397.1 397.2 396.8 396.5 396.3 395.9 395.9 395.9 395.6 395.6 395.6 395.7 395.9 395.6 395.6 395.7 395.2 395.2 394.3 392.1 390.0 385.8 381.9 375.7 372.1 366.9 365.1 366.2 367.2 368.3 369.4 370.5 371.6 372.6 373.7 374.8 375.9 377.0 378.1 379.2 380.3 381.4 382.5 383.7 384.8 385.9 387.0 388.2 390.4 391.3 393.9 395.3 396.1 397.0 397.2 397.7 397.7 397.7 397.4 397.2 397.4 397.2 397.0 397.0 397.0 396.7 396.6 396.8 396.4 396.6 396.5 396.3 396.1 395.4 394.5 392.1 389.5 387.0 383.5 382.4 381.0 381.6 382.3 382.9 383.6 384.3 384.9 385.6 386.2 386.9 387.6 388.2 388.9 389.6 390.2 390.9 391.6 392.3 392.9 393.6 394.3 395.0 395.6 395.4 395.4 395.7 395.8 395.4 395.4 395.6 395.0 394.5 394.5 393.8 393.6 393.6 393.7 393.4 393.6 393.6 393.7 394.1 394.3 394.1 394.3 394.5 394.5 394.5 394.6 394.0 394.2 393.9 393.4 393.3 393.1 392.2 392.0 391.8 390.5 390.0 389.5 388.3 386.8 385.2 383.8 383.2 382.7 381.9 380.7 380.2 378.6 377.4 376.5 375.2 374.3 374.6 375.0 375.2 374.7 374.7 374.4 374.1 374.1 373.8 373.6 373.5 373.6 373.4 373.4 373.4 373.4 373.3 373.4 373.3 373.4 373.7 373.9 373.9 374.1 374.1 373.9 373.9 373.9 373.6 373.6 373.1 372.6 371.9 370.4 368.3 
363.6 359.2 355.5 350.9 349.3 351.6 353.9 356.2 358.6 360.9 363.3 365.7 368.1 370.5 373.0 375.4 377.9 380.4 382.9 385.4 387.9 390.0 390.2 391.2 391.5 392.5 392.8 393.6 394.4 395.3 396.0 396.6 397.0 397.2 397.2 397.7 397.5 397.5 397.9 397.5 397.5 397.6 397.2 397.1 397.1 396.5 396.3 396.1 395.0 394.3 392.0 389.4 386.6 381.8 379.4 377.7 377.2 380.6 384.1 387.7 391.3 394.9 398.5 402.2 405.9 409.6 413.4 417.2 421.1 425.0 428.9 432.8 436.8 440.9 444.9 448.2 448.1 447.7 446.6 446.0 445.5 444.7 444.1 443.7 443.3 443.4 443.6 443.8 444.1 444.3 444.6 444.6 444.9 445.1 444.9 445.1 444.9 444.9 445.0 444.6 444.0 443.6 442.3 440.4 436.9 432.7 427.8 423.5 419.8 418.2 417.4 416.7 415.9 415.1 414.4 413.6 412.9 412.1 411.4 410.6 409.9 409.1 408.4 407.6 406.9 406.1 405.4 404.7 403.9 403.2 402.4 401.6 400.5 400.0 399.8 399.0 398.6 398.6 398.0 397.5 397.6 397.2 397.0 397.2 397.0 396.8 397.0 396.8 396.8 396.8 396.8 396.7 396.6 396.5 396.3 396.1 396.1 396.1 396.1 396.1 396.1 396.1 396.3 396.4 396.6 396.6 396.8 396.8 397.0 397.0 397.0 397.0 397.0 397.0 397.0 397.0 396.8 396.8 397.0 397.0 397.0 397.0 397.0 397.0 397.0 397.0 397.0 397.2 397.2 397.2 397.3 397.4 397.2 397.5 397.4 397.2 397.0 397.0 396.7 396.6 396.5 396.3 396.1 396.3 396.0 395.9 395.9 395.9 395.7 395.9 396.0 395.7 396.1 396.3 396.1 396.4 396.7 396.6 397.1 397.5 397.6 398.0 398.6 398.5 399.1 399.9 400.4 401.2 402.2 402.6 402.6 402.7 402.6 401.5 400.9 400.2 399.6 399.0 398.4 397.7 397.1 396.5 395.9 395.3 394.7 394.0 393.4 392.8 392.2 391.6 391.0 390.4 389.8 389.1 388.5 387.9 387.3 386.7 386.1 385.5 384.9 384.3 383.7 383.1 382.5 381.9 381.3 380.7 380.1 379.5 378.9 378.4 377.8 377.2 376.6 376.0 375.4 374.8 374.2 373.7 373.1 372.5 371.9 371.3 370.7 370.2 369.6 369.0 368.4 367.9 367.3 366.7 366.1 365.6 365.4 367.0 369.8 371.1 373.6 374.3 375.0 375.4 375.4 375.6 375.5 374.9 374.7 374.5 373.6 373.4 372.7 372.3 372.1 372.1 371.7 372.3 372.2 372.4 372.7 373.4 373.4 374.1 374.1 374.5 374.7 375.1 374.7 374.7 374.3 374.0 373.8 373.5 372.8 372.0 371.3 370.1 369.7 369.6 369.5 370.0 371.0 371.9 372.8 373.8 374.7 375.6 376.6 377.1 377.7 378.2 378.1 377.8 377.8 376.8 376.4 376.2 375.7 375.4 375.3 375.2 375.2 375.3 375.2 375.2 375.2 374.9 374.9 374.7 374.5 374.5 374.3 374.0 373.9 373.0 371.7 368.2 364.6 360.1 353.7 348.1 344.3 341.0 339.5 338.1 336.6 335.2 333.7 332.3 330.9 329.5 328.1 326.7 325.3 323.9 322.5 321.1 319.8 318.4 317.0 315.7 314.3 313.0 311.7 311.2 314.3 317.2 319.2 319.7 320.6 321.2 320.6 320.6 320.8 319.8 319.5 319.5 318.9 318.6 318.8 318.4 318.6 319.4 319.9 320.9 322.3 323.4 324.7 326.2 328.0 329.3 330.5 331.2 331.5 332.0 332.3 332.3 332.5 332.7 332.9 333.1 333.4 333.3 333.3 333.0 332.9 332.9 332.9 332.9 333.1 333.1 333.5 333.5 333.7 333.7 333.8 333.8 333.7 333.7 333.6 333.5 333.6 333.4 333.3 333.2 333.1 333.1 333.0 332.9 332.7 332.6 332.2 331.5 331.5 330.8 330.0 329.9 329.1 328.3 328.5 328.4 328.3 329.1 330.0 331.1 331.9 334.0 334.1 334.4 334.8 334.6 334.3 334.6 334.6 334.3 334.6 334.3 334.1 334.2 334.2 334.0 334.2 334.2 334.0 334.4 334.4 334.3 334.4 334.2 334.1 334.2 334.2 334.3 334.4 334.4 334.4 334.8 334.6 334.6 334.6 334.5 334.3 334.8 334.8 334.6 334.8 334.9 334.6 334.6 334.7 334.4 334.2 334.2 334.0 333.6 333.5 333.4 333.0 332.9 333.0 332.9 332.9 333.1 333.1 333.4 333.8 333.7 333.7 333.7 332.9 332.3 331.9 331.0 330.2 329.9 329.1 328.3 328.5 328.3 328.0 328.7 329.4 330.2 331.0 332.6 333.3 334.1 335.0 335.2 335.6 335.5 335.4 335.4 335.2 335.2 335.0 334.8 334.6 334.4 334.2 334.2 334.0 334.0 333.8 333.8 333.8 334.0 334.1 334.2 334.2 334.0 334.0 333.7 
333.3 332.5 331.0 328.5 325.5 321.7 316.7 313.1 310.0 310.8 311.5 312.2 313.0 313.7 314.4 315.2 315.9 316.7 317.4 318.2 318.9 319.7 320.4 321.2 322.0 322.7 323.5 324.2 325.0 325.8 326.5 327.1 326.8 325.4 325.2 324.5 322.7 321.7 320.5 319.4 318.6 317.7 316.9 316.5 316.4 316.2 316.2 316.0 315.9 316.0 315.8 316.1 316.2 316.0 316.1 316.4 316.0 316.2 316.5 316.2 316.4 316.7 316.6 316.6 316.7 316.6 316.6 316.6 316.2 316.2 316.0 315.8 315.5 315.2 314.5 312.4 310.3 306.1 301.6 297.4 294.2 290.9 288.8 289.7 290.5 291.4 292.3 293.1 294.0 294.9 295.8 296.7 297.5 298.4 299.3 300.2 301.1 302.0 302.9 303.8 304.7 305.7 306.6 307.5 308.4 309.3 310.3 311.2 312.1 313.1 314.0 315.0 316.3 317.1 317.8 318.4 318.7 318.6 318.8 318.9 318.4 318.6 318.8 318.2 318.2 318.4 317.8 317.6 317.2 316.7 316.6 316.3 316.0 315.9 315.6 315.5 315.3 315.5 315.9 316.2 316.2 316.4 316.6 316.5 316.7 316.7 316.6 316.7 316.5 316.2 316.0 315.1 314.1 313.2 312.0 311.1 311.1 310.8 311.0 311.4 311.7 311.9 312.1 312.2 312.2 312.2 312.4 312.2 312.1 312.2 312.0 311.9 312.0 311.5 310.9 310.8 309.3 307.5 305.4 303.0 301.6 299.5 298.5 299.7 302.7 305.7 308.7 311.7 314.8 317.7 319.5 320.4 320.1 319.6 319.0 318.4 317.8 317.3 316.7 316.5 316.4 316.1 316.2 316.0 316.0 316.0 316.0 315.8 316.2 316.0 316.0 316.0 316.0 315.8 315.8 315.8 314.9 314.6 312.9 310.2 308.1 303.6 299.8 295.0 292.4 290.5 291.9 293.3 294.7 296.1 297.5 299.0 300.4 301.8 303.3 304.8 306.2 307.7 309.2 310.7 312.2 313.7 315.2 316.7 318.2 319.8 321.3 322.8 322.7 321.8 321.0 321.1 320.1 319.3 318.4 318.4 317.1 317.3 317.7 317.6 317.1 317.4 316.6 316.8 316.9 316.9 317.0 318.0 317.6 318.4 319.3 320.1 320.2 322.4 322.9 324.0 325.9 327.1 328.1 329.9 330.4 331.5 332.3 333.3 333.6 334.2 334.2 334.2 334.2 334.2 334.2 334.2 334.2 334.2 334.1 334.2 334.2 334.0 334.2 334.2 334.0 334.2 334.2 334.1 334.2 334.2 334.2 334.2 334.2 334.2 334.2 334.2 334.1 334.2 334.2 334.0 334.2 334.2 334.1 334.2 334.2 334.1 334.2 334.2 334.2 334.2 334.2 334.2 334.2 334.2 334.0 334.2 334.0 334.1 334.2 334.0 334.1 334.2 334.0 334.0 334.0 334.0 334.0 334.0 334.0 334.0 334.0 334.0 334.0 334.0 334.0 334.1 334.2 334.0 334.1 334.2 334.0 334.0 334.2 334.0 334.0 334.2 334.0 334.1 334.2 334.0 334.1 334.2 334.0 334.0 334.0 334.0 334.0 334.0 334.0 334.0 334.0 334.0 334.0 334.0 334.0 334.0 334.0 334.0 334.0 334.0 334.0 334.0 334.0 334.0 334.0 334.0 334.0 334.0 334.0 334.0 333.8 333.8 333.8 333.7 333.7 333.7 333.6 333.5 333.5 333.5 333.3 333.5 333.5 333.5 333.5 333.7 333.6 333.5 333.7 333.7 333.7 333.7 333.7 333.8 333.7 333.8 333.8 333.7 333.7 333.8 333.7 333.7 333.8 333.7 333.7 333.7 333.7 333.5 333.8 333.5 333.5 333.7 333.6 333.5 333.7 333.7 333.7 333.8 333.8 333.7 334.0 334.0 334.1 334.5 334.8 335.1 335.7 336.2 336.7 337.1 337.4 337.5 336.6 336.1 335.6 334.6 334.6 334.6 334.6 334.6 334.6 334.6 334.6 334.6 334.6 334.6 334.6 334.6 334.6 334.6 334.6 334.6 334.6 334.6 334.6 334.6",
35
+ "input_type": "phoneme",
36
+ "offset": 43.037
37
+ },
38
+ {
39
+ "text": "SP 夕 阳 晚 照 轻 眺 余 晖 风 在 背 后 温 柔 的 吹 SP 看 得 见 的 青 春 体 会 SP 看 不 清 我 究 竟 是 谁 SP 风 高 夜 黑 星 目 剑 眉 从 来 不 会 空 手 而 归 无 声 的 鬼 魅 SP",
40
+ "ph_seq": "SP x i y ang w an zh ao q ing t iao y v h ui f eng z ai b ei h ou w en r ou d e ch ui SP k an d e j ian d e q ing ch un t i h ui SP k an b u q ing w o j iu j ing sh ir sh ui SP f eng g ao y E h ei x ing m u j ian m ei c ong l ai b u h ui k ong sh ou er g ui w u sh eng d e g ui m ei SP",
41
+ "note_seq": "rest G4 G4 G4 G4 G4 G4 G4 G4 G4 G4 G4 G4 G4 G4 G4 G4 G4 G4 G4 G4 G4 G4 G4 G4 G4 G4 G4 G4 G4 G4 G4 G4 rest F#4 F#4 F#4 F#4 F#4 F#4 F#4 F#4 F#4 F#4 F#4 F#4 F#4 F#4 F#4 F#4 rest G4 G4 G4 G4 G4 G4 G4 G4 G4 G4 A4 A4 A4 A4 G4 G4 rest G4 G4 G4 G4 G4 G4 G4 G4 G4 G4 G4 G4 G4 G4 G4 G4 G4 G4 G4 G4 G4 G4 G4 G4 G4 G4 G4 G4 G4 G4 G4 F#4 F#4 F#4 F#4 E4 E4 E4 E4 D#4 D#4 rest",
42
+ "note_dur_seq": "0.6 0.272 0.272 0.273 0.273 0.273 0.273 0.273 0.273 0.2720001 0.2720001 0.273 0.273 0.273 0.273 0.2719998 0.2719998 0.273 0.273 0.273 0.273 0.273 0.273 0.2720001 0.2720001 0.273 0.273 0.2730002 0.2730002 0.2729998 0.2729998 0.1360002 0.1360002 0.1359997 0.2730002 0.2730002 0.2729998 0.2729998 0.2720003 0.2720003 0.2729998 0.2729998 0.2730002 0.2730002 0.2729998 0.2729998 0.2720003 0.2720003 0.1369996 0.1369996 0.1360002 0.2730002 0.2730002 0.2729998 0.2729998 0.2719998 0.2719998 0.2730002 0.2730002 0.2729998 0.2729998 0.2720003 0.2720003 0.2729998 0.2729998 0.1370001 0.1370001 0.1359997 0.2730007 0.2730007 0.2719994 0.2719994 0.2730007 0.2730007 0.2729998 0.2729998 0.2729998 0.2729998 0.2720003 0.2720003 0.2729998 0.2729998 0.1359997 0.1359997 0.2720003 0.2720003 0.2729998 0.2729998 0.2729998 0.2729998 0.2730007 0.2730007 0.2719994 0.2719994 0.2730007 0.2730007 0.2729998 0.2729998 0.2729998 0.8179998 0.8179998 0.2720003 0.2720003 0.8190002 0.8190002 0.2719994 0.2719994 1.091 1.091 0.218",
43
+ "is_slur_seq": "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0",
44
+ "ph_dur": "0.436365 0.163635 0.217454 0.054546 0.175271 0.097729 0.20709 0.06591 0.130956 0.142044 0.184499 0.087502 0.20709 0.06591 0.185498 0.087502 0.151547 0.120453 0.218454 0.054546 0.234365 0.038635 0.185498 0.087502 0.20609 0.06591 0.163907 0.109093 0.212774 0.060226 0.087773 0.185226 0.136 0.011 0.125 0.218454 0.054546 0.163907 0.109093 0.217454 0.054546 0.093453 0.179546 0.148 0.125 0.202546 0.070454 0.184499 0.087502 0.137 0.065547 0.070454 0.240045 0.032955 0.148 0.125 0.201546 0.070454 0.185499 0.087502 0.191182 0.081818 0.135636 0.136364 0.103681 0.169319 0.137 0.00532 0.13068 0.234365 0.038635 0.217453 0.054546 0.185499 0.087502 0.130956 0.142044 0.202542 0.070457 0.162908 0.109093 0.202542 0.070457 0.115048 0.157951 0.20609 0.06591 0.245725 0.027275 0.148 0.125 0.229818 0.043182 0.129955 0.142044 0.212774 0.235501 0.097725 0.185498 0.087502 0.594138 0.223862 0.178815 0.093185 0.709908 0.109093 0.233364 0.038635 1.091 0.218",
45
+ "f0_timestep": "0.005",
46
+ "f0_seq": "364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.7 364.5 364.3 365.2 366.3 367.1 368.3 369.0 369.8 370.8 371.3 372.2 373.5 374.5 376.1 377.4 378.9 381.2 383.4 385.1 386.9 389.0 390.6 392.3 393.8 394.6 395.3 395.8 396.0 396.5 396.7 396.4 396.6 396.6 396.2 396.3 396.6 396.3 396.2 396.3 396.1 396.0 396.1 395.7 395.5 395.6 395.5 395.6 396.0 396.4 396.8 397.2 397.6 397.7 397.7 397.7 397.6 397.4 397.0 396.2 395.7 395.5 395.0 394.5 394.4 394.2 394.0 394.0 393.9 393.9 394.0 394.2 394.3 394.3 394.4 394.5 394.6 394.9 395.1 395.2 395.4 395.6 395.6 395.5 395.3 394.8 393.9 392.7 391.3 389.6 388.2 386.9 385.7 384.6 383.9 383.7 383.9 384.7 386.3 388.6 390.5 392.6 394.8 396.3 397.3 398.2 398.7 398.8 399.0 398.9 398.5 398.2 397.9 397.4 396.7 396.7 396.6 396.3 396.5 396.6 396.7 396.9 397.0 397.1 397.1 396.9 396.6 395.9 395.4 394.6 393.7 393.0 392.4 391.9 391.9 392.0 391.8 391.7 391.5 390.4 389.2 387.7 385.1 382.5 380.2 378.2 377.6 378.9 380.2 381.5 382.8 384.2 385.5 386.8 388.1 389.5 390.8 392.1 393.5 394.8 396.2 397.0 397.0 397.1 397.1 397.0 396.8 396.3 395.8 395.5 395.3 395.1 394.8 394.7 394.6 394.5 394.6 394.9 395.1 395.5 395.9 396.1 396.4 396.9 397.0 397.2 397.5 397.4 397.1 396.9 396.4 395.2 393.4 390.9 387.0 382.7 377.8 373.0 369.0 368.3 369.8 371.2 372.7 374.2 375.7 377.2 378.7 380.2 381.7 383.2 384.7 386.2 387.8 389.3 390.9 392.4 394.0 395.5 397.1 398.7 399.1 399.1 399.3 399.3 399.6 399.5 399.1 398.6 398.1 397.5 397.0 396.6 396.3 396.2 396.4 396.6 396.6 396.6 396.6 396.6 396.3 396.1 396.1 395.7 394.9 394.5 393.8 392.4 391.3 389.9 388.1 385.6 382.5 379.2 375.8 373.3 371.3 371.1 372.4 373.6 374.8 376.0 377.2 378.4 379.7 380.9 382.1 383.4 384.6 385.9 387.1 388.4 389.7 391.4 393.8 395.1 396.1 396.9 396.9 396.8 396.7 396.3 396.0 395.6 395.1 394.7 394.4 394.3 394.3 394.3 394.5 394.8 395.0 395.2 395.5 395.7 396.1 396.4 396.6 396.7 396.7 396.4 396.3 396.2 395.7 395.4 395.3 395.1 395.0 395.0 394.6 393.9 393.4 392.6 391.5 390.8 389.9 389.4 389.4 389.2 389.6 390.6 391.3 392.1 393.1 393.6 394.2 395.0 395.8 396.5 397.1 397.6 397.9 398.2 398.3 398.4 398.4 398.4 398.3 398.0 398.0 397.9 397.6 397.5 397.3 397.1 397.1 397.1 397.0 397.0 396.7 396.6 396.5 396.0 395.9 395.1 393.0 390.5 387.5 383.7 380.8 378.1 375.8 375.4 377.0 378.5 380.2 381.8 383.4 385.0 386.6 388.3 389.9 391.6 393.2 394.9 396.6 398.3 399.9 401.6 402.3 402.4 402.6 402.3 401.9 401.5 400.9 400.3 399.8 399.3 398.8 398.6 398.5 398.4 398.4 398.4 398.4 398.4 398.4 398.4 398.5 398.6 398.8 398.8 398.6 398.4 398.0 397.4 396.4 394.6 392.3 389.2 385.3 381.2 377.5 375.3 374.8 376.1 377.5 378.8 380.2 381.5 382.9 384.2 385.6 386.9 388.3 389.7 391.1 392.4 393.8 395.2 396.6 398.0 399.4 395.7 391.9 392.2 392.2 391.8 391.5 391.9 392.5 392.9 393.8 394.6 395.1 395.6 396.3 396.7 397.0 397.4 397.5 397.6 397.7 397.7 397.6 397.2 397.0 396.7 396.3 395.7 395.4 394.6 392.9 391.9 390.3 387.7 385.5 
382.9 380.3 377.4 375.4 374.2 374.7 376.5 378.3 380.1 381.9 383.7 385.5 387.3 389.2 391.0 392.9 394.7 396.6 398.5 400.4 400.7 400.6 400.7 400.4 400.4 400.3 400.3 400.3 400.2 399.9 399.5 399.4 399.0 398.6 398.5 398.0 397.7 397.6 397.2 397.0 396.9 396.7 396.7 396.8 396.9 397.3 397.6 397.8 398.2 398.4 398.2 398.2 397.8 396.9 395.7 393.4 390.2 385.6 379.9 375.0 371.3 371.1 373.1 375.0 376.9 378.9 380.8 382.8 384.8 386.8 388.8 390.8 392.8 394.8 396.9 398.9 399.8 399.8 399.8 399.7 399.4 399.2 399.1 399.1 398.9 399.0 399.0 398.7 398.6 398.5 398.1 397.9 397.9 397.8 397.7 397.7 397.6 397.7 397.9 397.9 398.2 398.4 398.4 398.4 398.2 397.7 396.9 395.4 393.5 391.0 387.7 384.7 381.8 379.5 378.8 379.8 380.9 381.9 382.9 383.9 385.0 386.0 387.0 388.1 389.1 390.2 391.2 392.3 393.3 394.4 395.4 396.3 397.6 399.2 399.9 400.6 400.9 400.9 400.9 400.8 400.4 400.2 400.0 399.4 399.1 398.8 398.5 398.4 398.2 398.2 398.3 398.3 398.3 398.4 398.3 398.3 398.5 398.5 398.5 398.6 398.5 398.5 398.6 398.4 398.2 398.2 397.9 397.7 397.6 397.0 396.1 395.4 394.3 393.2 392.4 391.7 391.9 392.7 393.9 395.3 396.4 396.9 397.4 397.8 398.2 398.7 399.0 399.2 399.3 399.2 399.1 399.0 398.5 398.0 397.7 397.2 397.0 396.9 396.5 396.5 396.4 396.2 396.2 396.2 396.2 396.3 396.3 396.3 396.7 397.1 397.4 397.8 398.2 398.0 398.2 398.4 398.3 398.3 398.4 398.2 397.9 397.9 397.7 397.5 397.5 397.2 396.9 396.8 396.4 395.7 395.4 395.1 394.4 394.2 394.4 394.6 395.0 395.5 395.9 396.1 396.5 396.7 396.9 397.0 396.9 396.9 397.0 396.9 396.8 396.8 396.6 396.6 396.7 396.8 396.8 396.9 396.9 396.8 396.9 397.0 397.1 397.2 397.2 397.4 397.5 397.5 397.6 397.7 397.6 397.5 397.5 397.2 396.7 396.0 394.8 392.3 389.6 386.0 381.3 376.2 371.3 367.5 364.1 364.9 368.9 372.9 376.9 380.9 385.1 389.2 393.4 397.7 401.2 402.2 402.0 401.3 400.5 399.3 398.6 398.1 397.7 397.5 397.4 397.2 397.2 397.1 396.9 396.9 396.9 396.8 396.9 397.0 397.0 397.1 397.1 397.0 397.2 397.4 397.1 397.1 396.8 396.3 395.5 393.2 390.2 386.1 381.6 376.2 371.4 367.4 364.9 365.0 365.8 366.6 367.4 368.2 369.0 369.9 370.7 371.5 372.3 373.2 374.0 374.8 375.7 376.5 377.3 378.2 379.0 379.9 380.7 381.7 384.4 387.8 390.9 393.5 395.1 396.2 396.7 397.1 397.2 397.1 397.1 397.2 397.2 397.2 397.2 397.2 397.2 397.2 397.5 397.7 397.7 397.8 397.8 397.6 397.4 396.9 396.0 394.2 390.6 386.9 382.7 378.5 376.8 376.2 375.6 375.0 374.4 373.8 373.2 372.6 372.1 371.5 370.9 370.3 369.7 369.2 368.6 368.0 367.4 366.9 366.3 365.7 365.1 366.2 368.4 370.6 373.3 375.7 377.0 378.1 379.2 379.5 379.7 379.5 379.0 378.4 378.0 377.7 377.4 377.4 377.4 377.3 377.2 376.7 376.0 375.3 374.6 373.9 373.2 372.9 372.1 371.2 371.3 370.7 370.0 369.9 369.6 368.7 367.8 366.8 364.7 361.8 358.9 355.4 352.6 350.0 347.1 346.4 347.3 349.2 353.6 359.7 365.9 372.3 376.9 379.8 380.3 379.5 378.6 377.7 376.8 376.1 375.7 375.3 375.1 374.9 374.8 374.6 374.5 374.4 374.4 374.4 374.4 374.5 374.5 374.5 374.6 374.6 374.5 374.6 374.5 374.2 373.7 371.9 369.5 365.4 359.7 353.8 348.7 345.1 342.0 341.8 343.5 345.1 346.7 348.4 350.1 351.7 353.4 355.1 356.8 358.5 360.2 361.9 363.6 365.4 367.1 368.8 370.6 372.4 373.8 374.3 374.9 375.4 375.5 375.7 376.2 376.5 376.6 376.8 376.8 376.5 376.3 376.1 376.0 376.0 376.0 376.0 375.9 375.8 375.8 375.7 375.5 375.1 374.8 374.3 373.5 373.2 372.6 371.9 371.5 370.7 370.1 370.4 370.2 369.8 369.9 369.6 368.7 367.5 365.7 362.8 359.3 355.7 352.9 350.9 350.9 351.8 355.2 359.8 365.4 371.7 376.7 379.6 380.3 379.8 379.0 378.2 377.2 376.6 376.1 375.6 375.4 375.2 375.1 374.8 374.7 374.6 374.5 374.5 374.5 374.6 374.6 374.5 374.6 374.7 374.6 374.5 374.3 373.9 
373.2 371.3 368.5 364.1 359.3 353.3 347.4 343.2 342.6 344.2 345.7 347.3 348.9 350.5 352.1 353.7 355.4 357.0 358.6 360.3 361.9 363.6 365.3 366.9 368.6 370.3 372.0 373.7 375.4 377.2 378.0 378.0 378.1 377.9 377.9 378.0 378.0 377.9 377.5 377.0 376.5 375.9 375.5 375.1 374.7 374.4 374.3 374.0 374.1 374.5 374.8 375.3 375.9 376.2 376.5 376.8 376.8 376.8 376.9 376.5 376.0 375.7 374.6 373.1 371.5 369.0 365.1 361.5 358.7 356.8 356.4 357.4 358.4 359.4 360.4 361.5 362.5 363.5 364.6 365.6 366.7 367.7 368.8 369.8 370.9 371.9 372.7 374.1 375.8 376.8 377.3 377.7 377.7 377.4 377.3 377.1 376.6 376.5 376.2 376.1 376.2 376.1 375.8 375.5 374.9 374.4 374.2 373.6 373.0 372.7 371.9 371.5 371.9 371.6 371.3 371.3 370.6 370.0 369.1 368.1 366.1 363.3 361.0 358.7 358.4 359.5 360.5 361.6 362.6 363.7 364.8 365.8 366.9 368.0 369.1 370.1 371.2 372.3 373.4 374.5 375.6 376.2 376.6 376.9 376.8 376.9 376.9 376.7 376.7 376.5 376.1 376.0 375.7 375.4 375.4 375.3 375.3 375.2 375.0 375.4 375.8 375.6 375.8 376.1 375.7 375.4 375.1 374.3 373.2 371.7 369.4 365.7 362.4 359.0 356.3 354.3 354.6 356.4 358.3 360.1 362.0 363.9 365.7 367.6 369.5 371.5 373.4 375.3 377.2 379.2 381.2 383.1 383.8 383.8 383.5 382.7 381.7 380.7 379.7 379.0 378.2 377.4 377.0 376.6 376.4 376.2 376.0 375.8 375.8 375.8 375.8 375.8 375.8 375.9 376.0 376.1 376.2 376.2 376.1 375.9 375.5 374.6 373.0 369.7 366.1 361.6 357.4 356.1 357.4 358.7 360.0 361.4 362.7 364.0 365.4 366.7 368.1 369.5 370.8 372.2 373.6 375.0 376.3 377.7 379.1 380.5 381.9 383.4 385.1 387.4 389.7 392.3 395.3 397.2 398.6 399.9 400.1 400.0 399.9 399.5 399.1 398.7 398.4 398.0 397.9 397.8 397.4 397.2 397.0 396.6 396.1 395.5 394.9 394.3 394.3 394.2 393.7 394.0 394.4 394.2 394.0 393.9 392.9 391.4 390.0 387.5 384.9 382.5 379.6 377.7 376.1 376.9 379.1 381.4 383.6 385.9 388.2 390.5 392.8 395.1 397.4 399.7 401.2 401.3 401.0 400.3 400.0 400.1 399.8 399.4 399.5 399.4 398.8 398.9 398.8 398.5 398.7 398.6 398.4 398.5 398.5 398.4 398.5 398.6 398.5 398.4 398.3 397.9 397.6 397.2 396.7 395.1 393.0 389.5 385.1 379.3 373.4 368.3 364.0 363.8 365.5 367.2 369.0 370.7 372.4 374.2 376.0 377.7 379.5 381.3 383.1 384.9 386.7 388.5 390.3 392.1 394.0 395.8 397.7 399.6 400.5 400.3 400.5 400.5 400.6 400.7 400.6 400.3 400.1 399.8 399.2 398.7 398.3 397.9 397.6 397.2 396.7 396.2 396.1 396.0 395.9 396.1 396.6 397.1 397.5 397.9 398.0 398.2 398.7 398.7 398.7 399.0 398.9 399.0 399.1 398.8 398.6 398.5 398.0 397.7 397.6 397.1 396.8 396.9 396.8 396.9 397.5 397.7 398.0 398.4 398.6 398.7 399.1 399.3 399.4 399.7 399.8 399.6 399.7 399.7 399.5 399.4 399.3 399.0 398.7 398.5 398.3 398.2 398.2 398.0 397.9 397.9 397.8 397.6 397.6 397.6 397.2 397.0 396.7 395.7 394.3 391.8 387.9 383.2 378.6 373.5 368.5 365.9 366.4 368.1 369.9 371.7 373.5 375.3 377.1 378.9 380.8 382.6 384.4 386.3 388.2 390.0 391.9 393.8 395.7 397.6 398.9 399.2 399.2 399.2 399.0 399.0 399.1 398.8 398.6 398.5 398.2 397.8 397.7 397.6 397.5 397.5 397.5 397.5 397.6 397.7 397.8 397.9 397.9 397.9 397.8 397.7 397.6 396.9 396.3 394.8 391.4 387.1 381.9 377.0 372.0 368.6 368.8 372.3 375.8 379.3 382.9 386.5 390.1 393.8 397.5 401.3 405.1 408.9 412.7 416.6 420.5 424.5 428.5 432.5 436.6 438.7 439.9 441.3 442.3 443.4 444.2 444.7 445.0 445.0 445.0 444.4 444.1 443.9 443.4 442.9 442.9 442.9 442.7 442.9 443.2 442.9 443.3 443.9 444.1 444.5 445.2 444.9 444.4 443.7 442.5 441.3 439.0 436.3 434.4 432.8 432.5 433.0 433.4 433.9 434.4 434.9 435.3 435.8 436.3 436.8 437.2 437.7 438.2 438.7 439.2 439.7 440.1 440.6 441.1 441.6 441.9 442.3 443.3 444.5 445.1 445.1 445.4 445.6 445.6 445.6 445.8 445.6 445.5 445.8 445.8 445.8 
446.0 446.1 446.3 446.4 446.4 446.2 446.0 445.8 445.5 444.9 443.7 441.4 438.6 435.3 431.3 428.2 426.8 425.7 424.5 423.3 422.1 420.9 419.6 418.4 417.2 416.0 414.8 413.6 412.4 411.2 410.1 408.9 407.7 406.5 405.4 404.2 403.0 402.4 402.0 401.1 400.6 400.3 400.3 400.3 400.1 399.9 399.5 399.0 398.6 398.3 398.3 398.4 398.5 398.6 398.8 398.7 398.6 398.6 398.6 398.5 398.5 398.6 398.6 398.5 398.4 398.1 397.7 397.1 396.2 395.0 393.5 390.5 387.5 384.5 381.3 380.9 382.1 383.4 384.6 385.8 387.1 388.3 389.6 390.8 392.1 393.3 394.6 395.8 397.1 398.4 399.7 401.0 402.2 402.7 402.1 401.5 400.6 400.1 399.8 399.5 399.4 399.2 399.1 399.1 398.9 399.0 399.1 398.8 398.8 399.1 399.1 399.2 399.4 399.4 399.2 399.1 399.1 399.0 398.7 398.6 398.6 398.3 398.3 398.4 398.2 398.4 398.6 398.3 397.9 397.7 396.6 394.4 391.9 388.5 384.4 380.7 377.9 376.1 374.7 373.3 371.9 370.5 369.1 367.8 366.4 365.0 365.4 366.8 368.4 369.4 370.2 370.9 371.2 371.1 371.1 371.0 371.0 371.1 371.0 370.4 370.4 370.6 371.3 373.4 376.0 378.2 381.6 384.5 387.6 391.1 394.0 396.0 396.8 397.4 397.3 397.1 396.9 396.7 396.3 396.0 395.5 394.9 394.5 393.9 393.4 393.0 392.6 392.3 392.1 391.9 391.8 391.9 391.9 392.0 392.6 393.1 393.8 394.5 395.4 396.3 397.1 397.7 398.0 398.2 398.3 398.4 398.2 398.3 398.3 398.0 398.0 397.9 397.7 397.8 397.7 397.5 397.5 397.2 397.0 397.1 397.1 397.1 397.4 397.4 397.2 397.4 397.1 396.7 395.5 392.7 389.6 385.7 381.7 377.8 374.8 372.7 372.4 373.6 374.7 375.9 377.0 378.2 379.4 380.5 381.7 382.9 384.1 385.3 386.5 387.7 388.9 390.1 391.3 393.0 395.1 397.0 398.4 399.3 400.0 400.3 400.3 400.1 399.8 399.4 399.3 399.2 398.8 398.6 398.6 398.5 398.2 398.2 398.0 397.9 397.9 397.8 397.7 397.6 397.4 396.3 394.3 391.9 388.7 384.8 381.2 377.9 377.0 378.0 379.0 379.9 380.9 381.9 382.9 383.9 384.8 385.8 386.8 387.8 388.8 389.8 390.8 391.8 392.8 393.8 394.8 395.8 396.9 397.9 398.9 398.8 398.4 398.0 397.7 397.9 398.2 398.0 397.9 397.9 397.8 397.6 397.3 397.1 396.8 396.3 396.0 395.5 395.0 395.0 394.7 394.6 394.9 395.2 395.5 396.0 396.6 396.9 397.3 397.6 397.7 397.8 397.9 397.8 397.6 397.6 397.4 396.8 396.7 396.4 395.7 395.4 395.1 394.4 394.0 393.7 392.8 392.6 392.4 391.8 391.9 392.7 393.0 393.8 394.5 395.2 396.1 397.0 398.0 398.8 399.4 399.8 400.1 400.2 400.0 399.9 399.9 399.5 399.4 399.4 399.2 399.2 399.3 399.2 399.2 399.3 399.2 399.1 399.1 398.8 398.5 398.2 397.4 395.4 393.2 389.3 383.1 376.5 370.8 366.3 361.9 358.8 359.1 361.0 362.8 364.7 366.6 368.5 370.4 372.3 374.2 376.1 378.1 380.0 382.0 383.9 385.9 387.9 389.9 391.9 393.6 393.9 394.5 395.2 395.5 396.2 397.1 397.2 397.7 397.9 397.7 397.5 397.3 397.1 397.1 397.0 396.8 396.8 396.6 396.4 396.4 396.4 396.3 396.3 396.3 396.2 396.1 396.0 395.7 395.5 395.3 394.9 394.4 394.1 393.7 392.8 392.2 391.4 390.2 390.0 390.2 390.0 390.1 390.7 390.3 390.1 390.4 389.7 389.6 390.1 390.0 390.4 391.5 392.9 394.7 397.0 398.3 398.5 398.6 398.6 398.5 398.4 398.4 398.4 398.3 398.0 398.0 398.0 397.9 397.9 397.9 397.9 397.9 397.9 397.8 397.8 397.8 397.7 397.6 397.4 396.8 395.2 393.3 389.9 385.6 380.4 374.8 369.8 366.1 365.7 367.0 368.4 369.7 371.1 372.4 373.8 375.1 376.5 377.9 379.3 380.6 382.0 383.4 384.8 386.2 387.6 389.0 390.5 391.9 393.0 394.6 395.9 396.9 397.7 398.2 398.7 399.1 399.2 399.4 399.2 398.7 398.5 398.2 397.9 398.0 397.9 397.8 398.1 398.0 398.0 398.2 398.2 398.3 398.5 398.7 398.8 398.8 399.0 398.9 398.8 398.7 398.5 398.2 397.7 397.5 397.2 396.8 396.6 396.3 395.9 395.6 395.5 395.4 395.7 396.1 396.4 397.0 397.5 398.1 398.5 399.1 399.8 400.0 400.0 399.9 399.4 398.8 398.4 397.9 397.7 397.6 397.4 397.2 397.2 
397.1 397.0 397.0 396.9 396.8 396.8 396.8 396.9 397.0 397.1 397.1 397.2 397.6 397.7 397.8 397.8 397.6 397.2 396.6 395.5 393.4 389.4 384.5 379.1 373.7 370.3 369.4 371.1 372.8 374.6 376.3 378.0 379.8 381.5 383.3 385.1 386.9 388.7 390.5 392.3 394.1 395.9 397.7 399.1 399.6 399.4 399.0 399.0 399.3 399.1 399.0 399.2 398.8 398.4 398.5 398.3 398.0 398.3 398.2 398.3 398.5 398.4 398.3 398.7 398.7 398.7 399.0 399.0 398.8 399.0 399.0 399.0 399.1 398.8 398.6 397.6 395.9 393.3 389.3 384.5 380.2 375.4 371.3 369.1 369.6 371.8 374.0 376.2 378.5 380.7 383.0 385.3 387.6 389.9 392.2 394.5 396.9 399.2 401.6 402.8 403.0 403.2 403.2 402.9 402.4 401.7 400.9 400.0 399.4 398.8 398.6 398.6 398.5 398.4 398.4 398.4 398.3 398.2 398.3 398.5 398.6 398.7 398.8 398.6 398.2 397.9 396.4 393.8 390.1 385.2 380.5 375.8 371.8 370.9 371.7 372.6 373.4 374.2 375.1 375.9 376.7 377.6 378.4 379.3 380.1 381.0 381.8 382.7 383.5 384.4 385.2 386.1 387.0 387.8 389.2 391.4 393.6 395.5 396.9 397.7 398.2 398.5 399.0 399.3 399.2 399.2 399.3 399.2 399.1 399.2 399.2 399.1 399.2 399.3 399.4 399.4 399.4 399.4 399.2 398.9 398.4 397.5 395.8 393.7 391.3 388.1 384.4 381.0 378.7 378.7 379.8 380.9 382.0 383.1 384.3 385.4 386.6 387.7 388.8 390.0 391.1 392.3 393.5 394.6 395.8 397.0 398.1 399.3 399.6 399.5 399.2 398.6 398.2 398.0 397.7 397.2 397.0 396.9 396.6 396.3 396.3 396.1 395.9 395.9 395.9 396.0 396.3 396.7 396.8 396.9 397.1 397.4 397.6 397.8 397.9 397.8 397.6 396.9 396.1 394.6 392.7 390.3 387.9 385.3 382.6 381.3 379.8 378.9 379.1 379.4 379.5 380.0 380.7 381.4 382.0 382.7 383.4 384.1 384.5 384.6 385.1 385.9 387.2 388.5 389.4 390.2 391.0 391.5 392.1 392.4 392.7 393.0 393.2 393.6 394.1 394.4 394.7 395.1 395.5 395.8 395.9 396.0 396.0 395.9 396.1 396.2 396.1 396.2 396.0 395.5 394.5 392.2 388.6 384.4 379.4 375.4 373.6 374.6 375.5 376.5 377.5 378.5 379.4 380.4 381.4 382.4 383.4 384.4 385.4 386.3 387.3 388.3 389.3 390.4 391.4 392.4 393.2 394.3 395.7 396.6 397.1 397.8 398.2 398.3 398.7 398.7 398.6 398.5 398.4 398.4 398.4 398.5 398.6 398.6 398.8 399.0 399.0 399.0 399.1 398.9 398.8 399.0 398.8 398.5 398.1 397.7 397.1 396.7 396.3 395.4 394.6 393.7 392.4 390.9 389.0 387.2 384.9 382.4 380.4 378.8 377.0 375.3 374.2 373.7 373.8 373.8 373.6 373.5 373.0 372.4 372.2 372.0 371.7 372.1 373.3 373.9 374.2 374.6 374.7 374.7 374.8 374.9 374.8 374.9 374.9 374.9 374.9 374.9 374.8 375.0 375.2 375.2 375.3 375.5 375.3 375.4 375.6 375.5 375.6 375.9 375.8 375.7 375.9 375.9 375.9 376.0 375.9 375.9 376.0 375.9 375.9 376.0 375.9 375.9 376.0 375.9 375.9 376.0 375.9 375.9 376.0 375.9 375.8 375.9 375.9 375.8 375.9 375.9 375.8 375.9 375.8 375.7 375.9 375.8 375.7 375.9 375.8 375.7 375.9 375.9 375.8 375.9 375.9 375.9 376.0 375.9 375.9 376.1 376.1 376.0 376.1 376.0 375.9 376.0 375.8 375.7 375.8 375.5 375.5 375.6 375.5 375.5 375.6 375.5 375.5 375.6 375.5 375.5 375.5 375.4 375.4 375.4 375.3 375.2 375.2 375.1 375.1 375.2 374.8 374.6 374.8 374.8 375.0 375.4 375.6 375.7 375.9 375.8 375.7 375.9 375.7 375.3 374.9 374.4 373.3 371.9 370.0 367.2 364.3 361.5 359.9 358.3 357.8 358.5 359.1 359.8 360.5 361.2 361.8 362.5 363.2 363.9 364.6 365.3 365.9 366.6 367.3 368.0 368.7 369.4 370.1 370.8 371.5 372.2 372.9 373.6 374.3 375.0 375.7 376.4 377.1 377.8 378.5 379.2 379.0 378.2 377.7 377.2 376.8 376.7 376.8 376.6 376.1 376.0 375.9 375.7 375.7 375.8 375.7 375.6 375.5 375.3 374.9 374.5 373.6 373.0 372.4 371.7 371.1 370.3 369.2 367.2 364.2 360.9 356.4 352.2 348.0 343.0 337.9 333.5 329.9 328.1 328.7 329.2 329.8 330.3 330.9 331.4 331.9 332.5 333.0 333.6 334.1 334.7 335.3 335.8 336.4 336.9 335.9 335.1 335.4 335.4 
335.0 334.8 334.8 334.6 334.5 334.6 334.6 334.7 334.7 334.6 334.7 334.7 334.6 334.7 334.6 334.4 334.5 334.5 334.3 334.3 334.3 334.1 334.2 334.1 334.1 334.3 334.3 334.3 334.5 334.6 334.7 334.9 334.9 335.0 335.3 335.2 335.1 335.3 335.2 335.1 335.3 335.2 335.1 335.3 335.2 335.1 335.3 335.2 335.1 335.3 335.2 335.1 335.3 335.2 335.1 335.3 335.2 335.1 335.3 335.2 335.1 335.3 335.2 335.1 335.3 335.2 335.1 335.3 335.2 335.1 335.3 335.2 335.1 335.3 335.2 335.1 335.3 335.3 335.2 335.3 335.2 335.1 335.3 335.2 335.1 335.3 335.2 335.1 335.2 335.1 335.0 335.1 335.0 334.8 334.8 334.6 334.4 334.5 334.4 334.3 334.4 334.3 334.3 334.4 334.4 334.5 334.7 334.7 334.7 334.9 334.9 335.0 335.3 335.2 335.0 335.0 334.8 334.5 334.4 334.3 334.2 334.2 334.1 334.0 333.9 333.7 333.5 333.0 332.8 332.5 332.2 331.9 331.5 331.2 330.1 328.6 326.5 323.9 321.5 320.5 321.1 321.7 322.3 322.9 323.5 324.1 324.7 325.3 326.0 326.6 327.2 327.8 328.4 329.0 329.7 330.3 330.9 331.5 332.1 332.8 333.4 334.0 334.3 334.7 335.4 335.4 335.1 335.4 335.3 335.0 335.1 335.0 334.7 334.6 334.6 334.6 334.7 334.8 335.0 335.2 335.3 335.6 335.7 335.7 335.9 335.9 335.7 335.5 335.1 334.7 334.4 334.0 333.6 333.2 332.6 331.6 330.7 329.7 328.6 327.5 326.7 325.8 325.0 324.3 323.2 321.8 320.9 319.8 318.9 318.2 317.6 317.0 317.0 316.8 316.8 316.9 317.3 317.8 317.8 317.6 317.4 317.1 317.2 317.3 317.1 317.0 317.1 317.0 316.9 317.1 317.0 316.9 317.1 316.9 316.7 316.6 316.2 316.0 316.0 315.8 315.7 315.7 315.3 315.2 315.4 315.4 315.5 315.9 316.1 316.4 316.7 316.8 317.2 317.4 317.5 317.6 317.7 317.6 317.5 317.6 317.4 317.4 317.5 317.4 317.4 317.5 317.4 317.4 317.5 317.4 317.4 317.6 317.4 317.4 317.5 317.4 317.4 317.5 317.4 317.4 317.5 317.4 317.4 317.5 317.4 317.4 317.5 317.4 317.4 317.5 317.4 317.4 317.5 317.4 317.4 317.5 317.4 317.4 317.5 317.4 317.3 317.3 317.2 317.2 317.3 317.2 317.2 317.3 317.2 317.2 317.3 317.2 317.2 317.3 317.2 317.2 317.3 317.2 317.2 317.3 317.2 317.2 317.3 317.2 317.2 317.3 317.2 317.2 317.3 317.2 317.2 317.3 317.2 317.2 317.3 317.2 317.2 317.3 317.2 317.2 317.3 317.2 317.2 317.3 317.2 317.2 317.3 317.2 317.2 317.4 317.4 317.4 317.5 317.4 317.4 317.5 317.4 317.4 317.5 317.4 317.4 317.5 317.4 317.3 317.4 317.3 317.2 317.3 317.2 317.1 317.0 316.7 316.6 316.7 316.7 316.7 316.9 316.8 316.7 316.7 316.7 316.6 316.7 316.7 316.6 316.5 316.4 316.3 316.3 316.4 316.5 316.6 316.5 316.4 316.4 316.4 316.3 316.2 316.2 316.0 315.7 315.7 315.7 315.7 315.7 315.7 315.7 315.9 316.0 316.2 316.7 317.0 317.3 317.7 317.9 318.3 318.6 318.9 319.0 319.0 319.1 319.1 319.2 319.2 319.0 319.0 318.9 318.1 317.7 317.7 317.7 317.7 317.7 317.7 317.7 317.7 317.7 317.7 317.7 317.7 317.7 317.7 317.7 317.7 317.7 317.7 317.7 317.7 317.7 317.7 317.7",
47
+ "input_type": "phoneme",
48
+ "offset": 51.764
49
+ },
50
+ {
51
+ "text": "SP 行 走 在 危 险 边 缘 肆 意 的 怪 盗 SP 自 由 的 跳 着 指 尖 上 的 舞 蹈 SP 轻 盈 的 像 暗 影 中 藏 伏 的 波 斯 猫 等 待 下 一 个 目 标 SP",
52
+ "ph_seq": "SP x ing z ou z ai w ei x ian b ian y van s i0 y i d e g uai d ao SP z i0 y ou d e t iao zh e zh ir j ian sh ang d e w u d ao SP q ing y ing d e x iang an y ing zh ong c ang f u d e b o s i0 m ao d eng d ai x ia y i g e m u u b iao SP",
53
+ "note_seq": "rest D5 D5 B4 B4 D5 D5 G5 G5 D5 D5 C5 C5 B4 B4 A#4 A#4 A4 A4 G4 G4 D4 D4 G4 G4 rest D5 D5 B4 B4 D5 D5 G5 G5 D5 D5 C5 C5 B4 B4 C5 C5 C5 C5 G5 G5 C5 C5 rest D5 D5 B4 B4 D5 D5 G5 G5 D5 C5 C5 B4 B4 A#4 A#4 A#4 A#4 A#4 A#4 A#4 A#4 A#4 A#4 G4 G4 D4 D4 G4 G4 F4 F4 G4 G4 A#4 A#4 C5 C5 C#5 D5 D5 rest",
54
+ "note_dur_seq": "0.6 0.136 0.136 0.137 0.137 0.545 0.545 0.546 0.546 0.2720001 0.2720001 0.273 0.273 0.273 0.273 0.2719998 0.2719998 0.546 0.546 0.5450001 0.5450001 0.2730002 0.2730002 0.4089999 0.4089999 0.1370001 0.1359997 0.1359997 0.1360002 0.1360002 0.546 0.546 0.5450001 0.5450001 0.2729998 0.2729998 0.2730002 0.2730002 0.2719998 0.2719998 0.546 0.546 0.2730002 0.2730002 0.5449996 0.5449996 0.6820002 0.6820002 0.1359997 0.1370001 0.1370001 0.1360006 0.1360006 0.5450001 0.5450001 0.5459995 0.5459995 0.2729998 0.2720003 0.2720003 0.2729998 0.2729998 0.3640003 0.3640003 0.1809998 0.1809998 0.3640003 0.3640003 0.1820002 0.1820002 0.3639994 0.3639994 0.1810007 0.1810007 0.3639994 0.3639994 0.1820002 0.1820002 0.4090004 0.4090004 0.4089994 0.4089994 0.2729998 0.2729998 0.2720003 0.2720003 0.5460005 0.8179989 0.8179989 0.5",
55
+ "is_slur_seq": "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0",
56
+ "ph_dur": "0.387498 0.212502 0.07009 0.06591 0.082457 0.054543 0.474542 0.070457 0.339182 0.206818 0.244725 0.027275 0.20709 0.06591 0.163907 0.109093 0.195866 0.076134 0.442591 0.103409 0.447275 0.097725 0.224134 0.048866 0.409 0.088134 0.048866 0.070089 0.06591 0.081458 0.054543 0.452815 0.093185 0.369997 0.175003 0.103681 0.169319 0.115049 0.157951 0.184498 0.087502 0.475543 0.070457 0.185499 0.087502 0.506364 0.038635 0.682 0.054182 0.081818 0.076774 0.060226 0.097365 0.038635 0.35409 0.19091 0.475542 0.273 0.070457 0.168591 0.103409 0.218457 0.054543 0.276499 0.087502 0.148048 0.032951 0.325365 0.038635 0.067231 0.114769 0.270814 0.093185 0.148049 0.032951 0.286729 0.077271 0.057 0.125 0.311275 0.097725 0.381724 0.027275 0.152547 0.120453 0.272 0.436908 0.109093 0.817999 0.5",
57
+ "f0_timestep": "0.005",
58
+ "f0_seq": "597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 597.9 596.0 595.2 593.7 593.4 592.8 591.4 591.1 589.4 588.3 588.3 586.1 585.0 583.9 580.6 577.6 573.9 570.6 565.6 556.0 549.7 543.5 537.4 531.3 525.3 519.4 513.6 507.8 501.0 501.9 501.6 501.2 500.5 500.2 500.1 499.6 498.9 498.5 497.1 494.8 492.5 487.8 483.2 476.1 469.3 464.6 459.2 456.3 463.8 471.3 479.0 486.9 494.8 502.9 511.1 519.5 527.9 536.6 545.3 552.2 550.9 550.5 551.8 554.3 555.6 557.3 557.6 559.2 559.6 560.4 561.5 562.0 561.5 562.6 562.2 562.4 563.5 563.7 563.1 564.7 564.4 566.4 568.3 570.1 571.5 574.2 575.3 577.2 580.0 581.6 581.6 584.3 584.3 586.3 588.1 590.0 591.4 592.8 592.9 593.2 593.5 593.5 593.5 593.5 593.5 593.5 593.4 593.1 593.1 593.1 593.1 593.1 592.8 592.4 592.5 592.8 592.4 592.4 592.4 592.3 591.8 591.9 591.1 590.5 590.8 591.1 590.9 591.8 591.7 591.5 592.1 591.7 591.4 591.6 591.0 590.4 590.6 590.0 589.4 589.6 589.0 588.7 589.0 588.8 588.0 587.7 587.2 586.3 586.0 586.1 585.3 597.7 606.6 619.7 629.9 639.4 651.2 660.0 673.2 683.2 692.8 706.5 716.4 733.0 746.8 759.5 776.4 779.0 782.6 784.9 787.2 789.2 790.4 791.0 791.7 791.7 790.4 790.4 789.7 789.0 789.0 788.9 788.5 789.0 789.1 789.4 789.5 789.9 789.1 789.5 789.9 789.9 789.9 790.7 790.4 790.4 791.7 791.7 792.0 793.0 792.6 792.4 793.6 793.4 792.9 794.0 793.9 793.1 794.1 794.2 793.1 794.0 794.0 793.3 794.0 794.0 792.8 793.6 792.9 792.3 792.6 792.6 792.2 793.0 792.6 792.6 793.1 793.1 792.8 793.5 793.1 793.2 793.6 793.6 793.6 794.0 793.6 793.6 793.5 792.9 791.7 790.0 788.1 785.0 783.0 780.4 776.1 768.4 760.7 753.1 745.5 738.1 730.7 723.4 716.2 709.0 701.9 694.9 688.0 681.1 674.3 667.5 660.9 654.3 647.7 641.2 634.8 628.5 622.2 616.0 609.8 603.7 597.7 591.7 587.0 587.3 589.1 589.4 590.6 591.5 592.1 592.5 592.8 593.1 593.0 592.4 593.0 592.4 592.9 593.5 593.8 594.3 594.8 594.8 594.8 594.5 593.8 593.5 592.6 591.6 590.7 590.0 588.3 587.9 588.6 588.3 589.1 589.7 589.8 589.0 588.7 587.0 585.3 583.5 580.9 577.2 574.5 572.6 566.3 560.1 553.9 547.9 541.9 535.9 530.0 524.2 518.5 512.8 507.4 508.5 510.7 513.6 514.9 518.0 520.4 522.6 525.3 526.6 527.9 528.2 528.4 528.1 528.1 528.0 527.5 527.5 528.0 527.5 527.9 528.4 528.1 528.2 528.7 528.8 529.1 529.3 529.4 529.6 529.9 529.9 529.9 529.8 529.3 528.8 528.9 528.1 527.2 527.2 525.7 524.7 524.5 523.4 522.3 519.5 517.3 514.9 512.6 511.6 508.5 506.4 505.1 501.9 500.5 498.8 497.4 496.2 497.7 497.9 498.5 498.5 498.5 498.8 498.8 498.9 499.6 499.3 499.7 500.2 500.0 500.5 500.8 500.5 500.8 500.7 500.2 500.2 499.8 499.3 498.7 498.3 497.6 496.3 495.3 493.0 490.4 488.5 484.2 481.2 478.9 477.2 476.7 476.2 475.7 475.2 474.6 474.1 473.6 473.1 472.6 472.1 471.5 471.0 470.5 470.0 469.5 469.0 468.5 468.0 467.5 466.9 466.4 468.1 468.3 469.7 469.9 469.9 469.4 469.4 469.4 469.1 468.9 468.5 468.1 467.8 467.8 467.8 468.1 468.1 468.4 468.6 468.6 468.6 468.9 468.9 468.9 469.1 469.1 469.1 469.4 469.1 468.9 468.9 
468.9 468.3 467.9 467.2 466.5 465.4 464.2 462.5 461.9 460.4 459.7 458.7 456.1 453.9 452.0 450.3 448.5 446.4 444.9 442.8 441.8 440.3 441.4 441.8 441.8 442.0 442.5 442.6 443.1 443.3 443.1 443.4 443.8 443.6 443.9 444.1 443.8 443.9 444.3 444.1 444.3 444.8 444.6 444.6 445.1 444.9 445.0 445.3 445.1 445.2 445.4 445.3 445.2 445.4 445.3 445.1 445.3 445.1 445.1 445.3 445.1 445.2 445.3 445.1 445.2 445.4 445.1 445.2 445.4 445.3 445.1 445.6 445.3 445.1 445.3 445.1 444.9 444.8 444.3 443.9 444.1 443.6 443.6 443.6 442.8 442.8 443.0 442.6 443.1 442.7 439.5 437.3 433.1 430.0 425.0 422.6 421.7 420.9 420.1 419.3 418.5 417.7 416.8 416.0 415.2 414.4 413.6 412.8 412.0 411.2 410.4 409.6 408.8 408.0 407.2 406.4 405.6 404.8 404.0 403.3 402.5 401.7 400.9 400.1 399.4 398.6 397.8 397.0 396.3 395.6 395.6 395.8 395.4 395.2 395.2 395.2 395.0 395.4 395.4 395.6 395.6 395.6 395.4 395.6 395.4 395.6 395.6 395.4 395.4 395.4 395.2 395.5 395.6 395.4 395.7 395.9 395.7 395.9 396.1 395.9 396.1 396.3 396.1 396.3 396.5 396.3 396.6 396.6 396.6 396.6 396.6 396.3 396.3 396.3 396.3 396.3 396.3 396.1 396.3 396.3 396.3 396.3 396.3 396.3 396.3 396.1 396.0 395.8 395.4 395.1 394.5 393.5 391.3 390.0 385.9 383.7 380.8 379.4 378.0 377.8 377.1 375.0 372.9 370.9 368.8 366.8 364.8 362.7 360.7 358.7 356.7 354.8 352.8 350.9 348.9 347.0 345.1 343.1 341.2 339.4 337.5 335.6 333.7 331.9 330.1 328.2 326.4 324.6 322.8 321.0 319.2 317.5 315.7 314.0 312.2 310.5 308.8 307.1 305.4 303.4 300.5 301.1 301.5 301.7 300.9 300.4 300.0 299.9 299.5 299.7 300.0 299.6 299.9 300.2 299.9 300.0 299.9 299.7 299.7 299.7 299.1 299.3 299.1 299.1 298.9 299.5 299.0 298.1 297.4 293.6 291.0 285.9 282.5 280.6 285.1 289.6 294.3 299.0 303.8 308.7 313.6 318.7 323.8 329.0 334.2 339.6 345.0 350.6 356.2 361.9 367.7 373.6 379.6 385.7 390.1 389.5 387.9 385.9 384.6 384.1 383.7 383.9 384.1 384.8 385.1 385.7 386.4 386.4 385.7 386.3 385.9 387.0 387.8 388.4 388.4 389.5 389.5 390.4 391.5 392.8 393.1 394.5 394.6 395.0 395.3 395.6 395.3 395.6 395.2 395.2 395.4 395.4 395.5 395.9 395.9 395.9 396.1 396.3 396.3 396.6 396.6 396.6 396.6 396.6 396.6 396.8 396.6 396.9 397.2 397.8 398.2 398.9 399.0 398.8 399.2 398.4 398.2 399.3 402.8 406.3 409.8 413.3 416.9 420.5 424.2 427.8 431.5 435.3 439.0 442.9 446.7 450.6 454.5 458.4 462.4 466.4 470.4 474.5 478.6 482.7 486.9 491.1 495.4 499.7 504.0 508.4 512.8 517.2 521.7 526.2 530.8 535.4 540.0 544.7 549.4 554.2 559.0 563.8 568.7 573.6 578.6 583.6 588.0 589.4 590.4 590.4 591.5 591.8 592.1 591.4 591.4 592.0 591.7 591.4 591.6 590.7 590.5 590.7 590.4 589.1 581.3 569.7 560.6 553.1 542.3 534.6 523.8 514.8 507.2 497.4 491.9 493.7 494.0 494.5 495.0 494.7 494.1 493.5 491.6 487.9 485.1 478.8 473.6 468.9 463.3 459.7 468.3 477.0 485.8 494.9 504.0 513.4 522.9 532.6 542.5 552.6 563.2 573.4 575.9 577.2 578.9 580.2 581.8 582.6 584.2 583.6 584.1 583.6 583.6 583.9 583.9 583.0 583.2 582.6 583.7 584.3 585.2 585.1 586.0 585.7 586.3 587.3 588.4 588.7 589.5 590.0 590.5 590.8 591.4 591.5 591.8 591.8 591.8 591.8 592.1 592.1 592.4 592.4 592.4 592.4 592.4 592.4 592.5 592.8 592.4 592.4 592.8 592.8 592.8 592.7 592.4 592.5 592.7 592.4 592.4 592.4 592.1 592.1 592.1 592.1 592.1 592.1 592.1 592.1 592.1 592.1 592.4 592.4 592.4 592.4 592.1 591.8 590.6 589.7 585.3 581.9 575.8 570.7 568.0 573.4 578.8 584.4 589.9 595.5 601.2 606.9 612.7 618.5 624.4 630.4 636.4 642.4 648.5 654.7 660.9 667.2 673.6 680.0 686.5 693.0 699.6 706.2 713.0 719.8 726.6 733.5 740.5 747.6 754.7 759.5 761.7 766.0 767.9 770.1 772.2 773.6 772.6 773.5 772.8 772.5 773.2 773.3 772.2 773.6 773.8 774.5 777.0 778.1 778.1 781.3 781.3 783.2 
785.7 787.2 787.5 789.0 789.4 789.6 790.4 791.2 790.8 792.1 791.7 791.7 792.3 792.6 792.8 793.6 793.6 793.5 793.1 793.0 792.7 793.1 792.6 792.2 792.2 792.1 791.7 792.1 791.7 792.2 792.2 792.2 792.3 792.6 792.3 792.6 792.6 792.2 792.2 792.1 791.7 791.3 791.3 791.3 791.3 791.3 791.3 790.9 791.3 790.7 790.3 789.9 789.4 789.4 789.4 789.0 789.0 786.0 784.4 777.8 771.6 765.6 757.1 751.6 742.7 733.9 725.2 716.6 708.2 699.8 691.5 683.3 675.2 667.2 659.3 651.5 643.8 636.2 628.6 621.2 613.8 606.6 599.4 592.3 586.7 586.8 587.3 588.9 589.7 591.2 591.9 592.4 593.2 593.5 593.2 593.5 593.5 593.2 593.5 593.3 592.8 592.8 592.7 592.4 592.8 592.8 592.8 593.1 592.8 592.1 591.9 590.7 588.4 585.3 579.1 573.4 567.7 561.2 556.6 554.7 552.9 551.0 549.1 547.3 545.4 543.6 541.8 539.9 538.1 536.3 534.5 532.7 530.9 529.1 527.3 525.5 523.8 522.0 520.2 520.4 521.2 521.4 523.4 524.2 524.6 525.5 526.0 526.3 526.6 526.8 526.6 526.6 526.8 526.6 527.0 527.2 527.2 527.2 527.2 527.2 527.2 527.5 527.5 527.5 527.8 527.7 527.2 526.0 523.3 519.8 514.6 510.1 502.5 497.9 493.3 493.2 493.0 492.9 492.7 492.6 492.5 492.3 492.2 492.1 491.9 491.8 491.7 491.5 491.4 491.3 491.1 491.0 490.8 490.7 490.6 490.7 491.6 493.5 494.2 495.8 496.8 497.3 498.6 499.0 499.3 499.3 499.3 499.2 498.8 499.0 499.0 499.0 498.8 498.8 498.4 498.2 498.2 497.3 497.3 496.9 496.4 496.2 495.7 495.0 492.6 490.8 489.6 486.1 485.4 487.4 489.4 491.5 493.5 495.5 497.6 499.7 501.7 503.8 505.9 508.0 510.1 512.2 514.3 516.5 518.6 520.8 522.9 525.1 527.3 529.2 530.0 530.6 530.7 531.2 530.2 529.7 528.4 527.8 527.5 526.8 526.3 526.6 526.1 525.4 525.9 525.6 525.4 526.3 526.3 525.9 526.8 526.6 527.3 528.1 529.0 529.2 530.2 530.2 530.2 530.7 531.1 530.9 531.5 531.5 531.2 531.5 531.8 531.8 532.1 532.1 532.1 532.1 532.1 532.1 532.1 532.0 531.8 531.8 531.8 531.5 531.5 531.5 531.5 531.5 531.5 531.5 531.5 531.4 531.2 531.4 531.2 531.2 530.9 530.9 530.9 530.8 530.6 530.5 530.2 529.9 529.5 529.0 528.3 527.8 527.3 526.5 526.3 526.5 526.3 525.7 526.0 526.0 526.4 526.9 528.1 528.0 527.9 527.8 527.7 527.6 527.5 527.4 527.3 527.3 527.2 527.1 527.0 526.9 526.8 526.7 526.6 526.5 526.4 526.3 526.2 526.1 526.1 526.0 526.3 526.9 526.8 526.6 526.6 526.9 526.6 526.9 527.2 526.9 526.9 526.8 526.6 526.6 526.6 526.6 526.6 526.8 526.6 526.6 526.9 526.6 526.6 526.6 526.5 526.3 526.2 525.9 525.7 525.6 525.4 524.9 524.4 523.9 521.8 520.8 519.2 517.4 516.6 516.5 516.0 535.7 553.0 567.0 587.5 603.8 625.6 643.9 661.5 683.9 700.9 728.8 750.6 769.6 772.1 771.0 771.1 771.9 771.9 771.3 772.8 770.3 769.7 770.1 769.5 768.7 770.0 767.9 767.9 768.5 769.6 771.3 774.5 774.5 777.2 779.0 780.8 783.2 785.8 786.4 787.2 788.4 788.1 788.1 788.9 788.5 788.6 788.9 788.5 788.2 788.5 788.1 788.2 788.5 788.1 788.1 788.4 788.0 787.6 788.4 787.6 787.7 788.0 787.6 787.3 788.1 787.1 786.8 787.2 786.7 786.7 787.5 787.2 787.2 787.6 787.6 787.8 788.5 788.5 788.6 789.0 788.5 788.6 789.0 789.0 789.0 789.0 788.9 788.5 788.6 789.0 788.4 787.6 787.6 783.6 781.7 774.6 768.4 762.1 754.4 750.8 739.7 728.9 718.2 707.6 697.2 687.0 676.9 667.0 657.2 647.5 638.0 628.6 619.4 610.3 601.4 592.5 583.8 575.3 566.8 558.5 550.3 542.2 534.2 524.3 525.1 525.9 525.7 526.0 526.0 526.3 526.9 526.9 527.2 527.5 527.5 527.8 527.8 527.5 527.8 527.8 527.9 528.2 528.4 528.5 529.0 529.0 529.0 529.0 529.0 529.0 529.3 529.0 529.0 529.3 529.3 529.1 529.3 529.3 529.1 529.3 529.0 529.0 529.0 529.0 528.7 529.0 528.7 528.7 528.7 528.7 528.5 528.7 528.4 528.5 528.7 528.4 528.5 528.7 528.4 528.4 528.7 528.4 528.4 528.7 528.4 528.5 528.7 528.4 528.5 528.7 528.7 528.8 529.0 
528.7 528.7 529.0 529.0 528.7 529.0 529.0 528.8 529.0 528.7 528.8 529.0 528.7 528.7 528.7 528.5 528.7 529.0 528.8 529.0 529.3 529.0 529.3 529.3 529.0 529.0 529.0 528.8 529.1 529.3 528.8 529.3 529.6 529.7 529.9 530.5 530.2 530.7 531.2 531.2 531.6 532.4 532.7 531.6 532.4 532.3 533.5 534.7 535.9 537.1 538.3 539.5 540.7 541.9 543.2 544.4 545.6 546.8 548.0 549.3 550.5 551.7 553.0 554.2 555.4 556.7 557.9 559.2 560.4 561.7 562.9 564.2 565.5 566.7 568.0 569.3 570.5 571.8 573.1 574.4 575.7 577.0 578.3 579.6 580.9 582.2 583.5 584.8 586.1 587.4 588.7 590.0 591.2 591.9 592.4 593.3 594.2 593.9 594.2 594.2 594.1 593.8 593.7 593.0 592.4 592.0 591.4 591.0 590.6 590.0 578.6 570.6 559.6 551.0 543.9 533.3 525.7 514.8 506.0 498.5 498.0 497.3 496.9 496.0 495.3 494.3 493.6 490.6 488.3 486.0 481.8 479.3 475.7 473.2 471.6 479.4 487.4 495.6 503.8 512.2 520.7 529.4 538.2 547.2 556.3 565.6 574.9 578.1 579.1 580.2 581.5 582.9 583.9 585.1 584.3 585.2 584.6 585.0 585.4 585.6 585.1 585.6 585.1 585.7 586.3 587.3 587.3 588.4 588.7 589.0 589.9 590.7 591.2 591.8 591.8 592.2 592.4 592.8 592.8 592.8 593.1 592.8 593.1 593.1 593.1 593.2 593.5 593.1 593.2 593.5 593.2 593.5 593.4 593.2 593.5 593.5 593.5 593.5 593.4 593.1 593.2 593.5 593.5 593.5 593.5 593.8 593.8 593.9 594.2 594.2 593.9 594.2 594.0 593.6 594.2 593.6 592.4 591.6 590.4 588.7 585.6 583.9 580.4 579.6 584.7 589.8 595.0 600.3 605.6 610.9 616.3 621.7 627.2 632.7 638.3 643.9 649.6 655.3 661.0 666.9 672.7 678.7 684.6 690.7 696.7 702.9 709.1 715.3 721.6 728.0 734.4 740.8 747.4 753.9 760.6 767.3 774.0 780.0 782.6 784.4 785.7 787.2 788.4 789.7 791.3 791.5 792.6 793.0 792.6 792.6 792.5 791.7 792.1 791.7 791.3 792.2 792.2 792.0 793.0 792.6 792.8 793.6 793.8 793.3 794.5 793.9 793.1 793.6 793.4 792.6 793.4 792.6 792.7 793.3 794.0 793.8 794.9 794.8 794.6 794.9 794.5 794.5 794.4 794.0 794.0 794.5 794.5 794.6 794.9 794.9 795.1 795.9 795.5 795.9 796.3 796.4 796.8 797.6 797.3 797.7 797.7 797.7 797.6 797.2 796.8 796.6 795.9 794.4 793.9 793.6 792.6 792.2 791.6 791.0 789.9 787.3 785.8 781.2 776.6 773.6 765.0 756.5 748.1 739.8 731.6 723.5 715.4 707.5 699.6 691.9 684.2 676.6 669.1 661.6 654.3 647.0 639.8 632.7 625.7 618.7 611.9 605.1 597.7 588.3 581.3 573.7 576.2 578.6 582.0 583.9 585.7 587.7 589.7 589.9 591.1 591.8 591.8 592.5 592.8 592.8 592.8 592.8 592.8 592.8 593.1 593.1 593.1 593.2 593.5 593.5 593.6 594.2 594.1 593.9 594.2 594.1 593.8 593.8 593.7 593.1 592.4 592.4 591.5 590.4 590.4 589.2 588.3 581.9 577.0 572.6 567.6 564.1 558.6 554.3 550.9 545.0 541.1 535.5 531.0 527.2 527.6 527.8 527.9 528.2 528.4 528.2 528.7 528.7 528.4 528.4 528.1 527.8 527.7 527.5 527.5 527.5 527.5 526.9 526.9 526.9 526.6 526.9 526.6 526.6 526.9 526.9 526.9 527.5 527.6 527.8 527.8 527.5 527.1 526.2 524.5 522.6 521.1 516.6 513.9 511.9 508.2 507.4 506.7 505.9 505.1 504.4 503.6 502.9 502.1 501.3 500.6 499.8 499.1 498.5 498.5 499.0 499.0 499.4 499.9 499.9 500.2 499.9 499.8 499.3 499.0 499.0 498.8 498.8 498.8 498.8 498.5 498.8 498.5 498.5 498.5 498.4 498.2 497.5 497.2 496.5 496.4 495.9 495.0 494.9 494.2 492.6 491.9 489.3 486.7 484.8 482.5 480.7 477.6 473.2 469.0 464.7 460.5 456.3 452.2 448.1 444.0 440.0 436.0 432.1 428.2 424.3 420.5 416.7 414.1 416.3 418.6 421.6 425.1 428.1 430.9 433.1 433.7 435.3 434.7 434.7 435.4 434.9 435.6 437.3 437.5 439.7 442.0 443.9 446.0 449.2 451.2 453.4 457.8 460.1 461.9 465.6 467.0 469.1 470.4 471.3 471.9 472.4 472.7 472.7 472.7 472.9 472.9 472.7 472.7 472.7 471.5 471.0 468.9 466.6 464.8 460.6 458.4 454.7 452.5 453.3 454.1 454.9 455.7 456.5 457.3 458.0 458.8 459.6 460.4 461.2 462.0 462.8 
463.6 464.4 465.2 466.1 466.9 467.7 468.5 469.3 470.1 470.8 470.1 469.7 469.9 470.5 470.5 470.4 469.9 469.7 469.3 468.6 468.3 468.2 467.8 467.8 468.1 467.5 465.9 464.8 460.4 456.8 450.1 445.3 441.0 437.1 440.2 443.3 446.4 449.6 452.8 456.0 459.2 462.5 465.8 469.1 472.4 473.8 473.7 473.2 472.4 472.4 472.0 471.6 471.6 471.5 471.0 470.7 470.4 469.9 469.9 469.9 469.7 469.7 469.9 470.2 470.2 470.2 470.2 470.2 470.5 470.5 470.5 470.7 470.5 470.5 470.8 470.8 470.8 471.0 470.8 470.8 471.0 471.0 470.8 470.8 470.8 470.5 470.5 470.5 470.4 470.2 469.9 469.4 467.8 465.8 462.9 458.9 454.9 451.6 448.1 446.1 447.6 449.1 450.5 452.0 453.5 455.0 456.5 458.0 459.5 461.0 462.5 464.0 465.5 467.0 468.5 470.1 471.4 471.9 472.0 471.3 471.6 471.9 472.1 471.9 472.1 472.3 471.8 471.6 470.7 470.2 468.1 465.4 463.2 457.1 452.1 443.6 437.8 432.9 429.2 430.3 431.4 432.6 433.7 434.9 436.0 437.2 438.3 439.5 440.6 441.8 443.0 444.1 445.3 446.5 447.6 448.8 449.8 449.8 449.9 450.8 451.6 451.8 452.9 454.0 454.5 455.6 456.1 456.6 457.1 457.4 458.3 459.0 459.2 460.1 460.8 461.2 462.1 462.9 463.6 464.0 465.4 465.7 465.9 466.9 466.7 467.1 467.5 467.5 467.6 468.1 468.1 468.4 468.6 468.3 468.3 468.2 467.5 467.2 466.5 465.6 464.7 464.0 462.7 460.9 460.0 457.2 455.1 453.2 449.3 444.3 437.3 432.9 430.0 425.5 423.1 420.7 417.5 415.1 412.5 409.4 405.3 403.7 401.2 398.8 398.4 397.0 396.0 395.4 396.9 397.7 398.2 398.2 398.4 398.4 398.4 398.0 397.1 396.3 393.6 391.5 388.6 386.0 381.9 377.4 372.4 364.3 357.8 353.9 348.6 346.4 342.8 339.3 335.8 332.3 328.9 325.5 322.1 318.8 315.5 312.3 309.0 305.8 302.7 300.4 301.2 302.0 301.6 301.1 300.3 300.0 299.0 298.4 298.1 298.1 297.8 297.9 297.9 297.8 297.8 297.9 297.8 297.8 297.9 297.8 297.8 297.6 297.6 297.6 297.6 297.8 297.9 297.9 297.9 298.3 298.1 298.1 297.9 297.8 297.4 297.2 296.9 296.8 296.9 296.8 296.9 297.2 297.0 297.2 297.4 297.2 297.9 297.9 297.9 298.1 297.4 296.7 295.7 294.9 293.4 291.8 291.0 291.6 298.1 304.8 311.6 318.5 325.6 332.9 340.4 348.0 355.7 363.7 371.8 380.1 388.6 396.9 397.5 397.7 397.5 397.5 397.5 397.7 397.7 397.7 397.7 397.3 396.7 396.6 395.7 394.7 393.1 391.7 389.1 384.3 380.4 374.6 371.4 370.9 370.5 370.0 369.6 369.1 368.7 368.2 367.8 367.3 366.9 366.4 366.0 365.5 365.1 364.7 364.2 363.8 363.4 364.2 363.7 362.8 362.3 361.8 361.1 360.8 360.1 359.2 359.0 358.4 358.2 358.0 357.4 357.2 356.9 356.8 356.8 356.9 356.8 356.7 356.6 356.6 356.5 356.4 356.1 355.9 355.9 355.8 355.9 355.8 355.9 355.9 355.8 355.9 356.1 356.0 356.2 356.1 355.9 355.9 355.9 355.9 355.9 355.9 356.0 356.1 355.9 355.9 355.5 355.3 355.1 354.7 354.9 354.7 354.7 354.7 354.7 354.4 353.7 354.3 354.9 355.5 356.0 356.6 357.2 357.8 358.4 359.0 359.6 360.2 360.8 361.4 362.0 362.6 366.5 370.9 374.3 376.4 378.0 380.7 382.6 385.0 387.4 389.3 390.6 391.7 392.4 393.4 393.6 394.5 394.5 394.5 394.8 395.2 395.5 395.9 396.1 396.4 396.6 397.1 397.3 397.7 397.7 397.9 398.2 398.4 398.4 398.6 398.6 398.6 398.6 398.6 398.6 398.6 398.4 398.4 398.6 398.4 398.4 398.3 398.1 397.9 397.9 397.7 397.5 397.4 397.0 397.0 396.8 396.5 396.3 396.1 395.4 394.0 392.4 389.9 387.3 383.9 381.3 377.9 377.5 381.6 385.7 389.9 394.1 398.4 402.7 407.1 411.5 415.9 420.4 425.0 429.6 434.2 438.9 443.7 448.5 453.4 458.3 463.2 467.3 467.5 468.1 468.1 469.0 469.5 469.7 470.0 470.5 470.5 470.5 470.5 470.7 470.5 470.8 470.8 471.0 470.8 471.0 471.3 471.4 471.9 472.1 472.1 472.4 472.4 472.4 472.4 472.4 472.0 471.2 470.5 469.0 468.1 466.5 464.8 465.9 466.8 468.6 471.9 474.9 477.6 482.3 485.1 488.4 492.1 494.5 497.4 500.5 503.8 506.0 508.9 513.4 516.3 521.6 523.4 
523.9 524.2 524.2 524.4 522.9 522.9 521.0 520.2 519.6 519.6 519.3 520.2 520.2 521.0 521.9 522.6 523.8 525.1 525.7 526.1 526.9 527.5 527.5 528.2 528.6 528.1 528.5 529.0 528.5 528.8 529.0 528.7 528.7 529.0 528.7 528.7 529.0 528.7 528.8 529.0 529.1 529.4 529.8 530.6 531.5 532.4 533.7 534.9 536.7 538.2 539.6 541.4 543.0 545.0 546.6 548.1 549.9 551.5 553.1 554.5 555.6 556.9 557.8 558.8 559.3 559.6 559.9 560.2 560.2 560.4 560.2 560.2 560.5 560.2 560.2 560.5 560.2 560.2 560.4 560.2 560.2 560.4 560.2 560.2 560.4 560.2 560.2 560.5 560.2 560.2 560.5 560.2 560.5 560.7 560.2 560.5 560.7 560.2 560.5 560.7 560.2 560.5 560.8 560.2 560.2 560.5 559.9 560.2 560.1 559.8 559.8 560.1 559.8 559.9 560.1 559.8 560.2 560.2 559.9 560.2 560.2 559.8 559.8 560.3 559.6 559.8 559.6 558.2 556.4 552.1 548.9 542.1 537.7 534.2 535.4 536.7 537.9 539.1 540.4 541.6 542.9 544.1 545.4 546.7 547.9 549.2 550.5 551.7 553.0 554.3 555.6 556.9 558.2 559.4 560.7 562.0 563.3 564.6 565.9 567.4 569.6 570.4 572.9 576.5 579.5 582.6 586.2 589.0 591.0 592.3 593.5 593.5 593.5 593.4 593.2 593.8 593.8 593.8 594.6 594.8 594.8 595.5 595.5 595.5 595.5 595.2 595.2 595.2 594.5 594.4 594.2 593.8 593.8 593.8 593.9 594.2 594.1 593.8 594.2 594.1 593.8 594.2 594.2 593.9 594.2 594.2 593.9 594.2 594.4 593.9 594.2 594.4 593.8 594.2 594.4 593.8 594.2 594.5 593.9 594.2 594.5 593.9 594.2 594.4 593.9 594.2 594.1 593.8 594.2 594.1 593.8 594.2 594.2 593.8 593.9 594.2 593.8 593.8 594.1 593.8 593.8 594.1 593.8 593.9 594.1 593.8 593.9 594.2 593.8 593.9 594.2 593.8 593.8 594.1 593.8 593.8 594.1 593.8 593.9 594.1 593.8 593.9 594.2 593.5 593.9 594.2 593.5 593.5 593.7 593.2 593.5 593.4 592.8 593.1 593.0 592.4 592.8 593.1 592.5 592.8 593.1 592.6 593.1 593.1 592.8 593.1 593.4 592.8 593.2 593.4 592.8 593.2 593.5 592.9 593.2 593.5 592.8 593.1 593.3 592.9 593.5 593.8 593.5 594.2 594.5 594.5 596.3 596.6 597.9 599.5 600.4 601.3 602.4 603.6 604.0 604.5 604.9 604.9 604.3 605.0 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6 605.6",
59
+ "input_type": "phoneme",
60
+ "offset": 72.491
61
+ }
62
+ ]
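The two segments above form a complete nomidi ds project: each segment freezes its phoneme durations in "ph_dur" and its pitch curve in "f0_seq", sampled every "f0_timestep" seconds. A minimal pre-flight check for such a file is sketched below (the helper name, file path and tolerance are assumptions for illustration, not part of this commit); it verifies that the frozen pitch curve spans roughly the same time as the phoneme durations, a large mismatch being a hint that the export was not properly frozen.

# Minimal sketch (assumed helper, path and tolerance; not part of this commit):
# sanity-check that a frozen ds segment's f0 curve matches its phoneme durations.
import json

def check_ds_segment(seg, tol=0.1):
    ph_dur = [float(v) for v in seg["ph_dur"].split(" ")]
    f0_seq = [float(v) for v in seg["f0_seq"].split(" ")]
    step = float(seg["f0_timestep"])
    dur_from_ph = sum(ph_dur)
    dur_from_f0 = len(f0_seq) * step
    return abs(dur_from_ph - dur_from_f0) <= tol, dur_from_ph, dur_from_f0

with open("project.ds") as f:  # hypothetical path
    for i, seg in enumerate(json.load(f)):
        ok, d_ph, d_f0 = check_ds_segment(seg)
        print(i, "ok" if ok else "mismatch", round(d_ph, 3), round(d_f0, 3))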
modules/attentions.py ADDED
@@ -0,0 +1,397 @@
1
+ import copy
2
+ import math
3
+ import numpy as np
4
+ import torch
5
+ from torch import nn
6
+ from torch.nn import functional as F
7
+
8
+ import modules.commons as commons
9
+
10
+
11
+ class LayerNorm(nn.Module):
12
+ def __init__(self, channels, eps=1e-5):
13
+ super().__init__()
14
+ self.channels = channels
15
+ self.eps = eps
16
+
17
+ self.gamma = nn.Parameter(torch.ones(channels))
18
+ self.beta = nn.Parameter(torch.zeros(channels))
19
+
20
+ def forward(self, x):
21
+ x = x.transpose(1, -1)
22
+ x = F.layer_norm(x, (self.channels,), self.gamma, self.beta, self.eps)
23
+ return x.transpose(1, -1)
24
+
25
+
26
+ class Encoder(nn.Module):
27
+ def __init__(self, hidden_channels, filter_channels, n_heads, n_layers, kernel_size=1, p_dropout=0., window_size=4, **kwargs):
28
+ super().__init__()
29
+ self.hidden_channels = hidden_channels
30
+ self.filter_channels = filter_channels
31
+ self.n_heads = n_heads
32
+ self.n_layers = n_layers
33
+ self.kernel_size = kernel_size
34
+ self.p_dropout = p_dropout
35
+ self.window_size = window_size
36
+
37
+ self.drop = nn.Dropout(p_dropout)
38
+ self.attn_layers = nn.ModuleList()
39
+ self.norm_layers_1 = nn.ModuleList()
40
+ self.ffn_layers = nn.ModuleList()
41
+ self.norm_layers_2 = nn.ModuleList()
42
+ for i in range(self.n_layers):
43
+ self.attn_layers.append(MultiHeadAttention(hidden_channels, hidden_channels, n_heads, p_dropout=p_dropout, window_size=window_size))
44
+ self.norm_layers_1.append(LayerNorm(hidden_channels))
45
+ self.ffn_layers.append(FFN(hidden_channels, hidden_channels, filter_channels, kernel_size, p_dropout=p_dropout))
46
+ self.norm_layers_2.append(LayerNorm(hidden_channels))
47
+
48
+ def forward(self, x, x_mask):
49
+ attn_mask = x_mask.unsqueeze(2) * x_mask.unsqueeze(-1)
50
+ x = x * x_mask
51
+ for i in range(self.n_layers):
52
+ y = self.attn_layers[i](x, x, attn_mask)
53
+ y = self.drop(y)
54
+ x = self.norm_layers_1[i](x + y)
55
+
56
+ y = self.ffn_layers[i](x, x_mask)
57
+ y = self.drop(y)
58
+ x = self.norm_layers_2[i](x + y)
59
+ x = x * x_mask
60
+ return x
61
+
62
+ class Decoder(nn.Module):
63
+ def __init__(self, hidden_channels, filter_channels, n_heads, n_layers, kernel_size=1, p_dropout=0., proximal_bias=False, proximal_init=True, **kwargs):
64
+ super().__init__()
65
+ self.hidden_channels = hidden_channels
66
+ self.filter_channels = filter_channels
67
+ self.n_heads = n_heads
68
+ self.n_layers = n_layers
69
+ self.kernel_size = kernel_size
70
+ self.p_dropout = p_dropout
71
+ self.proximal_bias = proximal_bias
72
+ self.proximal_init = proximal_init
73
+
74
+ self.drop = nn.Dropout(p_dropout)
75
+ self.self_attn_layers = nn.ModuleList()
76
+ self.norm_layers_0 = nn.ModuleList()
77
+ self.encdec_attn_layers = nn.ModuleList()
78
+ self.norm_layers_1 = nn.ModuleList()
79
+ self.ffn_layers = nn.ModuleList()
80
+ self.norm_layers_2 = nn.ModuleList()
81
+ for i in range(self.n_layers):
82
+ self.self_attn_layers.append(MultiHeadAttention(hidden_channels, hidden_channels, n_heads, p_dropout=p_dropout, proximal_bias=proximal_bias, proximal_init=proximal_init))
83
+ self.norm_layers_0.append(LayerNorm(hidden_channels))
84
+ self.encdec_attn_layers.append(MultiHeadAttention(hidden_channels, hidden_channels, n_heads, p_dropout=p_dropout))
85
+ self.norm_layers_1.append(LayerNorm(hidden_channels))
86
+ self.ffn_layers.append(FFN(hidden_channels, hidden_channels, filter_channels, kernel_size, p_dropout=p_dropout, causal=True))
87
+ self.norm_layers_2.append(LayerNorm(hidden_channels))
88
+
89
+ def forward(self, x, x_mask, h, h_mask):
90
+ """
91
+ x: decoder input
92
+ h: encoder output
93
+ """
94
+ self_attn_mask = commons.subsequent_mask(x_mask.size(2)).to(device=x.device, dtype=x.dtype)
95
+ encdec_attn_mask = h_mask.unsqueeze(2) * x_mask.unsqueeze(-1)
96
+ x = x * x_mask
97
+ for i in range(self.n_layers):
98
+ y = self.self_attn_layers[i](x, x, self_attn_mask)
99
+ y = self.drop(y)
100
+ x = self.norm_layers_0[i](x + y)
101
+
102
+ y = self.encdec_attn_layers[i](x, h, encdec_attn_mask)
103
+ y = self.drop(y)
104
+ x = self.norm_layers_1[i](x + y)
105
+
106
+ y = self.ffn_layers[i](x, x_mask)
107
+ y = self.drop(y)
108
+ x = self.norm_layers_2[i](x + y)
109
+ x = x * x_mask
110
+ return x
111
+
112
+ class FFT(nn.Module):
113
+ def __init__(self, hidden_channels, filter_channels, n_heads, n_layers=1, kernel_size=1, p_dropout=0., proximal_bias=False, proximal_init=True, **kwargs):
114
+ super().__init__()
115
+ self.hidden_channels = hidden_channels
116
+ self.filter_channels = filter_channels
117
+ self.n_heads = n_heads
118
+ self.n_layers = n_layers
119
+ self.kernel_size = kernel_size
120
+ self.p_dropout = p_dropout
121
+ self.proximal_bias = proximal_bias
122
+ self.proximal_init = proximal_init
123
+
124
+ self.drop = nn.Dropout(p_dropout)
125
+ self.self_attn_layers = nn.ModuleList()
126
+ self.norm_layers_0 = nn.ModuleList()
127
+ self.ffn_layers = nn.ModuleList()
128
+ self.norm_layers_1 = nn.ModuleList()
129
+ for i in range(self.n_layers):
130
+ self.self_attn_layers.append(MultiHeadAttention(hidden_channels, hidden_channels, n_heads, p_dropout=p_dropout, proximal_bias=proximal_bias, proximal_init=proximal_init))
131
+ self.norm_layers_0.append(LayerNorm(hidden_channels))
132
+ self.ffn_layers.append(FFN(hidden_channels, hidden_channels, filter_channels, kernel_size, p_dropout=p_dropout, causal=True))
133
+ self.norm_layers_1.append(LayerNorm(hidden_channels))
134
+
135
+ def forward(self, x, x_mask):
136
+ """
137
+ x: decoder input
138
+ h: encoder output
139
+ """
140
+ self_attn_mask = commons.subsequent_mask(x_mask.size(2)).to(device=x.device, dtype=x.dtype)
141
+ x = x * x_mask
142
+ for i in range(self.n_layers):
143
+ y = self.self_attn_layers[i](x, x, self_attn_mask)
144
+ y = self.drop(y)
145
+ x = self.norm_layers_0[i](x + y)
146
+
147
+ y = self.ffn_layers[i](x, x_mask)
148
+ y = self.drop(y)
149
+ x = self.norm_layers_1[i](x + y)
150
+ x = x * x_mask
151
+ return x
152
+
153
+
154
+ class FFNs(nn.Module):
155
+ def __init__(self, hidden_channels, filter_channels, n_heads, n_layers=1, kernel_size=1, p_dropout=0., proximal_bias=False, proximal_init=True, **kwargs):
156
+ super().__init__()
157
+ self.hidden_channels = hidden_channels
158
+ self.filter_channels = filter_channels
159
+ self.n_heads = n_heads
160
+ self.n_layers = n_layers
161
+ self.kernel_size = kernel_size
162
+ self.p_dropout = p_dropout
163
+ self.proximal_bias = proximal_bias
164
+ self.proximal_init = proximal_init
165
+
166
+ self.drop = nn.Dropout(p_dropout)
167
+ #self.self_attn_layers = nn.ModuleList()
168
+ #self.norm_layers_0 = nn.ModuleList()
169
+ self.ffn_layers = nn.ModuleList()
170
+ self.norm_layers_1 = nn.ModuleList()
171
+ for i in range(self.n_layers):
172
+ #self.self_attn_layers.append(MultiHeadAttention(hidden_channels, hidden_channels, n_heads, p_dropout=p_dropout, proximal_bias=proximal_bias, proximal_init=proximal_init))
173
+ #self.norm_layers_0.append(LayerNorm(hidden_channels))
174
+ self.ffn_layers.append(FFN(hidden_channels, hidden_channels, filter_channels, kernel_size, p_dropout=p_dropout, causal=True))
175
+ self.norm_layers_1.append(LayerNorm(hidden_channels))
176
+
177
+ def forward(self, x, x_mask):
178
+ """
179
+ x: decoder input
180
+ h: encoder output
181
+ """
182
+ self_attn_mask = commons.subsequent_mask(x_mask.size(2)).to(device=x.device, dtype=x.dtype)
183
+ x = x * x_mask
184
+ for i in range(self.n_layers):
185
+ #y = self.self_attn_layers[i](x, x, self_attn_mask)
186
+ #y = self.drop(y)
187
+ #x = self.norm_layers_0[i](x + y)
188
+
189
+ y = self.ffn_layers[i](x, x_mask)
190
+ y = self.drop(y)
191
+ x = self.norm_layers_1[i](x + y)
192
+ x = x * x_mask
193
+ return x
194
+
195
+ class MultiHeadAttention(nn.Module):
196
+ def __init__(self, channels, out_channels, n_heads, p_dropout=0., window_size=None, heads_share=True, block_length=None, proximal_bias=False, proximal_init=False):
197
+ super().__init__()
198
+ assert channels % n_heads == 0
199
+
200
+ self.channels = channels
201
+ self.out_channels = out_channels
202
+ self.n_heads = n_heads
203
+ self.p_dropout = p_dropout
204
+ self.window_size = window_size
205
+ self.heads_share = heads_share
206
+ self.block_length = block_length
207
+ self.proximal_bias = proximal_bias
208
+ self.proximal_init = proximal_init
209
+ self.attn = None
210
+
211
+ self.k_channels = channels // n_heads
212
+ self.conv_q = nn.Conv1d(channels, channels, 1)
213
+ self.conv_k = nn.Conv1d(channels, channels, 1)
214
+ self.conv_v = nn.Conv1d(channels, channels, 1)
215
+ self.conv_o = nn.Conv1d(channels, out_channels, 1)
216
+ self.drop = nn.Dropout(p_dropout)
217
+
218
+ if window_size is not None:
219
+ n_heads_rel = 1 if heads_share else n_heads
220
+ rel_stddev = self.k_channels**-0.5
221
+ self.emb_rel_k = nn.Parameter(torch.randn(n_heads_rel, window_size * 2 + 1, self.k_channels) * rel_stddev)
222
+ self.emb_rel_v = nn.Parameter(torch.randn(n_heads_rel, window_size * 2 + 1, self.k_channels) * rel_stddev)
223
+
224
+ nn.init.xavier_uniform_(self.conv_q.weight)
225
+ nn.init.xavier_uniform_(self.conv_k.weight)
226
+ nn.init.xavier_uniform_(self.conv_v.weight)
227
+ if proximal_init:
228
+ with torch.no_grad():
229
+ self.conv_k.weight.copy_(self.conv_q.weight)
230
+ self.conv_k.bias.copy_(self.conv_q.bias)
231
+
232
+ def forward(self, x, c, attn_mask=None):
233
+ q = self.conv_q(x)
234
+ k = self.conv_k(c)
235
+ v = self.conv_v(c)
236
+
237
+ x, self.attn = self.attention(q, k, v, mask=attn_mask)
238
+
239
+ x = self.conv_o(x)
240
+ return x
241
+
242
+ def attention(self, query, key, value, mask=None):
243
+ # reshape [b, d, t] -> [b, n_h, t, d_k]
244
+ b, d, t_s, t_t = (*key.size(), query.size(2))
245
+ query = query.view(b, self.n_heads, self.k_channels, t_t).transpose(2, 3)
246
+ key = key.view(b, self.n_heads, self.k_channels, t_s).transpose(2, 3)
247
+ value = value.view(b, self.n_heads, self.k_channels, t_s).transpose(2, 3)
248
+
249
+ scores = torch.matmul(query / math.sqrt(self.k_channels), key.transpose(-2, -1))
250
+ if self.window_size is not None:
251
+ assert t_s == t_t, "Relative attention is only available for self-attention."
252
+ key_relative_embeddings = self._get_relative_embeddings(self.emb_rel_k, t_s)
253
+ rel_logits = self._matmul_with_relative_keys(query /math.sqrt(self.k_channels), key_relative_embeddings)
254
+ scores_local = self._relative_position_to_absolute_position(rel_logits)
255
+ scores = scores + scores_local
256
+ if self.proximal_bias:
257
+ assert t_s == t_t, "Proximal bias is only available for self-attention."
258
+ scores = scores + self._attention_bias_proximal(t_s).to(device=scores.device, dtype=scores.dtype)
259
+ if mask is not None:
260
+ scores = scores.masked_fill(mask == 0, -1e4)
261
+ if self.block_length is not None:
262
+ assert t_s == t_t, "Local attention is only available for self-attention."
263
+ block_mask = torch.ones_like(scores).triu(-self.block_length).tril(self.block_length)
264
+ scores = scores.masked_fill(block_mask == 0, -1e4)
265
+ p_attn = F.softmax(scores, dim=-1) # [b, n_h, t_t, t_s]
266
+ p_attn = self.drop(p_attn)
267
+ output = torch.matmul(p_attn, value)
268
+ if self.window_size is not None:
269
+ relative_weights = self._absolute_position_to_relative_position(p_attn)
270
+ value_relative_embeddings = self._get_relative_embeddings(self.emb_rel_v, t_s)
271
+ output = output + self._matmul_with_relative_values(relative_weights, value_relative_embeddings)
272
+ output = output.transpose(2, 3).contiguous().view(b, d, t_t) # [b, n_h, t_t, d_k] -> [b, d, t_t]
273
+ return output, p_attn
274
+
275
+ def _matmul_with_relative_values(self, x, y):
276
+ """
277
+ x: [b, h, l, m]
278
+ y: [h or 1, m, d]
279
+ ret: [b, h, l, d]
280
+ """
281
+ ret = torch.matmul(x, y.unsqueeze(0))
282
+ return ret
283
+
284
+ def _matmul_with_relative_keys(self, x, y):
285
+ """
286
+ x: [b, h, l, d]
287
+ y: [h or 1, m, d]
288
+ ret: [b, h, l, m]
289
+ """
290
+ ret = torch.matmul(x, y.unsqueeze(0).transpose(-2, -1))
291
+ return ret
292
+
293
+ def _get_relative_embeddings(self, relative_embeddings, length):
294
+ max_relative_position = 2 * self.window_size + 1
295
+ # Pad first before slice to avoid using cond ops.
296
+ pad_length = max(length - (self.window_size + 1), 0)
297
+ slice_start_position = max((self.window_size + 1) - length, 0)
298
+ slice_end_position = slice_start_position + 2 * length - 1
299
+ if pad_length > 0:
300
+ padded_relative_embeddings = F.pad(
301
+ relative_embeddings,
302
+ commons.convert_pad_shape([[0, 0], [pad_length, pad_length], [0, 0]]))
303
+ else:
304
+ padded_relative_embeddings = relative_embeddings
305
+ used_relative_embeddings = padded_relative_embeddings[:,slice_start_position:slice_end_position]
306
+ return used_relative_embeddings
307
+
308
+ def _relative_position_to_absolute_position(self, x):
309
+ """
310
+ x: [b, h, l, 2*l-1]
311
+ ret: [b, h, l, l]
312
+ """
313
+ batch, heads, length, _ = x.size()
314
+ # Concat columns of pad to shift from relative to absolute indexing.
315
+ x = F.pad(x, commons.convert_pad_shape([[0,0],[0,0],[0,0],[0,1]]))
316
+
317
+ # Concat extra elements so to add up to shape (len+1, 2*len-1).
318
+ x_flat = x.view([batch, heads, length * 2 * length])
319
+ x_flat = F.pad(x_flat, commons.convert_pad_shape([[0,0],[0,0],[0,length-1]]))
320
+
321
+ # Reshape and slice out the padded elements.
322
+ x_final = x_flat.view([batch, heads, length+1, 2*length-1])[:, :, :length, length-1:]
323
+ return x_final
324
+
325
+ def _absolute_position_to_relative_position(self, x):
326
+ """
327
+ x: [b, h, l, l]
328
+ ret: [b, h, l, 2*l-1]
329
+ """
330
+ batch, heads, length, _ = x.size()
331
+ # pad along column
332
+ x = F.pad(x, commons.convert_pad_shape([[0, 0], [0, 0], [0, 0], [0, length-1]]))
333
+ x_flat = x.view([batch, heads, length**2 + length*(length -1)])
334
+ # add 0's in the beginning that will skew the elements after reshape
335
+ x_flat = F.pad(x_flat, commons.convert_pad_shape([[0, 0], [0, 0], [length, 0]]))
336
+ x_final = x_flat.view([batch, heads, length, 2*length])[:,:,:,1:]
337
+ return x_final
338
+
339
+ def _attention_bias_proximal(self, length):
340
+ """Bias for self-attention to encourage attention to close positions.
341
+ Args:
342
+ length: an integer scalar.
343
+ Returns:
344
+ a Tensor with shape [1, 1, length, length]
345
+ """
346
+ r = torch.arange(length, dtype=torch.float32)
347
+ diff = torch.unsqueeze(r, 0) - torch.unsqueeze(r, 1)
348
+ return torch.unsqueeze(torch.unsqueeze(-torch.log1p(torch.abs(diff)), 0), 0)
349
+
350
+
351
+ class FFN(nn.Module):
352
+ def __init__(self, in_channels, out_channels, filter_channels, kernel_size, p_dropout=0., activation=None, causal=False):
353
+ super().__init__()
354
+ self.in_channels = in_channels
355
+ self.out_channels = out_channels
356
+ self.filter_channels = filter_channels
357
+ self.kernel_size = kernel_size
358
+ self.p_dropout = p_dropout
359
+ self.activation = activation
360
+ self.causal = causal
361
+
362
+ if causal:
363
+ self.padding = self._causal_padding
364
+ else:
365
+ self.padding = self._same_padding
366
+
367
+ self.conv_1 = nn.Conv1d(in_channels, filter_channels, kernel_size)
368
+ self.conv_2 = nn.Conv1d(filter_channels, out_channels, kernel_size)
369
+ self.drop = nn.Dropout(p_dropout)
370
+
371
+ def forward(self, x, x_mask):
372
+ x = self.conv_1(self.padding(x * x_mask))
373
+ if self.activation == "gelu":
374
+ x = x * torch.sigmoid(1.702 * x)
375
+ else:
376
+ x = torch.relu(x)
377
+ x = self.drop(x)
378
+ x = self.conv_2(self.padding(x * x_mask))
379
+ return x * x_mask
380
+
381
+ def _causal_padding(self, x):
382
+ if self.kernel_size == 1:
383
+ return x
384
+ pad_l = self.kernel_size - 1
385
+ pad_r = 0
386
+ padding = [[0, 0], [0, 0], [pad_l, pad_r]]
387
+ x = F.pad(x, commons.convert_pad_shape(padding))
388
+ return x
389
+
390
+ def _same_padding(self, x):
391
+ if self.kernel_size == 1:
392
+ return x
393
+ pad_l = (self.kernel_size - 1) // 2
394
+ pad_r = self.kernel_size // 2
395
+ padding = [[0, 0], [0, 0], [pad_l, pad_r]]
396
+ x = F.pad(x, commons.convert_pad_shape(padding))
397
+ return x
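modules/attentions.py above is the self-contained transformer stack (relative-position multi-head attention, convolutional FFN, LayerNorm) that the encoders are built from. A short usage sketch follows; the hidden size, layer count and other hyperparameters here are illustrative placeholders, not the values from config.json.

# Minimal sketch (illustrative hyperparameters, not the values from config.json):
# run the windowed relative-position self-attention Encoder over a padded batch.
import torch
from modules.attentions import Encoder
from modules.commons import sequence_mask

enc = Encoder(hidden_channels=192, filter_channels=768, n_heads=2, n_layers=4,
              kernel_size=3, p_dropout=0.1, window_size=4)

x = torch.randn(2, 192, 100)                                # [batch, channels, frames]
lengths = torch.tensor([100, 73])
x_mask = sequence_mask(lengths, 100).unsqueeze(1).float()   # [batch, 1, frames]

y = enc(x, x_mask)   # same shape as x; padded frames stay zeroed
print(y.shape)       # torch.Size([2, 192, 100])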
modules/commons.py ADDED
@@ -0,0 +1,162 @@
1
+ import math
2
+ import numpy as np
3
+ import torch
4
+ from torch import nn
5
+ from torch.nn import functional as F
6
+
7
+
8
+ def init_weights(m, mean=0.0, std=0.01):
9
+ classname = m.__class__.__name__
10
+ if classname.find("Conv") != -1:
11
+ m.weight.data.normal_(mean, std)
12
+
13
+
14
+ def get_padding(kernel_size, dilation=1):
15
+ return int((kernel_size*dilation - dilation)/2)
16
+
17
+
18
+ def convert_pad_shape(pad_shape):
19
+ l = pad_shape[::-1]
20
+ pad_shape = [item for sublist in l for item in sublist]
21
+ return pad_shape
22
+
23
+
24
+ def intersperse(lst, item):
25
+ result = [item] * (len(lst) * 2 + 1)
26
+ result[1::2] = lst
27
+ return result
28
+
29
+
30
+ def kl_divergence(m_p, logs_p, m_q, logs_q):
31
+ """KL(P||Q)"""
32
+ kl = (logs_q - logs_p) - 0.5
33
+ kl += 0.5 * (torch.exp(2. * logs_p) + ((m_p - m_q)**2)) * torch.exp(-2. * logs_q)
34
+ return kl
35
+
36
+
37
+ def rand_gumbel(shape):
38
+ """Sample from the Gumbel distribution, protect from overflows."""
39
+ uniform_samples = torch.rand(shape) * 0.99998 + 0.00001
40
+ return -torch.log(-torch.log(uniform_samples))
41
+
42
+
43
+ def rand_gumbel_like(x):
44
+ g = rand_gumbel(x.size()).to(dtype=x.dtype, device=x.device)
45
+ return g
46
+
47
+
48
+ def slice_segments(x, ids_str, segment_size=4):
49
+ ret = torch.zeros_like(x[:, :, :segment_size])
50
+ # print("ret shape: ",ret.shape, ids_str)
51
+ for i in range(x.size(0)):
52
+ idx_str = ids_str[i]
53
+ idx_end = idx_str + segment_size
54
+ ret[i] = x[i, :, idx_str:idx_end]
55
+ return ret
56
+
57
+
58
+ def rand_slice_segments(x, x_lengths=None, segment_size=4):
59
+ b, d, t = x.size()
60
+ if x_lengths is None:
61
+ x_lengths = t
62
+ ids_str_max = x_lengths - segment_size - 1
63
+ ids_str = (torch.rand([b]).to(device=x.device) * ids_str_max).to(dtype=torch.long)
64
+ ret = slice_segments(x, ids_str, segment_size)
65
+ return ret, ids_str
66
+
67
+
68
+ def get_timing_signal_1d(
69
+ length, channels, min_timescale=1.0, max_timescale=1.0e4):
70
+ position = torch.arange(length, dtype=torch.float)
71
+ num_timescales = channels // 2
72
+ log_timescale_increment = (
73
+ math.log(float(max_timescale) / float(min_timescale)) /
74
+ (num_timescales - 1))
75
+ inv_timescales = min_timescale * torch.exp(
76
+ torch.arange(num_timescales, dtype=torch.float) * -log_timescale_increment)
77
+ scaled_time = position.unsqueeze(0) * inv_timescales.unsqueeze(1)
78
+ signal = torch.cat([torch.sin(scaled_time), torch.cos(scaled_time)], 0)
79
+ signal = F.pad(signal, [0, 0, 0, channels % 2])
80
+ signal = signal.view(1, channels, length)
81
+ return signal
82
+
83
+
84
+ def add_timing_signal_1d(x, min_timescale=1.0, max_timescale=1.0e4):
85
+ b, channels, length = x.size()
86
+ signal = get_timing_signal_1d(length, channels, min_timescale, max_timescale)
87
+ return x + signal.to(dtype=x.dtype, device=x.device)
88
+
89
+
90
+ def cat_timing_signal_1d(x, min_timescale=1.0, max_timescale=1.0e4, axis=1):
91
+ b, channels, length = x.size()
92
+ signal = get_timing_signal_1d(length, channels, min_timescale, max_timescale)
93
+ return torch.cat([x, signal.to(dtype=x.dtype, device=x.device)], axis)
94
+
95
+
96
+ def subsequent_mask(length):
97
+ mask = torch.tril(torch.ones(length, length)).unsqueeze(0).unsqueeze(0)
98
+ return mask
99
+
100
+
101
+ @torch.jit.script
102
+ def fused_add_tanh_sigmoid_multiply(input_a, input_b, n_channels):
103
+ n_channels_int = n_channels[0]
104
+ in_act = input_a + input_b
105
+ t_act = torch.tanh(in_act[:, :n_channels_int, :])
106
+ s_act = torch.sigmoid(in_act[:, n_channels_int:, :])
107
+ acts = t_act * s_act
108
+ return acts
109
+
110
+
111
+ def convert_pad_shape(pad_shape):
112
+ l = pad_shape[::-1]
113
+ pad_shape = [item for sublist in l for item in sublist]
114
+ return pad_shape
115
+
116
+
117
+ def shift_1d(x):
118
+ x = F.pad(x, convert_pad_shape([[0, 0], [0, 0], [1, 0]]))[:, :, :-1]
119
+ return x
120
+
121
+
122
+ def sequence_mask(length, max_length=None):
123
+ if max_length is None:
124
+ max_length = length.max()
125
+ x = torch.arange(max_length, dtype=length.dtype, device=length.device)
126
+ return x.unsqueeze(0) < length.unsqueeze(1)
127
+
128
+
129
+ def generate_path(duration, mask):
130
+ """
131
+ duration: [b, 1, t_x]
132
+ mask: [b, 1, t_y, t_x]
133
+ """
134
+ device = duration.device
135
+
136
+ b, _, t_y, t_x = mask.shape
137
+ cum_duration = torch.cumsum(duration, -1)
138
+
139
+ cum_duration_flat = cum_duration.view(b * t_x)
140
+ path = sequence_mask(cum_duration_flat, t_y).to(mask.dtype)
141
+ path = path.view(b, t_x, t_y)
142
+ path = path - F.pad(path, convert_pad_shape([[0, 0], [1, 0], [0, 0]]))[:, :-1]
143
+ path = path.unsqueeze(1).transpose(2,3) * mask
144
+ return path
145
+
146
+
147
+ def clip_grad_value_(parameters, clip_value, norm_type=2):
148
+ if isinstance(parameters, torch.Tensor):
149
+ parameters = [parameters]
150
+ parameters = list(filter(lambda p: p.grad is not None, parameters))
151
+ norm_type = float(norm_type)
152
+ if clip_value is not None:
153
+ clip_value = float(clip_value)
154
+
155
+ total_norm = 0
156
+ for p in parameters:
157
+ param_norm = p.grad.data.norm(norm_type)
158
+ total_norm += param_norm.item() ** norm_type
159
+ if clip_value is not None:
160
+ p.grad.data.clamp_(min=-clip_value, max=clip_value)
161
+ total_norm = total_norm ** (1. / norm_type)
162
+ return total_norm
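modules/commons.py collects the usual VITS-style helpers (masking, slicing, timing signals, gradient clipping). One pattern worth illustrating is how sequence_mask and generate_path cooperate to expand per-token durations into a hard frame-to-token alignment; the sketch below uses made-up durations purely for illustration.

# Minimal sketch (made-up durations): expand token durations into a frame-level
# alignment path with sequence_mask and generate_path from this module.
import torch
from modules.commons import sequence_mask, generate_path

duration = torch.tensor([[[2., 3., 1.]]])                  # [b, 1, t_x] frames per token
x_mask = torch.ones(1, 1, 3)                               # token mask  [b, 1, t_x]
y_len = duration.sum(-1).squeeze(1).long()                 # total frames per batch item
y_mask = sequence_mask(y_len, int(y_len.max())).unsqueeze(1).float()  # frame mask [b, 1, t_y]

attn_mask = y_mask.unsqueeze(-1) * x_mask.unsqueeze(2)     # [b, 1, t_y, t_x]
path = generate_path(duration, attn_mask)                  # 0/1 path, one token per frame
print(path.squeeze())                                       # rows are frames, columns are tokens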
modules/ddsp.py ADDED
@@ -0,0 +1,189 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ from torch.nn import functional as F
4
+ import torch.fft as fft
5
+ import numpy as np
6
+ import librosa as li
7
+ import math
8
+ from scipy.signal import get_window
+ import crepe  # assumption: extract_pitch below calls crepe.predict, but the crepe pitch tracker was never imported in the original file
9
+
10
+ def safe_log(x):
11
+ return torch.log(x + 1e-7)
12
+
13
+
14
+ @torch.no_grad()
15
+ def mean_std_loudness(dataset):
16
+ mean = 0
17
+ std = 0
18
+ n = 0
19
+ for _, _, l in dataset:
20
+ n += 1
21
+ mean += (l.mean().item() - mean) / n
22
+ std += (l.std().item() - std) / n
23
+ return mean, std
24
+
25
+
26
+ def multiscale_fft(signal, scales, overlap):
27
+ stfts = []
28
+ for s in scales:
29
+ S = torch.stft(
30
+ signal,
31
+ s,
32
+ int(s * (1 - overlap)),
33
+ s,
34
+ torch.hann_window(s).to(signal),
35
+ True,
36
+ normalized=True,
37
+ return_complex=True,
38
+ ).abs()
39
+ stfts.append(S)
40
+ return stfts
41
+
42
+
43
+ def resample(x, factor: int):
44
+ batch, frame, channel = x.shape
45
+ x = x.permute(0, 2, 1).reshape(batch * channel, 1, frame)
46
+
47
+ window = torch.hann_window(
48
+ factor * 2,
49
+ dtype=x.dtype,
50
+ device=x.device,
51
+ ).reshape(1, 1, -1)
52
+ y = torch.zeros(x.shape[0], x.shape[1], factor * x.shape[2]).to(x)
53
+ y[..., ::factor] = x
54
+ y[..., -1:] = x[..., -1:]
55
+ y = torch.nn.functional.pad(y, [factor, factor])
56
+ y = torch.nn.functional.conv1d(y, window)[..., :-1]
57
+
58
+ y = y.reshape(batch, channel, factor * frame).permute(0, 2, 1)
59
+
60
+ return y
61
+
62
+
63
+ def upsample(signal, factor):
64
+ signal = signal.permute(0, 2, 1)
65
+ signal = nn.functional.interpolate(signal, size=signal.shape[-1] * factor)
66
+ return signal.permute(0, 2, 1)
67
+
68
+
69
+ def remove_above_nyquist(amplitudes, pitch, sampling_rate):
70
+ n_harm = amplitudes.shape[-1]
71
+ pitches = pitch * torch.arange(1, n_harm + 1).to(pitch)
72
+ aa = (pitches < sampling_rate / 2).float() + 1e-4
73
+ return amplitudes * aa
74
+
75
+
76
+ def scale_function(x):
77
+ return 2 * torch.sigmoid(x)**(math.log(10)) + 1e-7
78
+
79
+
80
+ def extract_loudness(signal, sampling_rate, block_size, n_fft=2048):
81
+ S = li.stft(
82
+ signal,
83
+ n_fft=n_fft,
84
+ hop_length=block_size,
85
+ win_length=n_fft,
86
+ center=True,
87
+ )
88
+ S = np.log(abs(S) + 1e-7)
89
+ f = li.fft_frequencies(sampling_rate, n_fft)
90
+ a_weight = li.A_weighting(f)
91
+
92
+ S = S + a_weight.reshape(-1, 1)
93
+
94
+ S = np.mean(S, 0)[..., :-1]
95
+
96
+ return S
97
+
98
+
99
+ def extract_pitch(signal, sampling_rate, block_size):
100
+ length = signal.shape[-1] // block_size
101
+ f0 = crepe.predict(
102
+ signal,
103
+ sampling_rate,
104
+ step_size=int(1000 * block_size / sampling_rate),
105
+ verbose=1,
106
+ center=True,
107
+ viterbi=True,
108
+ )
109
+ f0 = f0[1].reshape(-1)[:-1]
110
+
111
+ if f0.shape[-1] != length:
112
+ f0 = np.interp(
113
+ np.linspace(0, 1, length, endpoint=False),
114
+ np.linspace(0, 1, f0.shape[-1], endpoint=False),
115
+ f0,
116
+ )
117
+
118
+ return f0
119
+
120
+
121
+ def mlp(in_size, hidden_size, n_layers):
122
+ channels = [in_size] + (n_layers) * [hidden_size]
123
+ net = []
124
+ for i in range(n_layers):
125
+ net.append(nn.Linear(channels[i], channels[i + 1]))
126
+ net.append(nn.LayerNorm(channels[i + 1]))
127
+ net.append(nn.LeakyReLU())
128
+ return nn.Sequential(*net)
129
+
130
+
131
+ def gru(n_input, hidden_size):
132
+ return nn.GRU(n_input * hidden_size, hidden_size, batch_first=True)
133
+
134
+
135
+ def harmonic_synth(pitch, amplitudes, sampling_rate):
136
+ n_harmonic = amplitudes.shape[-1]
137
+ omega = torch.cumsum(2 * math.pi * pitch / sampling_rate, 1)
138
+ omegas = omega * torch.arange(1, n_harmonic + 1).to(omega)
139
+ signal = (torch.sin(omegas) * amplitudes).sum(-1, keepdim=True)
140
+ return signal
141
+
142
+
143
+ def amp_to_impulse_response(amp, target_size):
144
+ amp = torch.stack([amp, torch.zeros_like(amp)], -1)
145
+ amp = torch.view_as_complex(amp)
146
+ amp = fft.irfft(amp)
147
+
148
+ filter_size = amp.shape[-1]
149
+
150
+ amp = torch.roll(amp, filter_size // 2, -1)
151
+ win = torch.hann_window(filter_size, dtype=amp.dtype, device=amp.device)
152
+
153
+ amp = amp * win
154
+
155
+ amp = nn.functional.pad(amp, (0, int(target_size) - int(filter_size)))
156
+ amp = torch.roll(amp, -filter_size // 2, -1)
157
+
158
+ return amp
159
+
160
+
161
+ def fft_convolve(signal, kernel):
162
+ signal = nn.functional.pad(signal, (0, signal.shape[-1]))
163
+ kernel = nn.functional.pad(kernel, (kernel.shape[-1], 0))
164
+
165
+ output = fft.irfft(fft.rfft(signal) * fft.rfft(kernel))
166
+ output = output[..., output.shape[-1] // 2:]
167
+
168
+ return output
169
+
170
+
171
+ def init_kernels(win_len, win_inc, fft_len, win_type=None, invers=False):
172
+ if win_type == 'None' or win_type is None:
173
+ window = np.ones(win_len)
174
+ else:
175
+ window = get_window(win_type, win_len, fftbins=True)#**0.5
176
+
177
+ N = fft_len
178
+ fourier_basis = np.fft.rfft(np.eye(N))[:win_len]
179
+ real_kernel = np.real(fourier_basis)
180
+ imag_kernel = np.imag(fourier_basis)
181
+ kernel = np.concatenate([real_kernel, imag_kernel], 1).T
182
+
183
+ if invers:
184
+ kernel = np.linalg.pinv(kernel).T
185
+
186
+ kernel = kernel*window
187
+ kernel = kernel[:, None, :]
188
+ return torch.from_numpy(kernel.astype(np.float32)), torch.from_numpy(window[None,:,None].astype(np.float32))
189
+
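The helpers above are the building blocks of a DDSP-style harmonic synthesizer. A minimal sketch of how they can be combined, assuming the functions defined above are in scope; every shape and parameter value below is illustrative, not taken from this repo's config:

import torch

# frame-rate controls: f0 in Hz [batch, frames, 1] and per-harmonic amplitudes
batch, frames, n_harmonics, block_size, sr = 1, 100, 64, 512, 44100
pitch = torch.full((batch, frames, 1), 220.0)
amplitudes = torch.rand(batch, frames, n_harmonics)

# zero out harmonics above the Nyquist frequency, then normalize the distribution
amplitudes = remove_above_nyquist(amplitudes, pitch, sr)
amplitudes = amplitudes / amplitudes.sum(-1, keepdim=True)

# move controls from frame rate to sample rate and render the harmonic part
pitch = upsample(pitch, block_size)              # [batch, frames * block_size, 1]
amplitudes = upsample(amplitudes, block_size)
audio = harmonic_synth(pitch, amplitudes, sr)    # [batch, samples, 1]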
modules/losses.py ADDED
@@ -0,0 +1,62 @@
1
+ import torch
2
+ from torch.nn import functional as F
3
+
4
+ import modules.commons
5
+ import math
6
+
7
+ def feature_loss(fmap_r, fmap_g):
8
+ loss = 0
9
+ for dr, dg in zip(fmap_r, fmap_g):
10
+ for rl, gl in zip(dr, dg):
11
+ rl = rl.float().detach()
12
+ gl = gl.float()
13
+ loss += torch.mean(torch.abs(rl - gl))
14
+
15
+ return loss * 2
16
+
17
+
18
+ def discriminator_loss(disc_real_outputs, disc_generated_outputs):
19
+ loss = 0
20
+ r_losses = []
21
+ g_losses = []
22
+ for dr, dg in zip(disc_real_outputs, disc_generated_outputs):
23
+ dr = dr.float()
24
+ dg = dg.float()
25
+ r_loss = torch.mean((1-dr)**2)
26
+ g_loss = torch.mean(dg**2)
27
+ loss += (r_loss + g_loss)
28
+ r_losses.append(r_loss.item())
29
+ g_losses.append(g_loss.item())
30
+
31
+ return loss, r_losses, g_losses
32
+
33
+
34
+ def generator_loss(disc_outputs):
35
+ loss = 0
36
+ gen_losses = []
37
+ for dg in disc_outputs:
38
+ dg = dg.float()
39
+ l = torch.mean((1-dg)**2)
40
+ gen_losses.append(l)
41
+ loss += l
42
+
43
+ return loss, gen_losses
44
+
45
+
46
+ def kl_loss(z_p, logs_q, m_p, logs_p, z_mask):
47
+ """
48
+ z_p, logs_q: [b, h, t_t]
49
+ m_p, logs_p: [b, h, t_t]
50
+ """
51
+ z_p = z_p.float()
52
+ logs_q = logs_q.float()
53
+ m_p = m_p.float()
54
+ logs_p = logs_p.float()
55
+ z_mask = z_mask.float()
56
+
57
+ kl = logs_p - logs_q - 0.5
58
+ kl += 0.5 * ((z_p - m_p)**2) * torch.exp(-2. * logs_p)
59
+ kl = torch.sum(kl * z_mask)
60
+ l = kl / torch.sum(z_mask)
61
+ return l
62
+
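These are the usual least-squares GAN and KL objectives of VITS-style models. A hedged sketch of how they are typically combined in one training step; `disc`, `wav_real`, `wav_fake` and the latent tensors are placeholders, not this repo's actual training loop:

from modules.losses import discriminator_loss, generator_loss, feature_loss, kl_loss

# discriminator step: real audio vs. detached generated audio
y_d_r, y_d_g, fmap_r, fmap_g = disc(wav_real, wav_fake.detach())
loss_disc, _, _ = discriminator_loss(y_d_r, y_d_g)

# generator step: adversarial + feature-matching + KL terms
y_d_r, y_d_g, fmap_r, fmap_g = disc(wav_real, wav_fake)
loss_gen, _ = generator_loss(y_d_g)
loss_fm = feature_loss(fmap_r, fmap_g)
loss_kl = kl_loss(z_p, logs_q, m_p, logs_p, z_mask)
loss_g_total = loss_gen + loss_fm + loss_kl      # plus reconstruction terms in practice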
modules/modules.py ADDED
@@ -0,0 +1,450 @@
1
+ import copy
2
+ import math
3
+ import numpy as np
4
+ import scipy
5
+ import torch
6
+ from torch import nn
7
+ from torch.nn import functional as F
8
+ from torch.autograd import Function
9
+ from typing import Any, Optional, Tuple
10
+
11
+ from torch.nn import Conv1d, ConvTranspose1d, AvgPool1d, Conv2d
12
+ from torch.nn.utils import weight_norm, remove_weight_norm
13
+
14
+ import modules.commons as commons
15
+ import modules.attentions as attentions
16
+ from modules.commons import init_weights, get_padding
17
+ from modules.transforms import piecewise_rational_quadratic_transform
18
+
19
+
20
+ LRELU_SLOPE = 0.1
21
+
22
+
23
+ class LayerNorm(nn.Module):
24
+ def __init__(self, channels, eps=1e-5):
25
+ super().__init__()
26
+ self.channels = channels
27
+ self.eps = eps
28
+
29
+ self.gamma = nn.Parameter(torch.ones(channels))
30
+ self.beta = nn.Parameter(torch.zeros(channels))
31
+
32
+ def forward(self, x):
33
+ x = x.transpose(1, -1)
34
+ x = F.layer_norm(x, (self.channels,), self.gamma, self.beta, self.eps)
35
+ return x.transpose(1, -1)
36
+
37
+
38
+ class ConvReluNorm(nn.Module):
39
+ def __init__(self, in_channels, hidden_channels, out_channels, kernel_size, n_layers, p_dropout):
40
+ super().__init__()
41
+ self.in_channels = in_channels
42
+ self.hidden_channels = hidden_channels
43
+ self.out_channels = out_channels
44
+ self.kernel_size = kernel_size
45
+ self.n_layers = n_layers
46
+ self.p_dropout = p_dropout
47
+ assert n_layers > 1, "Number of layers should be larger than 1."
48
+
49
+ self.conv_layers = nn.ModuleList()
50
+ self.norm_layers = nn.ModuleList()
51
+ self.conv_layers.append(nn.Conv1d(in_channels, hidden_channels, kernel_size, padding=kernel_size//2))
52
+ self.norm_layers.append(LayerNorm(hidden_channels))
53
+ self.relu_drop = nn.Sequential(
54
+ nn.ReLU(),
55
+ nn.Dropout(p_dropout))
56
+ for _ in range(n_layers-1):
57
+ self.conv_layers.append(nn.Conv1d(hidden_channels, hidden_channels, kernel_size, padding=kernel_size//2))
58
+ self.norm_layers.append(LayerNorm(hidden_channels))
59
+ self.proj = nn.Conv1d(hidden_channels, out_channels, 1)
60
+ self.proj.weight.data.zero_()
61
+ self.proj.bias.data.zero_()
62
+
63
+ def forward(self, x, x_mask):
64
+ x_org = x
65
+ for i in range(self.n_layers):
66
+ x = self.conv_layers[i](x * x_mask)
67
+ x = self.norm_layers[i](x)
68
+ x = self.relu_drop(x)
69
+ x = x_org + self.proj(x)
70
+ return x * x_mask
71
+
72
+
73
+ class DDSConv(nn.Module):
74
+ """
75
+ Dilated and Depth-Separable Convolution
76
+ """
77
+ def __init__(self, channels, kernel_size, n_layers, p_dropout=0.):
78
+ super().__init__()
79
+ self.channels = channels
80
+ self.kernel_size = kernel_size
81
+ self.n_layers = n_layers
82
+ self.p_dropout = p_dropout
83
+
84
+ self.drop = nn.Dropout(p_dropout)
85
+ self.convs_sep = nn.ModuleList()
86
+ self.convs_1x1 = nn.ModuleList()
87
+ self.norms_1 = nn.ModuleList()
88
+ self.norms_2 = nn.ModuleList()
89
+ for i in range(n_layers):
90
+ dilation = kernel_size ** i
91
+ padding = (kernel_size * dilation - dilation) // 2
92
+ self.convs_sep.append(nn.Conv1d(channels, channels, kernel_size,
93
+ groups=channels, dilation=dilation, padding=padding
94
+ ))
95
+ self.convs_1x1.append(nn.Conv1d(channels, channels, 1))
96
+ self.norms_1.append(LayerNorm(channels))
97
+ self.norms_2.append(LayerNorm(channels))
98
+
99
+ def forward(self, x, x_mask, g=None):
100
+ if g is not None:
101
+ x = x + g
102
+ for i in range(self.n_layers):
103
+ y = self.convs_sep[i](x * x_mask)
104
+ y = self.norms_1[i](y)
105
+ y = F.gelu(y)
106
+ y = self.convs_1x1[i](y)
107
+ y = self.norms_2[i](y)
108
+ y = F.gelu(y)
109
+ y = self.drop(y)
110
+ x = x + y
111
+ return x * x_mask
112
+
113
+
114
+ class WN(torch.nn.Module):
115
+ def __init__(self, hidden_channels, kernel_size, dilation_rate, n_layers, n_speakers=0, spk_channels=0, p_dropout=0):
116
+ super(WN, self).__init__()
117
+ assert(kernel_size % 2 == 1)
118
+ self.hidden_channels =hidden_channels
119
+ self.kernel_size = kernel_size
120
+ self.dilation_rate = dilation_rate
121
+ self.n_layers = n_layers
122
+ self.n_speakers = n_speakers
123
+ self.spk_channels = spk_channels
124
+ self.p_dropout = p_dropout
125
+
126
+ self.in_layers = torch.nn.ModuleList()
127
+ self.res_skip_layers = torch.nn.ModuleList()
128
+ self.drop = nn.Dropout(p_dropout)
129
+
130
+ if n_speakers > 0:
131
+ cond_layer = torch.nn.Conv1d(spk_channels, 2*hidden_channels*n_layers, 1)
132
+ self.cond_layer = torch.nn.utils.weight_norm(cond_layer, name='weight')
133
+
134
+ for i in range(n_layers):
135
+ dilation = dilation_rate ** i
136
+ padding = int((kernel_size * dilation - dilation) / 2)
137
+ in_layer = torch.nn.Conv1d(hidden_channels, 2*hidden_channels, kernel_size,
138
+ dilation=dilation, padding=padding)
139
+ in_layer = torch.nn.utils.weight_norm(in_layer, name='weight')
140
+ self.in_layers.append(in_layer)
141
+
142
+ # last one is not necessary
143
+ if i < n_layers - 1:
144
+ res_skip_channels = 2 * hidden_channels
145
+ else:
146
+ res_skip_channels = hidden_channels
147
+
148
+ res_skip_layer = torch.nn.Conv1d(hidden_channels, res_skip_channels, 1)
149
+ res_skip_layer = torch.nn.utils.weight_norm(res_skip_layer, name='weight')
150
+ self.res_skip_layers.append(res_skip_layer)
151
+
152
+ def forward(self, x, x_mask, g=None, **kwargs):
153
+ output = torch.zeros_like(x)
154
+ n_channels_tensor = torch.IntTensor([self.hidden_channels])
155
+
156
+ if g is not None:
157
+ g = self.cond_layer(g)
158
+
159
+ for i in range(self.n_layers):
160
+ x_in = self.in_layers[i](x)
161
+ if g is not None:
162
+ cond_offset = i * 2 * self.hidden_channels
163
+ g_l = g[:,cond_offset:cond_offset+2*self.hidden_channels,:]
164
+ else:
165
+ g_l = torch.zeros_like(x_in)
166
+
167
+ acts = commons.fused_add_tanh_sigmoid_multiply(
168
+ x_in,
169
+ g_l,
170
+ n_channels_tensor)
171
+ acts = self.drop(acts)
172
+
173
+ res_skip_acts = self.res_skip_layers[i](acts)
174
+ if i < self.n_layers - 1:
175
+ res_acts = res_skip_acts[:,:self.hidden_channels,:]
176
+ x = (x + res_acts) * x_mask
177
+ output = output + res_skip_acts[:,self.hidden_channels:,:]
178
+ else:
179
+ output = output + res_skip_acts
180
+ return output * x_mask
181
+
182
+ def remove_weight_norm(self):
183
+ if self.n_speakers > 0:
184
+ torch.nn.utils.remove_weight_norm(self.cond_layer)
185
+ for l in self.in_layers:
186
+ torch.nn.utils.remove_weight_norm(l)
187
+ for l in self.res_skip_layers:
188
+ torch.nn.utils.remove_weight_norm(l)
189
+
190
+
191
+ class ResBlock1(torch.nn.Module):
192
+ def __init__(self, channels, kernel_size=3, dilation=(1, 3, 5)):
193
+ super(ResBlock1, self).__init__()
194
+ self.convs1 = nn.ModuleList([
195
+ weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[0],
196
+ padding=get_padding(kernel_size, dilation[0]))),
197
+ weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[1],
198
+ padding=get_padding(kernel_size, dilation[1]))),
199
+ weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[2],
200
+ padding=get_padding(kernel_size, dilation[2])))
201
+ ])
202
+ self.convs1.apply(init_weights)
203
+
204
+ self.convs2 = nn.ModuleList([
205
+ weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1,
206
+ padding=get_padding(kernel_size, 1))),
207
+ weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1,
208
+ padding=get_padding(kernel_size, 1))),
209
+ weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1,
210
+ padding=get_padding(kernel_size, 1)))
211
+ ])
212
+ self.convs2.apply(init_weights)
213
+
214
+ def forward(self, x, x_mask=None):
215
+ for c1, c2 in zip(self.convs1, self.convs2):
216
+ xt = F.leaky_relu(x, LRELU_SLOPE)
217
+ if x_mask is not None:
218
+ xt = xt * x_mask
219
+ xt = c1(xt)
220
+ xt = F.leaky_relu(xt, LRELU_SLOPE)
221
+ if x_mask is not None:
222
+ xt = xt * x_mask
223
+ xt = c2(xt)
224
+ x = xt + x
225
+ if x_mask is not None:
226
+ x = x * x_mask
227
+ return x
228
+
229
+ def remove_weight_norm(self):
230
+ for l in self.convs1:
231
+ remove_weight_norm(l)
232
+ for l in self.convs2:
233
+ remove_weight_norm(l)
234
+
235
+
236
+ class ResBlock2(torch.nn.Module):
237
+ def __init__(self, channels, kernel_size=3, dilation=(1, 3)):
238
+ super(ResBlock2, self).__init__()
239
+ self.convs = nn.ModuleList([
240
+ weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[0],
241
+ padding=get_padding(kernel_size, dilation[0]))),
242
+ weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[1],
243
+ padding=get_padding(kernel_size, dilation[1])))
244
+ ])
245
+ self.convs.apply(init_weights)
246
+
247
+ def forward(self, x, x_mask=None):
248
+ for c in self.convs:
249
+ xt = F.leaky_relu(x, LRELU_SLOPE)
250
+ if x_mask is not None:
251
+ xt = xt * x_mask
252
+ xt = c(xt)
253
+ x = xt + x
254
+ if x_mask is not None:
255
+ x = x * x_mask
256
+ return x
257
+
258
+ def remove_weight_norm(self):
259
+ for l in self.convs:
260
+ remove_weight_norm(l)
261
+
262
+
263
+ class Log(nn.Module):
264
+ def forward(self, x, x_mask, reverse=False, **kwargs):
265
+ if not reverse:
266
+ y = torch.log(torch.clamp_min(x, 1e-5)) * x_mask
267
+ logdet = torch.sum(-y, [1, 2])
268
+ return y, logdet
269
+ else:
270
+ x = torch.exp(x) * x_mask
271
+ return x
272
+
273
+
274
+ class Flip(nn.Module):
275
+ def forward(self, x, *args, reverse=False, **kwargs):
276
+ x = torch.flip(x, [1])
277
+ if not reverse:
278
+ logdet = torch.zeros(x.size(0)).to(dtype=x.dtype, device=x.device)
279
+ return x, logdet
280
+ else:
281
+ return x
282
+
283
+
284
+ class ElementwiseAffine(nn.Module):
285
+ def __init__(self, channels):
286
+ super().__init__()
287
+ self.channels = channels
288
+ self.m = nn.Parameter(torch.zeros(channels,1))
289
+ self.logs = nn.Parameter(torch.zeros(channels,1))
290
+
291
+ def forward(self, x, x_mask, reverse=False, **kwargs):
292
+ if not reverse:
293
+ y = self.m + torch.exp(self.logs) * x
294
+ y = y * x_mask
295
+ logdet = torch.sum(self.logs * x_mask, [1,2])
296
+ return y, logdet
297
+ else:
298
+ x = (x - self.m) * torch.exp(-self.logs) * x_mask
299
+ return x
300
+
301
+
302
+ class ResidualCouplingLayer(nn.Module):
303
+ def __init__(self,
304
+ channels,
305
+ hidden_channels,
306
+ kernel_size,
307
+ dilation_rate,
308
+ n_layers,
309
+ p_dropout=0,
310
+ n_speakers=0,
311
+ spk_channels=0,
312
+ mean_only=False):
313
+ assert channels % 2 == 0, "channels should be divisible by 2"
314
+ super().__init__()
315
+ self.channels = channels
316
+ self.hidden_channels = hidden_channels
317
+ self.kernel_size = kernel_size
318
+ self.dilation_rate = dilation_rate
319
+ self.n_layers = n_layers
320
+ self.half_channels = channels // 2
321
+ self.mean_only = mean_only
322
+
323
+ self.pre = nn.Conv1d(self.half_channels, hidden_channels, 1)
324
+ self.enc = WN(hidden_channels, kernel_size, dilation_rate, n_layers, p_dropout=p_dropout, n_speakers=n_speakers, spk_channels=spk_channels)
325
+ self.post = nn.Conv1d(hidden_channels, self.half_channels * (2 - mean_only), 1)
326
+ self.post.weight.data.zero_()
327
+ self.post.bias.data.zero_()
328
+
329
+ def forward(self, x, x_mask, g=None, reverse=False):
330
+ x0, x1 = torch.split(x, [self.half_channels]*2, 1)
331
+ h = self.pre(x0) * x_mask
332
+ h = self.enc(h, x_mask, g=g)
333
+ stats = self.post(h) * x_mask
334
+ if not self.mean_only:
335
+ m, logs = torch.split(stats, [self.half_channels]*2, 1)
336
+ else:
337
+ m = stats
338
+ logs = torch.zeros_like(m)
339
+
340
+ if not reverse:
341
+ x1 = m + x1 * torch.exp(logs) * x_mask
342
+ x = torch.cat([x0, x1], 1)
343
+ logdet = torch.sum(logs, [1,2])
344
+ return x, logdet
345
+ else:
346
+ x1 = (x1 - m) * torch.exp(-logs) * x_mask
347
+ x = torch.cat([x0, x1], 1)
348
+ return x
349
+
350
+ class ResidualCouplingBlock(nn.Module):
351
+ def __init__(self,
352
+ channels,
353
+ hidden_channels,
354
+ kernel_size,
355
+ dilation_rate,
356
+ n_layers,
357
+ n_flows=4,
358
+ n_speakers=0,
359
+ gin_channels=0):
360
+ super().__init__()
361
+ self.channels = channels
362
+ self.hidden_channels = hidden_channels
363
+ self.kernel_size = kernel_size
364
+ self.dilation_rate = dilation_rate
365
+ self.n_layers = n_layers
366
+ self.n_flows = n_flows
367
+ self.gin_channels = gin_channels
368
+
369
+ self.flows = nn.ModuleList()
370
+ for i in range(n_flows):
371
+ self.flows.append(ResidualCouplingLayer(channels, hidden_channels, kernel_size, dilation_rate, n_layers, n_speakers=n_speakers, spk_channels=gin_channels, mean_only=True))
372
+ self.flows.append(Flip())
373
+
374
+ def forward(self, x, x_mask, g=None, reverse=False):
375
+ if not reverse:
376
+ for flow in self.flows:
377
+ x, _ = flow(x, x_mask, g=g, reverse=reverse)
378
+ else:
379
+ for flow in reversed(self.flows):
380
+ x = flow(x, x_mask, g=g, reverse=reverse)
381
+ return x
382
+
383
+
384
+ class ConvFlow(nn.Module):
385
+ def __init__(self, in_channels, filter_channels, kernel_size, n_layers, num_bins=10, tail_bound=5.0):
386
+ super().__init__()
387
+ self.in_channels = in_channels
388
+ self.filter_channels = filter_channels
389
+ self.kernel_size = kernel_size
390
+ self.n_layers = n_layers
391
+ self.num_bins = num_bins
392
+ self.tail_bound = tail_bound
393
+ self.half_channels = in_channels // 2
394
+
395
+ self.pre = nn.Conv1d(self.half_channels, filter_channels, 1)
396
+ self.convs = DDSConv(filter_channels, kernel_size, n_layers, p_dropout=0.)
397
+ self.proj = nn.Conv1d(filter_channels, self.half_channels * (num_bins * 3 - 1), 1)
398
+ self.proj.weight.data.zero_()
399
+ self.proj.bias.data.zero_()
400
+
401
+ def forward(self, x, x_mask, g=None, reverse=False):
402
+ x0, x1 = torch.split(x, [self.half_channels]*2, 1)
403
+ h = self.pre(x0)
404
+ h = self.convs(h, x_mask, g=g)
405
+ h = self.proj(h) * x_mask
406
+
407
+ b, c, t = x0.shape
408
+ h = h.reshape(b, c, -1, t).permute(0, 1, 3, 2) # [b, cx?, t] -> [b, c, t, ?]
409
+
410
+ unnormalized_widths = h[..., :self.num_bins] / math.sqrt(self.filter_channels)
411
+ unnormalized_heights = h[..., self.num_bins:2*self.num_bins] / math.sqrt(self.filter_channels)
412
+ unnormalized_derivatives = h[..., 2 * self.num_bins:]
413
+
414
+ x1, logabsdet = piecewise_rational_quadratic_transform(x1,
415
+ unnormalized_widths,
416
+ unnormalized_heights,
417
+ unnormalized_derivatives,
418
+ inverse=reverse,
419
+ tails='linear',
420
+ tail_bound=self.tail_bound
421
+ )
422
+
423
+ x = torch.cat([x0, x1], 1) * x_mask
424
+ logdet = torch.sum(logabsdet * x_mask, [1,2])
425
+ if not reverse:
426
+ return x, logdet
427
+ else:
428
+ return x
429
+
430
+
431
+ class ResStack(nn.Module):
432
+ def __init__(self, channel, kernel_size=3, base=3, nums=4):
433
+ super(ResStack, self).__init__()
434
+
435
+ self.layers = nn.ModuleList([
436
+ nn.Sequential(
437
+ nn.LeakyReLU(),
438
+ nn.utils.weight_norm(nn.Conv1d(channel, channel,
439
+ kernel_size=kernel_size, dilation=base**i, padding=base**i)),
440
+ nn.LeakyReLU(),
441
+ nn.utils.weight_norm(nn.Conv1d(channel, channel,
442
+ kernel_size=kernel_size, dilation=1, padding=1)),
443
+ )
444
+ for i in range(nums)
445
+ ])
446
+
447
+ def forward(self, x):
448
+ for layer in self.layers:
449
+ x = x + layer(x)
450
+ return x
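ResidualCouplingBlock stacks affine coupling layers and Flip layers into an invertible flow, so running it with reverse=True recovers the input. A small sanity-check sketch with illustrative channel and time sizes:

import torch
from modules.modules import ResidualCouplingBlock

flow = ResidualCouplingBlock(channels=192, hidden_channels=192,
                             kernel_size=5, dilation_rate=1, n_layers=4)
x = torch.randn(2, 192, 50)
x_mask = torch.ones(2, 1, 50)

z = flow(x, x_mask)                          # forward direction
x_rec = flow(z, x_mask, reverse=True)        # inverse direction
print(torch.allclose(x, x_rec, atol=1e-4))   # True up to floating-point error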
modules/stft.py ADDED
@@ -0,0 +1,512 @@
1
+ from librosa.util import pad_center, tiny
2
+ from scipy.signal import get_window
3
+ from torch import Tensor
4
+ from torch.autograd import Variable
5
+ from typing import Optional, Tuple
6
+
7
+ import librosa
8
+ import librosa.util as librosa_util
9
+ import math
10
+ import numpy as np
11
+ import scipy
12
+ import torch
13
+ import torch.nn.functional as F
14
+ import warnings
15
+
16
+
17
+ def create_fb_matrix(
18
+ n_freqs: int,
19
+ f_min: float,
20
+ f_max: float,
21
+ n_mels: int,
22
+ sample_rate: int,
23
+ norm: Optional[str] = None
24
+ ) -> Tensor:
25
+ r"""Create a frequency bin conversion matrix.
26
+
27
+ Args:
28
+ n_freqs (int): Number of frequencies to highlight/apply
29
+ f_min (float): Minimum frequency (Hz)
30
+ f_max (float): Maximum frequency (Hz)
31
+ n_mels (int): Number of mel filterbanks
32
+ sample_rate (int): Sample rate of the audio waveform
33
+ norm (Optional[str]): If 'slaney', divide the triangular mel weights by the width of the mel band
34
+ (area normalization). (Default: ``None``)
35
+
36
+ Returns:
37
+ Tensor: Triangular filter banks (fb matrix) of size (``n_freqs``, ``n_mels``)
38
+ meaning number of frequencies to highlight/apply to x the number of filterbanks.
39
+ Each column is a filterbank so that assuming there is a matrix A of
40
+ size (..., ``n_freqs``), the applied result would be
41
+ ``A * create_fb_matrix(A.size(-1), ...)``.
42
+ """
43
+
44
+ if norm is not None and norm != "slaney":
45
+ raise ValueError("norm must be one of None or 'slaney'")
46
+
47
+ # freq bins
48
+ # Equivalent filterbank construction by Librosa
49
+ all_freqs = torch.linspace(0, sample_rate // 2, n_freqs)
50
+
51
+ # calculate mel freq bins
52
+ # hertz to mel(f) is 2595. * math.log10(1. + (f / 700.))
53
+ m_min = 2595.0 * math.log10(1.0 + (f_min / 700.0))
54
+ m_max = 2595.0 * math.log10(1.0 + (f_max / 700.0))
55
+ m_pts = torch.linspace(m_min, m_max, n_mels + 2)
56
+ # mel to hertz(mel) is 700. * (10**(mel / 2595.) - 1.)
57
+ f_pts = 700.0 * (10 ** (m_pts / 2595.0) - 1.0)
58
+ # calculate the difference between each mel point and each stft freq point in hertz
59
+ f_diff = f_pts[1:] - f_pts[:-1] # (n_mels + 1)
60
+ slopes = f_pts.unsqueeze(0) - all_freqs.unsqueeze(1) # (n_freqs, n_mels + 2)
61
+ # create overlapping triangles
62
+ down_slopes = (-1.0 * slopes[:, :-2]) / f_diff[:-1] # (n_freqs, n_mels)
63
+ up_slopes = slopes[:, 2:] / f_diff[1:] # (n_freqs, n_mels)
64
+ fb = torch.min(down_slopes, up_slopes)
65
+ fb = torch.clamp(fb, 1e-6, 1)
66
+
67
+ if norm is not None and norm == "slaney":
68
+ # Slaney-style mel is scaled to be approx constant energy per channel
69
+ enorm = 2.0 / (f_pts[2:n_mels + 2] - f_pts[:n_mels])
70
+ fb *= enorm.unsqueeze(0)
71
+ return fb
72
+
73
+
74
+ def lfilter(
75
+ waveform: Tensor,
76
+ a_coeffs: Tensor,
77
+ b_coeffs: Tensor,
78
+ clamp: bool = True,
79
+ ) -> Tensor:
80
+ r"""Perform an IIR filter by evaluating difference equation.
81
+
82
+ Args:
83
+ waveform (Tensor): audio waveform of dimension of ``(..., time)``. Must be normalized to -1 to 1.
84
+ a_coeffs (Tensor): denominator coefficients of difference equation of dimension of ``(n_order + 1)``.
85
+ Lower delays coefficients are first, e.g. ``[a0, a1, a2, ...]``.
86
+ Must be same size as b_coeffs (pad with 0's as necessary).
87
+ b_coeffs (Tensor): numerator coefficients of difference equation of dimension of ``(n_order + 1)``.
88
+ Lower delays coefficients are first, e.g. ``[b0, b1, b2, ...]``.
89
+ Must be same size as a_coeffs (pad with 0's as necessary).
90
+ clamp (bool, optional): If ``True``, clamp the output signal to be in the range [-1, 1] (Default: ``True``)
91
+
92
+ Returns:
93
+ Tensor: Waveform with dimension of ``(..., time)``.
94
+ """
95
+ # pack batch
96
+ shape = waveform.size()
97
+ waveform = waveform.reshape(-1, shape[-1])
98
+
99
+ assert (a_coeffs.size(0) == b_coeffs.size(0))
100
+ assert (len(waveform.size()) == 2)
101
+ assert (waveform.device == a_coeffs.device)
102
+ assert (b_coeffs.device == a_coeffs.device)
103
+
104
+ device = waveform.device
105
+ dtype = waveform.dtype
106
+ n_channel, n_sample = waveform.size()
107
+ n_order = a_coeffs.size(0)
108
+ n_sample_padded = n_sample + n_order - 1
109
+ assert (n_order > 0)
110
+
111
+ # Pad the input and create output
112
+ padded_waveform = torch.zeros(n_channel, n_sample_padded, dtype=dtype, device=device)
113
+ padded_waveform[:, (n_order - 1):] = waveform
114
+ padded_output_waveform = torch.zeros(n_channel, n_sample_padded, dtype=dtype, device=device)
115
+
116
+ # Set up the coefficients matrix
117
+ # Flip coefficients' order
118
+ a_coeffs_flipped = a_coeffs.flip(0)
119
+ b_coeffs_flipped = b_coeffs.flip(0)
120
+
121
+ # calculate windowed_input_signal in parallel
122
+ # create indices of original with shape (n_channel, n_order, n_sample)
123
+ window_idxs = torch.arange(n_sample, device=device).unsqueeze(0) + torch.arange(n_order, device=device).unsqueeze(1)
124
+ window_idxs = window_idxs.repeat(n_channel, 1, 1)
125
+ window_idxs += (torch.arange(n_channel, device=device).unsqueeze(-1).unsqueeze(-1) * n_sample_padded)
126
+ window_idxs = window_idxs.long()
127
+ # (n_order, ) matmul (n_channel, n_order, n_sample) -> (n_channel, n_sample)
128
+ input_signal_windows = torch.matmul(b_coeffs_flipped, torch.take(padded_waveform, window_idxs))
129
+
130
+ input_signal_windows.div_(a_coeffs[0])
131
+ a_coeffs_flipped.div_(a_coeffs[0])
132
+ for i_sample, o0 in enumerate(input_signal_windows.t()):
133
+ windowed_output_signal = padded_output_waveform[:, i_sample:(i_sample + n_order)]
134
+ o0.addmv_(windowed_output_signal, a_coeffs_flipped, alpha=-1)
135
+ padded_output_waveform[:, i_sample + n_order - 1] = o0
136
+
137
+ output = padded_output_waveform[:, (n_order - 1):]
138
+
139
+ if clamp:
140
+ output = torch.clamp(output, min=-1., max=1.)
141
+
142
+ # unpack batch
143
+ output = output.reshape(shape[:-1] + output.shape[-1:])
144
+
145
+ return output
146
+
147
+
148
+
149
+ def biquad(
150
+ waveform: Tensor,
151
+ b0: float,
152
+ b1: float,
153
+ b2: float,
154
+ a0: float,
155
+ a1: float,
156
+ a2: float
157
+ ) -> Tensor:
158
+ r"""Perform a biquad filter of input tensor. Initial conditions set to 0.
159
+ https://en.wikipedia.org/wiki/Digital_biquad_filter
160
+
161
+ Args:
162
+ waveform (Tensor): audio waveform of dimension of `(..., time)`
163
+ b0 (float): numerator coefficient of current input, x[n]
164
+ b1 (float): numerator coefficient of input one time step ago x[n-1]
165
+ b2 (float): numerator coefficient of input two time steps ago x[n-2]
166
+ a0 (float): denominator coefficient of current output y[n], typically 1
167
+ a1 (float): denominator coefficient of current output y[n-1]
168
+ a2 (float): denominator coefficient of current output y[n-2]
169
+
170
+ Returns:
171
+ Tensor: Waveform with dimension of `(..., time)`
172
+ """
173
+
174
+ device = waveform.device
175
+ dtype = waveform.dtype
176
+
177
+ output_waveform = lfilter(
178
+ waveform,
179
+ torch.tensor([a0, a1, a2], dtype=dtype, device=device),
180
+ torch.tensor([b0, b1, b2], dtype=dtype, device=device)
181
+ )
182
+ return output_waveform
183
+
184
+
185
+
186
+ def _dB2Linear(x: float) -> float:
187
+ return math.exp(x * math.log(10) / 20.0)
188
+
189
+
190
+ def highpass_biquad(
191
+ waveform: Tensor,
192
+ sample_rate: int,
193
+ cutoff_freq: float,
194
+ Q: float = 0.707
195
+ ) -> Tensor:
196
+ r"""Design biquad highpass filter and perform filtering. Similar to SoX implementation.
197
+
198
+ Args:
199
+ waveform (Tensor): audio waveform of dimension of `(..., time)`
200
+ sample_rate (int): sampling rate of the waveform, e.g. 44100 (Hz)
201
+ cutoff_freq (float): filter cutoff frequency
202
+ Q (float, optional): https://en.wikipedia.org/wiki/Q_factor (Default: ``0.707``)
203
+
204
+ Returns:
205
+ Tensor: Waveform dimension of `(..., time)`
206
+ """
207
+ w0 = 2 * math.pi * cutoff_freq / sample_rate
208
+ alpha = math.sin(w0) / 2. / Q
209
+
210
+ b0 = (1 + math.cos(w0)) / 2
211
+ b1 = -1 - math.cos(w0)
212
+ b2 = b0
213
+ a0 = 1 + alpha
214
+ a1 = -2 * math.cos(w0)
215
+ a2 = 1 - alpha
216
+ return biquad(waveform, b0, b1, b2, a0, a1, a2)
217
+
218
+
219
+
220
+ def lowpass_biquad(
221
+ waveform: Tensor,
222
+ sample_rate: int,
223
+ cutoff_freq: float,
224
+ Q: float = 0.707
225
+ ) -> Tensor:
226
+ r"""Design biquad lowpass filter and perform filtering. Similar to SoX implementation.
227
+
228
+ Args:
229
+ waveform (torch.Tensor): audio waveform of dimension of `(..., time)`
230
+ sample_rate (int): sampling rate of the waveform, e.g. 44100 (Hz)
231
+ cutoff_freq (float): filter cutoff frequency
232
+ Q (float, optional): https://en.wikipedia.org/wiki/Q_factor (Default: ``0.707``)
233
+
234
+ Returns:
235
+ Tensor: Waveform of dimension of `(..., time)`
236
+ """
237
+ w0 = 2 * math.pi * cutoff_freq / sample_rate
238
+ alpha = math.sin(w0) / 2 / Q
239
+
240
+ b0 = (1 - math.cos(w0)) / 2
241
+ b1 = 1 - math.cos(w0)
242
+ b2 = b0
243
+ a0 = 1 + alpha
244
+ a1 = -2 * math.cos(w0)
245
+ a2 = 1 - alpha
246
+ return biquad(waveform, b0, b1, b2, a0, a1, a2)
247
+
248
+
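# Usage sketch for the two biquad designs above (cutoffs are illustrative); the
# input waveform is assumed normalized to [-1, 1] as required by lfilter:
#   wav = torch.rand(1, 44100) * 2 - 1
#   wav = highpass_biquad(wav, 44100, cutoff_freq=100.0)
#   wav = lowpass_biquad(wav, 44100, cutoff_freq=8000.0)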
249
+ def window_sumsquare(window, n_frames, hop_length=200, win_length=800,
250
+ n_fft=800, dtype=np.float32, norm=None):
251
+ """
252
+ # from librosa 0.6
253
+ Compute the sum-square envelope of a window function at a given hop length.
254
+
255
+ This is used to estimate modulation effects induced by windowing
256
+ observations in short-time fourier transforms.
257
+
258
+ Parameters
259
+ ----------
260
+ window : string, tuple, number, callable, or list-like
261
+ Window specification, as in `get_window`
262
+
263
+ n_frames : int > 0
264
+ The number of analysis frames
265
+
266
+ hop_length : int > 0
267
+ The number of samples to advance between frames
268
+
269
+ win_length : [optional]
270
+ The length of the window function. By default, this matches `n_fft`.
271
+
272
+ n_fft : int > 0
273
+ The length of each analysis frame.
274
+
275
+ dtype : np.dtype
276
+ The data type of the output
277
+
278
+ Returns
279
+ -------
280
+ wss : np.ndarray, shape=`(n_fft + hop_length * (n_frames - 1))`
281
+ The sum-squared envelope of the window function
282
+ """
283
+ if win_length is None:
284
+ win_length = n_fft
285
+
286
+ n = n_fft + hop_length * (n_frames - 1)
287
+ x = np.zeros(n, dtype=dtype)
288
+
289
+ # Compute the squared window at the desired length
290
+ win_sq = get_window(window, win_length, fftbins=True)
291
+ win_sq = librosa_util.normalize(win_sq, norm=norm)**2
292
+ win_sq = librosa_util.pad_center(win_sq, n_fft)
293
+
294
+ # Fill the envelope
295
+ for i in range(n_frames):
296
+ sample = i * hop_length
297
+ x[sample:min(n, sample + n_fft)] += win_sq[:max(0, min(n_fft, n - sample))]
298
+ return x
299
+
300
+
301
+ class MelScale(torch.nn.Module):
302
+ r"""Turn a normal STFT into a mel frequency STFT, using a conversion
303
+ matrix. This uses triangular filter banks.
304
+
305
+ User can control which device the filter bank (`fb`) is (e.g. fb.to(spec_f.device)).
306
+
307
+ Args:
308
+ n_mels (int, optional): Number of mel filterbanks. (Default: ``128``)
309
+ sample_rate (int, optional): Sample rate of audio signal. (Default: ``16000``)
310
+ f_min (float, optional): Minimum frequency. (Default: ``0.``)
311
+ f_max (float or None, optional): Maximum frequency. (Default: ``sample_rate // 2``)
312
+ n_stft (int, optional): Number of bins in STFT. Calculated from first input
313
+ if None is given. See ``n_fft`` in :class:`Spectrogram`. (Default: ``None``)
314
+ """
315
+ __constants__ = ['n_mels', 'sample_rate', 'f_min', 'f_max']
316
+
317
+ def __init__(self,
318
+ n_mels: int = 128,
319
+ sample_rate: int = 24000,
320
+ f_min: float = 0.,
321
+ f_max: Optional[float] = None,
322
+ n_stft: Optional[int] = None) -> None:
323
+ super(MelScale, self).__init__()
324
+ self.n_mels = n_mels
325
+ self.sample_rate = sample_rate
326
+ self.f_max = f_max if f_max is not None else float(sample_rate // 2)
327
+ self.f_min = f_min
328
+
329
+ assert f_min <= self.f_max, 'Require f_min: %f < f_max: %f' % (f_min, self.f_max)
330
+
331
+ fb = torch.empty(0) if n_stft is None else create_fb_matrix(
332
+ n_stft, self.f_min, self.f_max, self.n_mels, self.sample_rate)
333
+ self.register_buffer('fb', fb)
334
+
335
+ def forward(self, specgram: Tensor) -> Tensor:
336
+ r"""
337
+ Args:
338
+ specgram (Tensor): A spectrogram STFT of dimension (..., freq, time).
339
+
340
+ Returns:
341
+ Tensor: Mel frequency spectrogram of size (..., ``n_mels``, time).
342
+ """
343
+
344
+ # pack batch
345
+ shape = specgram.size()
346
+ specgram = specgram.reshape(-1, shape[-2], shape[-1])
347
+
348
+ if self.fb.numel() == 0:
349
+ tmp_fb = create_fb_matrix(specgram.size(1), self.f_min, self.f_max, self.n_mels, self.sample_rate)
350
+ # Attributes cannot be reassigned outside __init__ so workaround
351
+ self.fb.resize_(tmp_fb.size())
352
+ self.fb.copy_(tmp_fb)
353
+
354
+ # (channel, frequency, time).transpose(...) dot (frequency, n_mels)
355
+ # -> (channel, time, n_mels).transpose(...)
356
+ mel_specgram = torch.matmul(specgram.transpose(1, 2), self.fb).transpose(1, 2)
357
+
358
+ # unpack batch
359
+ mel_specgram = mel_specgram.reshape(shape[:-2] + mel_specgram.shape[-2:])
360
+
361
+ return mel_specgram
362
+
363
+
364
+ class TorchSTFT(torch.nn.Module):
365
+ def __init__(self, fft_size, hop_size, win_size,
366
+ normalized=False, domain='linear',
367
+ mel_scale=False, ref_level_db=20, min_level_db=-100):
368
+ super().__init__()
369
+ self.fft_size = fft_size
370
+ self.hop_size = hop_size
371
+ self.win_size = win_size
372
+ self.ref_level_db = ref_level_db
373
+ self.min_level_db = min_level_db
374
+ self.window = torch.hann_window(win_size)
375
+ self.normalized = normalized
376
+ self.domain = domain
377
+ self.mel_scale = MelScale(n_mels=(fft_size // 2 + 1),
378
+ n_stft=(fft_size // 2 + 1)) if mel_scale else None
379
+
380
+ def transform(self, x):
381
+ x_stft = torch.stft(x, self.fft_size, self.hop_size, self.win_size,
382
+ self.window.type_as(x), normalized=self.normalized)
383
+ real = x_stft[..., 0]
384
+ imag = x_stft[..., 1]
385
+ mag = torch.clamp(real ** 2 + imag ** 2, min=1e-7)
386
+ mag = torch.sqrt(mag)
387
+ phase = torch.atan2(imag, real)
388
+
389
+ if self.mel_scale is not None:
390
+ mag = self.mel_scale(mag)
391
+
392
+ if self.domain == 'log':
393
+ mag = 20 * torch.log10(mag) - self.ref_level_db
394
+ mag = torch.clamp((mag - self.min_level_db) / -self.min_level_db, 0, 1)
395
+ return mag, phase
396
+ elif self.domain == 'linear':
397
+ return mag, phase
398
+ elif self.domain == 'double':
399
+ log_mag = 20 * torch.log10(mag) - self.ref_level_db
400
+ log_mag = torch.clamp((log_mag - self.min_level_db) / -self.min_level_db, 0, 1)
401
+ return torch.cat((mag, log_mag), dim=1), phase
402
+
403
+ def complex(self, x):
404
+ x_stft = torch.stft(x, self.fft_size, self.hop_size, self.win_size,
405
+ self.window.type_as(x), normalized=self.normalized)
406
+ real = x_stft[..., 0]
407
+ imag = x_stft[..., 1]
408
+ return real, imag
409
+
410
+
411
+
412
+ class STFT(torch.nn.Module):
413
+ """adapted from Prem Seetharaman's https://github.com/pseeth/pytorch-stft"""
414
+ def __init__(self, filter_length=800, hop_length=200, win_length=800,
415
+ window='hann'):
416
+ super(STFT, self).__init__()
417
+ self.filter_length = filter_length
418
+ self.hop_length = hop_length
419
+ self.win_length = win_length
420
+ self.window = window
421
+ self.forward_transform = None
422
+ scale = self.filter_length / self.hop_length
423
+ fourier_basis = np.fft.fft(np.eye(self.filter_length))
424
+
425
+ cutoff = int((self.filter_length / 2 + 1))
426
+ fourier_basis = np.vstack([np.real(fourier_basis[:cutoff, :]),
427
+ np.imag(fourier_basis[:cutoff, :])])
428
+
429
+ forward_basis = torch.FloatTensor(fourier_basis[:, None, :])
430
+ inverse_basis = torch.FloatTensor(
431
+ np.linalg.pinv(scale * fourier_basis).T[:, None, :])
432
+
433
+ if window is not None:
434
+ assert(filter_length >= win_length)
435
+ # get window and zero center pad it to filter_length
436
+ fft_window = get_window(window, win_length, fftbins=True)
437
+ fft_window = pad_center(fft_window, filter_length)
438
+ fft_window = torch.from_numpy(fft_window).float()
439
+
440
+ # window the bases
441
+ forward_basis *= fft_window
442
+ inverse_basis *= fft_window
443
+
444
+ self.register_buffer('forward_basis', forward_basis.float())
445
+ self.register_buffer('inverse_basis', inverse_basis.float())
446
+
447
+ def transform(self, input_data):
448
+ num_batches = input_data.size(0)
449
+ num_samples = input_data.size(1)
450
+
451
+ self.num_samples = num_samples
452
+
453
+ # similar to librosa, reflect-pad the input
454
+ input_data = input_data.view(num_batches, 1, num_samples)
455
+ input_data = F.pad(
456
+ input_data.unsqueeze(1),
457
+ (int(self.filter_length / 2), int(self.filter_length / 2), 0, 0),
458
+ mode='reflect')
459
+ input_data = input_data.squeeze(1)
460
+
461
+ forward_transform = F.conv1d(
462
+ input_data,
463
+ Variable(self.forward_basis, requires_grad=False),
464
+ stride=self.hop_length,
465
+ padding=0)
466
+
467
+ cutoff = int((self.filter_length / 2) + 1)
468
+ real_part = forward_transform[:, :cutoff, :]
469
+ imag_part = forward_transform[:, cutoff:, :]
470
+
471
+ magnitude = torch.sqrt(real_part**2 + imag_part**2)
472
+ phase = torch.autograd.Variable(
473
+ torch.atan2(imag_part.data, real_part.data))
474
+
475
+ return magnitude, phase
476
+
477
+ def inverse(self, magnitude, phase):
478
+ recombine_magnitude_phase = torch.cat(
479
+ [magnitude*torch.cos(phase), magnitude*torch.sin(phase)], dim=1)
480
+
481
+ inverse_transform = F.conv_transpose1d(
482
+ recombine_magnitude_phase,
483
+ Variable(self.inverse_basis, requires_grad=False),
484
+ stride=self.hop_length,
485
+ padding=0)
486
+
487
+ if self.window is not None:
488
+ window_sum = window_sumsquare(
489
+ self.window, magnitude.size(-1), hop_length=self.hop_length,
490
+ win_length=self.win_length, n_fft=self.filter_length,
491
+ dtype=np.float32)
492
+ # remove modulation effects
493
+ approx_nonzero_indices = torch.from_numpy(
494
+ np.where(window_sum > tiny(window_sum))[0])
495
+ window_sum = torch.autograd.Variable(
496
+ torch.from_numpy(window_sum), requires_grad=False)
497
+ window_sum = window_sum.cuda() if magnitude.is_cuda else window_sum
498
+ inverse_transform[:, :, approx_nonzero_indices] /= window_sum[approx_nonzero_indices]
499
+
500
+ # scale by hop ratio
501
+ inverse_transform *= float(self.filter_length) / self.hop_length
502
+
503
+ inverse_transform = inverse_transform[:, :, int(self.filter_length/2):]
504
+ inverse_transform = inverse_transform[:, :, :-int(self.filter_length/2):]
505
+
506
+ return inverse_transform
507
+
508
+ def forward(self, input_data):
509
+ self.magnitude, self.phase = self.transform(input_data)
510
+ reconstruction = self.inverse(self.magnitude, self.phase)
511
+ return reconstruction
512
+
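TorchSTFT exposes magnitude/phase (and raw real/imaginary) spectra computed with a Hann window. A usage sketch with illustrative sizes; it relies on the torch version pinned in requirements.txt, where torch.stft still returns a real-valued tensor:

import torch
from modules.stft import TorchSTFT

stft = TorchSTFT(fft_size=1024, hop_size=256, win_size=1024, domain='linear')
wav = torch.randn(2, 44100)            # [batch, samples]
mag, phase = stft.transform(wav)       # each [batch, fft_size // 2 + 1, frames]
real, imag = stft.complex(wav)         # raw real / imaginary parts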
modules/transforms.py ADDED
@@ -0,0 +1,193 @@
1
+ import torch
2
+ from torch.nn import functional as F
3
+
4
+ import numpy as np
5
+
6
+
7
+ DEFAULT_MIN_BIN_WIDTH = 1e-3
8
+ DEFAULT_MIN_BIN_HEIGHT = 1e-3
9
+ DEFAULT_MIN_DERIVATIVE = 1e-3
10
+
11
+
12
+ def piecewise_rational_quadratic_transform(inputs,
13
+ unnormalized_widths,
14
+ unnormalized_heights,
15
+ unnormalized_derivatives,
16
+ inverse=False,
17
+ tails=None,
18
+ tail_bound=1.,
19
+ min_bin_width=DEFAULT_MIN_BIN_WIDTH,
20
+ min_bin_height=DEFAULT_MIN_BIN_HEIGHT,
21
+ min_derivative=DEFAULT_MIN_DERIVATIVE):
22
+
23
+ if tails is None:
24
+ spline_fn = rational_quadratic_spline
25
+ spline_kwargs = {}
26
+ else:
27
+ spline_fn = unconstrained_rational_quadratic_spline
28
+ spline_kwargs = {
29
+ 'tails': tails,
30
+ 'tail_bound': tail_bound
31
+ }
32
+
33
+ outputs, logabsdet = spline_fn(
34
+ inputs=inputs,
35
+ unnormalized_widths=unnormalized_widths,
36
+ unnormalized_heights=unnormalized_heights,
37
+ unnormalized_derivatives=unnormalized_derivatives,
38
+ inverse=inverse,
39
+ min_bin_width=min_bin_width,
40
+ min_bin_height=min_bin_height,
41
+ min_derivative=min_derivative,
42
+ **spline_kwargs
43
+ )
44
+ return outputs, logabsdet
45
+
46
+
47
+ def searchsorted(bin_locations, inputs, eps=1e-6):
48
+ bin_locations[..., -1] += eps
49
+ return torch.sum(
50
+ inputs[..., None] >= bin_locations,
51
+ dim=-1
52
+ ) - 1
53
+
54
+
55
+ def unconstrained_rational_quadratic_spline(inputs,
56
+ unnormalized_widths,
57
+ unnormalized_heights,
58
+ unnormalized_derivatives,
59
+ inverse=False,
60
+ tails='linear',
61
+ tail_bound=1.,
62
+ min_bin_width=DEFAULT_MIN_BIN_WIDTH,
63
+ min_bin_height=DEFAULT_MIN_BIN_HEIGHT,
64
+ min_derivative=DEFAULT_MIN_DERIVATIVE):
65
+ inside_interval_mask = (inputs >= -tail_bound) & (inputs <= tail_bound)
66
+ outside_interval_mask = ~inside_interval_mask
67
+
68
+ outputs = torch.zeros_like(inputs)
69
+ logabsdet = torch.zeros_like(inputs)
70
+
71
+ if tails == 'linear':
72
+ unnormalized_derivatives = F.pad(unnormalized_derivatives, pad=(1, 1))
73
+ constant = np.log(np.exp(1 - min_derivative) - 1)
74
+ unnormalized_derivatives[..., 0] = constant
75
+ unnormalized_derivatives[..., -1] = constant
76
+
77
+ outputs[outside_interval_mask] = inputs[outside_interval_mask]
78
+ logabsdet[outside_interval_mask] = 0
79
+ else:
80
+ raise RuntimeError('{} tails are not implemented.'.format(tails))
81
+
82
+ outputs[inside_interval_mask], logabsdet[inside_interval_mask] = rational_quadratic_spline(
83
+ inputs=inputs[inside_interval_mask],
84
+ unnormalized_widths=unnormalized_widths[inside_interval_mask, :],
85
+ unnormalized_heights=unnormalized_heights[inside_interval_mask, :],
86
+ unnormalized_derivatives=unnormalized_derivatives[inside_interval_mask, :],
87
+ inverse=inverse,
88
+ left=-tail_bound, right=tail_bound, bottom=-tail_bound, top=tail_bound,
89
+ min_bin_width=min_bin_width,
90
+ min_bin_height=min_bin_height,
91
+ min_derivative=min_derivative
92
+ )
93
+
94
+ return outputs, logabsdet
95
+
96
+ def rational_quadratic_spline(inputs,
97
+ unnormalized_widths,
98
+ unnormalized_heights,
99
+ unnormalized_derivatives,
100
+ inverse=False,
101
+ left=0., right=1., bottom=0., top=1.,
102
+ min_bin_width=DEFAULT_MIN_BIN_WIDTH,
103
+ min_bin_height=DEFAULT_MIN_BIN_HEIGHT,
104
+ min_derivative=DEFAULT_MIN_DERIVATIVE):
105
+ if torch.min(inputs) < left or torch.max(inputs) > right:
106
+ raise ValueError('Input to a transform is not within its domain')
107
+
108
+ num_bins = unnormalized_widths.shape[-1]
109
+
110
+ if min_bin_width * num_bins > 1.0:
111
+ raise ValueError('Minimal bin width too large for the number of bins')
112
+ if min_bin_height * num_bins > 1.0:
113
+ raise ValueError('Minimal bin height too large for the number of bins')
114
+
115
+ widths = F.softmax(unnormalized_widths, dim=-1)
116
+ widths = min_bin_width + (1 - min_bin_width * num_bins) * widths
117
+ cumwidths = torch.cumsum(widths, dim=-1)
118
+ cumwidths = F.pad(cumwidths, pad=(1, 0), mode='constant', value=0.0)
119
+ cumwidths = (right - left) * cumwidths + left
120
+ cumwidths[..., 0] = left
121
+ cumwidths[..., -1] = right
122
+ widths = cumwidths[..., 1:] - cumwidths[..., :-1]
123
+
124
+ derivatives = min_derivative + F.softplus(unnormalized_derivatives)
125
+
126
+ heights = F.softmax(unnormalized_heights, dim=-1)
127
+ heights = min_bin_height + (1 - min_bin_height * num_bins) * heights
128
+ cumheights = torch.cumsum(heights, dim=-1)
129
+ cumheights = F.pad(cumheights, pad=(1, 0), mode='constant', value=0.0)
130
+ cumheights = (top - bottom) * cumheights + bottom
131
+ cumheights[..., 0] = bottom
132
+ cumheights[..., -1] = top
133
+ heights = cumheights[..., 1:] - cumheights[..., :-1]
134
+
135
+ if inverse:
136
+ bin_idx = searchsorted(cumheights, inputs)[..., None]
137
+ else:
138
+ bin_idx = searchsorted(cumwidths, inputs)[..., None]
139
+
140
+ input_cumwidths = cumwidths.gather(-1, bin_idx)[..., 0]
141
+ input_bin_widths = widths.gather(-1, bin_idx)[..., 0]
142
+
143
+ input_cumheights = cumheights.gather(-1, bin_idx)[..., 0]
144
+ delta = heights / widths
145
+ input_delta = delta.gather(-1, bin_idx)[..., 0]
146
+
147
+ input_derivatives = derivatives.gather(-1, bin_idx)[..., 0]
148
+ input_derivatives_plus_one = derivatives[..., 1:].gather(-1, bin_idx)[..., 0]
149
+
150
+ input_heights = heights.gather(-1, bin_idx)[..., 0]
151
+
152
+ if inverse:
153
+ a = (((inputs - input_cumheights) * (input_derivatives
154
+ + input_derivatives_plus_one
155
+ - 2 * input_delta)
156
+ + input_heights * (input_delta - input_derivatives)))
157
+ b = (input_heights * input_derivatives
158
+ - (inputs - input_cumheights) * (input_derivatives
159
+ + input_derivatives_plus_one
160
+ - 2 * input_delta))
161
+ c = - input_delta * (inputs - input_cumheights)
162
+
163
+ discriminant = b.pow(2) - 4 * a * c
164
+ assert (discriminant >= 0).all()
165
+
166
+ root = (2 * c) / (-b - torch.sqrt(discriminant))
167
+ outputs = root * input_bin_widths + input_cumwidths
168
+
169
+ theta_one_minus_theta = root * (1 - root)
170
+ denominator = input_delta + ((input_derivatives + input_derivatives_plus_one - 2 * input_delta)
171
+ * theta_one_minus_theta)
172
+ derivative_numerator = input_delta.pow(2) * (input_derivatives_plus_one * root.pow(2)
173
+ + 2 * input_delta * theta_one_minus_theta
174
+ + input_derivatives * (1 - root).pow(2))
175
+ logabsdet = torch.log(derivative_numerator) - 2 * torch.log(denominator)
176
+
177
+ return outputs, -logabsdet
178
+ else:
179
+ theta = (inputs - input_cumwidths) / input_bin_widths
180
+ theta_one_minus_theta = theta * (1 - theta)
181
+
182
+ numerator = input_heights * (input_delta * theta.pow(2)
183
+ + input_derivatives * theta_one_minus_theta)
184
+ denominator = input_delta + ((input_derivatives + input_derivatives_plus_one - 2 * input_delta)
185
+ * theta_one_minus_theta)
186
+ outputs = input_cumheights + numerator / denominator
187
+
188
+ derivative_numerator = input_delta.pow(2) * (input_derivatives_plus_one * theta.pow(2)
189
+ + 2 * input_delta * theta_one_minus_theta
190
+ + input_derivatives * (1 - theta).pow(2))
191
+ logabsdet = torch.log(derivative_numerator) - 2 * torch.log(denominator)
192
+
193
+ return outputs, logabsdet
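The rational-quadratic spline is an invertible element-wise transform: calling it with inverse=True undoes the forward pass and negates the log-determinant. A quick sanity-check sketch with illustrative shapes; with tails='linear' the transform expects num_bins - 1 derivative parameters, matching how ConvFlow slices its projection:

import torch
from modules.transforms import piecewise_rational_quadratic_transform

b, t, num_bins = 2, 50, 10
x = torch.randn(b, t)
uw = torch.randn(b, t, num_bins)
uh = torch.randn(b, t, num_bins)
ud = torch.randn(b, t, num_bins - 1)

y, logdet = piecewise_rational_quadratic_transform(
    x, uw, uh, ud, inverse=False, tails='linear', tail_bound=5.0)
x_rec, inv_logdet = piecewise_rational_quadratic_transform(
    y, uw, uh, ud, inverse=True, tails='linear', tail_bound=5.0)
print(torch.allclose(x, x_rec, atol=1e-4))             # True
print(torch.allclose(logdet, -inv_logdet, atol=1e-4))  # True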
preprocess/mel_processing.py ADDED
@@ -0,0 +1,104 @@
1
+ import math
2
+ import os
3
+ import random
4
+ import torch
5
+ from torch import nn
6
+ import torch.nn.functional as F
7
+ import torch.utils.data
8
+ import numpy as np
9
+ import librosa
10
+ import librosa.util as librosa_util
11
+ from librosa.util import normalize, pad_center, tiny
12
+ from scipy.signal import get_window
13
+ from scipy.io.wavfile import read
14
+ from librosa.filters import mel as librosa_mel_fn
15
+
16
+ MAX_WAV_VALUE = 32768.0
17
+
18
+
19
+ def dynamic_range_compression_torch(x, C=1, clip_val=1e-5):
20
+ """
21
+ PARAMS
22
+ ------
23
+ C: compression factor
24
+ """
25
+ return torch.log(torch.clamp(x, min=clip_val) * C)
26
+
27
+
28
+ def dynamic_range_decompression_torch(x, C=1):
29
+ """
30
+ PARAMS
31
+ ------
32
+ C: compression factor used to compress
33
+ """
34
+ return torch.exp(x) / C
35
+
36
+
37
+ def spectral_normalize_torch(magnitudes):
38
+ output = dynamic_range_compression_torch(magnitudes)
39
+ return output
40
+
41
+
42
+ def spectral_de_normalize_torch(magnitudes):
43
+ output = dynamic_range_decompression_torch(magnitudes)
44
+ return output
45
+
46
+
47
+ mel_basis = {}
48
+ hann_window = {}
49
+
50
+
51
+ def spectrogram_torch(y, n_fft, sampling_rate, hop_size, win_size, center=False):
52
+
53
+ global hann_window
54
+ dtype_device = str(y.dtype) + '_' + str(y.device)
55
+ wnsize_dtype_device = str(win_size) + '_' + dtype_device
56
+ if wnsize_dtype_device not in hann_window:
57
+ hann_window[wnsize_dtype_device] = torch.hann_window(win_size).to(dtype=y.dtype, device=y.device)
58
+
59
+ y = torch.nn.functional.pad(y.unsqueeze(1), (int((n_fft-hop_size)/2), int((n_fft-hop_size)/2)), mode='reflect')
60
+ y = y.squeeze(1)
61
+
62
+ spec = torch.stft(y, n_fft, hop_length=hop_size, win_length=win_size, window=hann_window[wnsize_dtype_device],
63
+ center=center, pad_mode='reflect', normalized=False, onesided=True)
64
+
65
+ spec = torch.sqrt(spec.pow(2).sum(-1) + 1e-6)
66
+ return spec
67
+
68
+
69
+ def spec_to_mel_torch(spec, n_fft, num_mels, sampling_rate, fmin, fmax):
70
+ global mel_basis
71
+ dtype_device = str(spec.dtype) + '_' + str(spec.device)
72
+ fmax_dtype_device = str(fmax) + '_' + dtype_device
73
+ if fmax_dtype_device not in mel_basis:
74
+ mel = librosa_mel_fn(sampling_rate, n_fft, num_mels, fmin, fmax)
75
+ mel_basis[fmax_dtype_device] = torch.from_numpy(mel).to(dtype=spec.dtype, device=spec.device)
76
+ spec = torch.matmul(mel_basis[fmax_dtype_device], spec)
77
+ spec = spectral_normalize_torch(spec)
78
+ return spec
79
+
80
+
81
+ def mel_spectrogram_torch(y, n_fft, num_mels, sampling_rate, hop_size, win_size, fmin, fmax, center=False):
82
+
83
+ global mel_basis, hann_window
84
+ dtype_device = str(y.dtype) + '_' + str(y.device)
85
+ fmax_dtype_device = str(fmax) + '_' + dtype_device
86
+ wnsize_dtype_device = str(win_size) + '_' + dtype_device
87
+ if fmax_dtype_device not in mel_basis:
88
+ mel = librosa_mel_fn(sampling_rate, n_fft, num_mels, fmin, fmax)
89
+ mel_basis[fmax_dtype_device] = torch.from_numpy(mel).to(dtype=y.dtype, device=y.device)
90
+ if wnsize_dtype_device not in hann_window:
91
+ hann_window[wnsize_dtype_device] = torch.hann_window(win_size).to(dtype=y.dtype, device=y.device)
92
+
93
+ y = torch.nn.functional.pad(y.unsqueeze(1), (int((n_fft-hop_size)/2), int((n_fft-hop_size)/2)), mode='reflect')
94
+ y = y.squeeze(1)
95
+
96
+ spec = torch.stft(y, n_fft, hop_length=hop_size, win_length=win_size, window=hann_window[wnsize_dtype_device],
97
+ center=center, pad_mode='reflect', normalized=False, onesided=True)
98
+
99
+ spec = torch.sqrt(spec.pow(2).sum(-1) + 1e-6)
100
+
101
+ spec = torch.matmul(mel_basis[fmax_dtype_device], spec)
102
+ spec = spectral_normalize_torch(spec)
103
+
104
+ return spec
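A usage sketch for mel_spectrogram_torch; the FFT, hop and mel values below are illustrative assumptions, the real ones come from egs/visinger2/config.json:

import torch
from preprocess.mel_processing import mel_spectrogram_torch

wav = torch.randn(1, 44100)            # [batch, samples]
mel = mel_spectrogram_torch(wav, n_fft=2048, num_mels=80, sampling_rate=44100,
                            hop_size=512, win_size=2048, fmin=0, fmax=22050)
print(mel.shape)                       # [1, 80, frames]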
preprocess/prepare_multispeaker.py ADDED
@@ -0,0 +1,10 @@
1
+ import os
2
+ import shutil
3
+
4
+ for spk in os.listdir("data"):
5
+ if os.path.isdir(f"data/{spk}"):
6
+ if os.path.exists(f"data/{spk}/raw/wavs"):
7
+ shutil.move(f"data/{spk}/raw/wavs", f"data/{spk}")
8
+ shutil.move(f"data/{spk}/raw/transcriptions.txt", f"data/{spk}")
9
+ shutil.rmtree(f"data/{spk}/raw")
10
+
preprocess/preprocess.py ADDED
@@ -0,0 +1,103 @@
1
+ import glob
2
+ import os
3
+ import sys
4
+ import argparse
5
+ import numpy as np
6
+ from multiprocessing import cpu_count
7
+ from concurrent.futures import ProcessPoolExecutor
8
+ from functools import partial
9
+ from utils import audio
10
+ import utils.utils as utils
11
+ from tqdm import tqdm
12
+ import pyworld as pw
13
+ from random import shuffle
14
+
15
+ import warnings
16
+ warnings.filterwarnings("ignore")
17
+
18
+ def extract_mel(wav, hparams):
19
+ mel_spectrogram = audio.melspectrogram(wav, hparams).astype(np.float32)
20
+ return mel_spectrogram.T, wav
21
+
22
+ def extract_pitch(wav, hps):
23
+ # rapt may be better
24
+ f0, _ = pw.harvest(wav.astype(np.float64),
25
+ hps.sample_rate,
26
+ frame_period=hps.hop_size / hps.sample_rate * 1000)
27
+ return f0
28
+
29
+ def process_utterance(hps, data_root, item):
30
+ out_dir = data_root
31
+
32
+ wav_path = os.path.join(data_root, "wavs",
33
+ "{}.wav".format(item))
34
+ wav = audio.load_wav(wav_path,
35
+ raw_sr=hps.data.sample_rate,
36
+ target_sr=hps.data.sample_rate,
37
+ win_size=hps.data.win_size,
38
+ hop_size=hps.data.hop_size)
39
+
40
+ mel, _ = extract_mel(wav, hps.data)
41
+ out_mel_dir = os.path.join(out_dir, "mels")
42
+ os.makedirs(out_mel_dir, exist_ok=True)
43
+ mel_path = os.path.join(out_mel_dir, item)
44
+ np.save(mel_path, mel)
45
+
46
+ pitch = extract_pitch(wav, hps.data)
47
+ out_pitch_dir = os.path.join(out_dir, "pitch")
48
+ os.makedirs(out_pitch_dir, exist_ok=True)
49
+ pitch_path = os.path.join(out_pitch_dir, item)
50
+ np.save(pitch_path, pitch)
51
+
52
+
53
+ def process(args, hps, data_dir):
54
+ print(os.path.join(data_dir, "wavs"))
55
+ if(not os.path.exists(os.path.join(data_dir, "file.list"))):
56
+ with open(os.path.join(data_dir, "file.list"), "w") as out_file:
57
+ files = os.listdir(os.path.join(data_dir, "wavs"))
58
+ files = [i for i in files if i.endswith(".wav")]
59
+ for f in files:
60
+ out_file.write(f.strip().split(".")[0] + '\n')
61
+ metadata = [
62
+ item.strip() for item in open(
63
+ os.path.join(data_dir, "file.list")).readlines()
64
+ ]
65
+ executor = ProcessPoolExecutor(max_workers=args.num_workers)
66
+ results = []
67
+ for item in metadata:
68
+ results.append(executor.submit(partial(process_utterance, hps, data_dir, item)))
69
+ return [result.result() for result in tqdm(results)]
70
+
71
+ def split_dataset(data_dir):
72
+ metadata = [
73
+ item.strip() for item in open(
74
+ os.path.join(data_dir, "file.list")).readlines()
75
+ ]
76
+ shuffle(metadata)
77
+ train_set = metadata[:-2]
78
+ test_set = metadata[-2:]
79
+ with open(os.path.join(data_dir, "train.list"), "w") as ts:
80
+ for item in train_set:
81
+ ts.write(item+"\n")
82
+ with open(os.path.join(data_dir, "test.list"), "w") as ts:
83
+ for item in test_set:
84
+ ts.write(item+"\n")
85
+
86
+ def main():
87
+ parser = argparse.ArgumentParser()
88
+ parser.add_argument('--config',
89
+ default='config.json',
90
+ help='json files for configurations.')
91
+ parser.add_argument('--num_workers', type=int, default=int(cpu_count()) // 2)
92
+
93
+ args = parser.parse_args()
94
+ hps = utils.get_hparams_from_file(args.config)
95
+ spklist = [spk for spk in os.listdir("data") if os.path.isdir(f"data/{spk}") and not os.path.exists(f"data/{spk}/test.list")]
96
+ for spk in tqdm(spklist):
97
+ print(f"preprocessing {spk}")
98
+ data_dir = f"data/{spk}"
99
+ process(args, hps, data_dir)
100
+ split_dataset(data_dir)
101
+
102
+ if __name__ == "__main__":
103
+ main()
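The script expects one folder per speaker under data/, each with a wavs/ subfolder, writes mels/ and pitch/ features next to it, and splits a per-speaker train/test list. A hedged example invocation (the config path is an assumption based on this Space's layout):

    python preprocess/preprocess.py --config egs/visinger2/config.json --num_workers 4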
preprocess/preprocess_multispeaker.py ADDED
@@ -0,0 +1,28 @@
1
+ import glob
2
+ import json
3
+
4
+ data_root = "data"
5
+
6
+
7
+ transcriptions = glob.glob(f"{data_root}/*/transcriptions.txt")
8
+ spk2id = {}
9
+ spk_id = 0
10
+ ms_transcriptions = open(f'{data_root}/transcriptions.txt', "w")
11
+ ms_train_set = open(f'{data_root}/train.list', "w")
12
+ ms_test_set = open(f'{data_root}/test.list', "w")
13
+ for transcription in transcriptions:
14
+ spk = transcription.split("/")[-2]
15
+ spk2id[spk] = spk_id
16
+ spk_id += 1
17
+ for line in open(transcription).readlines():
18
+ ms_transcriptions.write(f"{spk}/{line}")
19
+ for line in open(transcription.replace("transcriptions.txt", "train.list")):
20
+ ms_train_set.write(f"{spk}/{line}")
21
+ for line in open(transcription.replace("transcriptions.txt", "test.list")):
22
+ ms_test_set.write(f"{spk}/{line}")
23
+
24
+ ms_transcriptions.close()
25
+ ms_train_set.close()
26
+ ms_test_set.close()
27
+ print("请手动将说话人与id的映射粘贴至config文件中")
28
+ print(json.dumps(spk2id))
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
1
+ ipython==8.8.0
2
+ librosa==0.8.1
3
+ matplotlib==3.3.2
4
+ numpy==1.19.2
5
+ pyworld==0.3.0
6
+ scipy==1.5.2
7
+ soundfile==0.11.0
8
+ torch==1.8.1
9
+ tqdm==4.50.2
text/npu/__init__.py ADDED
@@ -0,0 +1,2 @@
1
+ from text.npu import symbols
2
+ from text.npu.symbol_converter import *
text/npu/symbol_converter.py ADDED
@@ -0,0 +1,34 @@
1
+ import re
2
+ import numpy as np
3
+ from text.npu.symbols import *
4
+ import os
5
+
6
+ # Mappings from symbol to numeric ID and vice versa:
7
+ _ttsing_phone_to_id = {p: i for i, p in enumerate(ttsing_phone_set)}
8
+ _ttsing_pitch_to_id = {p: i for i, p in enumerate(ttsing_pitch_set)}
9
+ _ttsing_slur_to_id = {s: i for i, s in enumerate(ttsing_slur_set)}
10
+
11
+ ttsing_phone_to_int = {}
12
+ int_to_ttsing_phone = {}
13
+ for idx, item in enumerate(ttsing_phone_set):
14
+ ttsing_phone_to_int[item] = idx
15
+ int_to_ttsing_phone[idx] = item
16
+
17
+ ttsing_pitch_to_int = {}
18
+ int_to_ttsing_pitch = {}
19
+ for idx, item in enumerate(ttsing_pitch_set):
20
+ ttsing_pitch_to_int[item] = idx
21
+ int_to_ttsing_pitch[idx] = item
22
+
23
+ # opencpop
24
+ ttsing_opencpop_pitch_to_int = {}
25
+ for idx, item in enumerate(ttsing_opencpop_pitch_set):
26
+ ttsing_opencpop_pitch_to_int[item] = idx
27
+
28
+ ttsing_slur_to_int = {}
29
+ int_to_ttsing_slur = {}
30
+ for idx, item in enumerate(ttsing_slur_set):
31
+ ttsing_slur_to_int[item] = idx
32
+ int_to_ttsing_slur[idx] = item
33
+
34
+
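symbol_converter.py only builds lookup tables between the symbol sets defined in text/npu/symbols.py (below) and integer ids. A small, illustrative sketch of how the tables map phonemes and note names to ids; the sequences are invented, but every symbol used here does exist in the sets:

```python
# Illustrative use of the converter tables; the phoneme/note sequences are invented.
from text.npu.symbol_converter import ttsing_phone_to_int, ttsing_opencpop_pitch_to_int

phonemes = ["SP", "n", "i", "SP"]      # all present in ttsing_phone_set
notes = ["rest", "C4", "C4", "rest"]   # all present in ttsing_opencpop_pitch_set

phone_ids = [ttsing_phone_to_int[p] for p in phonemes]
note_ids = [ttsing_opencpop_pitch_to_int[n] for n in notes]
print(phone_ids, note_ids)
```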
text/npu/symbols.py ADDED
@@ -0,0 +1,61 @@
+
+ ttsing_phone_set = ['_'] + [
+     "b", "c", "ch", "d", "f", "g", "h", "j", "k", "l", "m", "n", "p", "q", "r",
+     "s", "sh", "t", "x", "z", "zh", "a", "ai", "an", "ang", "ao", "e", "ei",
+     "en", "eng", "er", "iii", "ii", "i", "ia", "ian", "iang", "iao", "ie", "in",
+     "ing", "iong", "iou", "o", "ong", "ou", "u", "ua", "uai", "uan", "uang",
+     "uei", "uen", "ueng", "uo", "v", "van", "ve", "vn", "AH", "AA", "AO", "ER",
+     "IH", "IY", "UH", "UW", "EH", "AE", "AY", "EY", "OY", "AW", "OW", "P", "B",
+     "T", "D", "K", "G", "M", "N", "NG", "L", "S", "Z", "Y", "TH", "DH", "SH",
+     "ZH", "CH", "JH", "V", "W", "F", "R", "HH", "AH0", "AA0", "AO0", "ER0",
+     "IH0", "IY0", "UH0", "UW0", "EH0", "AE0", "AY0", "EY0", "OY0", "AW0", "OW0",
+     "AH1", "AA1", "AO1", "ER1", "IH1", "IY1", "UH1", "UW1", "EH1", "AE1", "AY1",
+     "EY1", "OY1", "AW1", "OW1", "AH2", "AA2", "AO2", "ER2", "IH2", "IY2", "UH2",
+     "UW2", "EH2", "AE2", "AY2", "EY2", "OY2", "AW2", "OW2", "AH3", "AA3", "AO3",
+     "ER3", "IH3", "IY3", "UH3", "UW3", "EH3", "AE3", "AY3", "EY3", "OY3", "AW3",
+     "OW3", "D-1", "T-1", "P*", "B*", "T*", "D*", "K*", "G*", "M*", "N*", "NG*",
+     "L*", "S*", "Z*", "Y*", "TH*", "DH*", "SH*", "ZH*", "CH*", "JH*", "V*",
+     "W*", "F*", "R*", "HH*", "sp", "sil", "or", "ar", "aor", "our", "angr",
+     "eir", "engr", "air", "ianr", "iaor", "ir", "ingr", "ur", "iiir", "uar",
+     "uangr", "uenr", "iir", "ongr", "uor", "ueir", "iar", "iangr", "inr",
+     "iour", "vr", "uanr", "ruai", "TR", "rest",
+     # opencpop
+     'w', 'SP', 'AP', 'un', 'y', 'ui', 'iu',
+     "iour", "vr", "uanr", "ruai", "TR", "rest",
+     # opencpop
+     'w', 'SP', 'AP', 'un', 'y', 'ui', 'iu',
+     # opencpop-strict
+     'i0', 'E', 'En'
+ ]
+
+ ttsing_pitch_set = ['_'] + [
+     "C0", "C1", "C2", "C3", "C4", "C5", "C6", "C#/Db0", "C#/Db1", "C#/Db2",
+     "C#/Db3", "C#/Db4", "C#/Db5", "C#/Db6", "D0", "D1", "D2", "D3", "D4", "D5",
+     "D6", "D#/Eb0", "D#/Eb1", "D#/Eb2", "D#/Eb3", "D#/Eb4", "D#/Eb5", "D#/Eb6",
+     "E0", "E1", "E2", "E3", "E4", "E5", "E6", "F0", "F1", "F2", "F3", "F4",
+     "F5", "F6", "F#/Gb0", "F#/Gb1", "F#/Gb2", "F#/Gb3", "F#/Gb4", "F#/Gb5",
+     "F#/Gb6", "G0", "G1", "G2", "G3", "G4", "G5", "G6", "G#/Ab0", "G#/Ab1",
+     "G#/Ab2", "G#/Ab3", "G#/Ab4", "G#/Ab5", "G#/Ab6", "A0", "A1", "A2", "A3",
+     "A4", "A5", "A6", "A#/Bb0", "A#/Bb1", "A#/Bb2", "A#/Bb3", "A#/Bb4",
+     "A#/Bb5", "A#/Bb6", "B0", "B1", "B2", "B3", "B4", "B5", "B6", "RestRest"
+ ]
+
+ ttsing_opencpop_pitch_set = ['_'] + [
+     "C0", "C1", "C2", "C3", "C4", "C5", "C6",
+     "C#0/Db0", "C#1/Db1", "C#2/Db2", "C#3/Db3", "C#4/Db4", "C#5/Db5", "C#6/Db6",
+     "D0", "D1", "D2", "D3", "D4", "D5", "D6",
+     "D#0/Eb0", "D#1/Eb1", "D#2/Eb2", "D#3/Eb3", "D#4/Eb4", "D#5/Eb5", "D#6/Eb6",
+     "E0", "E1", "E2", "E3", "E4", "E5", "E6",
+     "F0", "F1", "F2", "F3", "F4", "F5", "F6",
+     "F#0/Gb0", "F#1/Gb1", "F#2/Gb2", "F#3/Gb3", "F#4/Gb4", "F#5/Gb5", "F#6/Gb6",
+     "G0", "G1", "G2", "G3", "G4", "G5", "G6",
+     "G#0/Ab0", "G#1/Ab1", "G#2/Ab2", "G#3/Ab3", "G#4/Ab4", "G#5/Ab5", "G#6/Ab6",
+     "A0", "A1", "A2", "A3", "A4", "A5", "A6",
+     "A#0/Bb0", "A#1/Bb1", "A#2/Bb2", "A#3/Bb3", "A#4/Bb4", "A#5/Bb5", "A#6/Bb6",
+     "B0", "B1", "B2", "B3", "B4", "B5", "B6",
+     "RestRest", "rest"
+ ]
+
+ ttsing_slur_set = ['_'] + ['0', '1']
+
utils/__init__.py ADDED
File without changes
utils/audio.py ADDED
@@ -0,0 +1,99 @@
+ import numpy as np
+ from numpy import linalg as LA
+ import librosa
+ from scipy.io import wavfile
+ import soundfile as sf
+ import librosa.filters
+
+
+ def load_wav(wav_path, raw_sr, target_sr=16000, win_size=800, hop_size=200):
+     audio = librosa.core.load(wav_path, sr=raw_sr)[0]
+     if raw_sr != target_sr:
+         audio = librosa.core.resample(audio,
+                                       raw_sr,
+                                       target_sr,
+                                       res_type='kaiser_best')
+         target_length = (audio.size // hop_size +
+                          win_size // hop_size) * hop_size
+         pad_len = (target_length - audio.size) // 2
+         if audio.size % 2 == 0:
+             audio = np.pad(audio, (pad_len, pad_len), mode='reflect')
+         else:
+             audio = np.pad(audio, (pad_len, pad_len + 1), mode='reflect')
+     return audio
+
+
+ def save_wav(wav, path, sample_rate, norm=False):
+     if norm:
+         wav *= 32767 / max(0.01, np.max(np.abs(wav)))
+         wavfile.write(path, sample_rate, wav.astype(np.int16))
+     else:
+         sf.write(path, wav, sample_rate)
+
+
+ _mel_basis = None
+ _inv_mel_basis = None
+
+
+ def _build_mel_basis(hparams):
+     assert hparams.fmax <= hparams.sample_rate // 2
+     return librosa.filters.mel(hparams.sample_rate,
+                                hparams.n_fft,
+                                n_mels=hparams.acoustic_dim,
+                                fmin=hparams.fmin,
+                                fmax=hparams.fmax)
+
+
+ def _linear_to_mel(spectogram, hparams):
+     global _mel_basis
+     if _mel_basis is None:
+         _mel_basis = _build_mel_basis(hparams)
+     return np.dot(_mel_basis, spectogram)
+
+
+ def _mel_to_linear(mel_spectrogram, hparams):
+     global _inv_mel_basis
+     if _inv_mel_basis is None:
+         _inv_mel_basis = np.linalg.pinv(_build_mel_basis(hparams))
+     return np.maximum(1e-10, np.dot(_inv_mel_basis, mel_spectrogram))
+
+
+ def _stft(y, hparams):
+     return librosa.stft(y=y,
+                         n_fft=hparams.n_fft,
+                         hop_length=hparams.hop_size,
+                         win_length=hparams.win_size)
+
+
+ def _amp_to_db(x, hparams):
+     min_level = np.exp(hparams.min_level_db / 20 * np.log(10))
+     return 20 * np.log10(np.maximum(min_level, x))
+
+ def _normalize(S, hparams):
+     return hparams.max_abs_value * np.clip(((S - hparams.min_db) /
+                                             (-hparams.min_db)), 0, 1)
+
+ def _db_to_amp(x):
+     return np.power(10.0, (x) * 0.05)
+
+
+ def _stft(y, hparams):
+     return librosa.stft(y=y,
+                         n_fft=hparams.n_fft,
+                         hop_length=hparams.hop_size,
+                         win_length=hparams.win_size)
+
+
+ def _istft(y, hparams):
+     return librosa.istft(y,
+                          hop_length=hparams.hop_size,
+                          win_length=hparams.win_size)
+
+
+ def melspectrogram(wav, hparams):
+     D = _stft(wav, hparams)
+     S = _amp_to_db(_linear_to_mel(np.abs(D), hparams),
+                    hparams) - hparams.ref_level_db
+     return _normalize(S, hparams)
+
utils/utils.py ADDED
@@ -0,0 +1,268 @@
+ import os
+ import glob
+ import sys
+ import argparse
+ import logging
+ import json
+ import subprocess
+ import numpy as np
+ from scipy.io.wavfile import read
+ import torch
+
+ MATPLOTLIB_FLAG = False
+
+ logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
+ logger = logging
+
+
+ def load_checkpoint(checkpoint_path, model, optimizer=None):
+     assert os.path.isfile(checkpoint_path)
+     checkpoint_dict = torch.load(checkpoint_path, map_location='cpu')
+     iteration = checkpoint_dict['iteration']
+     learning_rate = checkpoint_dict['learning_rate']
+     if optimizer is not None:
+         optimizer.load_state_dict(checkpoint_dict['optimizer'])
+     saved_state_dict = checkpoint_dict['model']
+     if hasattr(model, 'module'):
+         state_dict = model.module.state_dict()
+     else:
+         state_dict = model.state_dict()
+     new_state_dict = {}
+     for k, v in state_dict.items():
+         try:
+             new_state_dict[k] = saved_state_dict[k]
+             assert saved_state_dict[k].shape == v.shape, (saved_state_dict[k].shape, v.shape)
+         except:
+             print("error, %s is not in the checkpoint" % k)
+             logger.info("%s is not in the checkpoint" % k)
+             new_state_dict[k] = v
+     if hasattr(model, 'module'):
+         model.module.load_state_dict(new_state_dict)
+     else:
+         model.load_state_dict(new_state_dict)
+     print("load ")
+     logger.info("Loaded checkpoint '{}' (iteration {})".format(
+         checkpoint_path, iteration))
+     return model, optimizer, learning_rate, iteration
+
+
+ def save_checkpoint(model, optimizer, learning_rate, iteration, checkpoint_path, val_steps):
+     ckptname = checkpoint_path.split(os.sep)[-1]
+     newest_step = int(ckptname.split(".")[0].split("_")[1])
+     last_ckptname = checkpoint_path.replace(str(newest_step), str(newest_step - val_steps * 2))
+     if newest_step >= val_steps * 2:
+         os.system(f"rm {last_ckptname}")
+
+     logger.info("Saving model and optimizer state at iteration {} to {}".format(
+         iteration, checkpoint_path))
+     if hasattr(model, 'module'):
+         state_dict = model.module.state_dict()
+     else:
+         state_dict = model.state_dict()
+     torch.save({'model': state_dict,
+                 'iteration': iteration,
+                 'optimizer': optimizer.state_dict(),
+                 'learning_rate': learning_rate}, checkpoint_path)
+
+
+ def summarize(writer, global_step, scalars={}, histograms={}, images={}, audios={}, audio_sampling_rate=22050):
+     for k, v in scalars.items():
+         writer.add_scalar(k, v, global_step)
+     for k, v in histograms.items():
+         writer.add_histogram(k, v, global_step)
+     for k, v in images.items():
+         writer.add_image(k, v, global_step, dataformats='HWC')
+     for k, v in audios.items():
+         writer.add_audio(k, v, global_step, audio_sampling_rate)
+
+
+ def latest_checkpoint_path(dir_path, regex="G_*.pth"):
+     f_list = glob.glob(os.path.join(dir_path, regex))
+     f_list.sort(key=lambda f: int("".join(filter(str.isdigit, f))))
+     x = f_list[-1]
+     print(x)
+     return x
+
+
+ def plot_spectrogram_to_numpy(spectrogram):
+     global MATPLOTLIB_FLAG
+     if not MATPLOTLIB_FLAG:
+         import matplotlib
+         matplotlib.use("Agg")
+         MATPLOTLIB_FLAG = True
+         mpl_logger = logging.getLogger('matplotlib')
+         mpl_logger.setLevel(logging.WARNING)
+     import matplotlib.pylab as plt
+     import numpy as np
+
+     fig, ax = plt.subplots(figsize=(10, 2))
+     im = ax.imshow(spectrogram, aspect="auto", origin="lower",
+                    interpolation='none')
+     plt.colorbar(im, ax=ax)
+     plt.xlabel("Frames")
+     plt.ylabel("Channels")
+     plt.tight_layout()
+
+     fig.canvas.draw()
+     data = np.fromstring(fig.canvas.tostring_rgb(), dtype=np.uint8, sep='')
+     data = data.reshape(fig.canvas.get_width_height()[::-1] + (3,))
+     plt.close()
+     return data
+
+
+ def plot_alignment_to_numpy(alignment, info=None):
+     global MATPLOTLIB_FLAG
+     if not MATPLOTLIB_FLAG:
+         import matplotlib
+         matplotlib.use("Agg")
+         MATPLOTLIB_FLAG = True
+         mpl_logger = logging.getLogger('matplotlib')
+         mpl_logger.setLevel(logging.WARNING)
+     import matplotlib.pylab as plt
+     import numpy as np
+
+     fig, ax = plt.subplots(figsize=(6, 4))
+     im = ax.imshow(alignment.transpose(), aspect='auto', origin='lower',
+                    interpolation='none')
+     fig.colorbar(im, ax=ax)
+     xlabel = 'Decoder timestep'
+     if info is not None:
+         xlabel += '\n\n' + info
+     plt.xlabel(xlabel)
+     plt.ylabel('Encoder timestep')
+     plt.tight_layout()
+
+     fig.canvas.draw()
+     data = np.fromstring(fig.canvas.tostring_rgb(), dtype=np.uint8, sep='')
+     data = data.reshape(fig.canvas.get_width_height()[::-1] + (3,))
+     plt.close()
+     return data
+
+
+ def load_wav_to_torch(full_path):
+     sampling_rate, data = read(full_path)
+     return torch.FloatTensor(data.astype(np.float32)), sampling_rate
+
+
+ def load_filepaths_and_text(filename, split="|"):
+     with open(filename, encoding='utf-8') as f:
+         filepaths_and_text = [line.strip().split(split) for line in f]
+     return filepaths_and_text
+
+
+ def get_hparams(init=True):
+     parser = argparse.ArgumentParser()
+     parser.add_argument('-c', '--config', type=str, default="./configs/base.json",
+                         help='JSON file for configuration')
+     # parser.add_argument('-m', '--model', type=str, required=True,
+     #                     help='Model name')
+
+     args = parser.parse_args()
+
+     config_path = args.config
+     with open(config_path, "r") as f:
+         data = f.read()
+     config = json.loads(data)
+
+     hparams = HParams(**config)
+     # hparams.model_dir = model_dir
+     model_dir = hparams.train.save_dir
+     config_save_path = os.path.join(model_dir, "config.json")
+
+     if not os.path.exists(model_dir):
+         os.makedirs(model_dir)
+
+     with open(config_save_path, "w") as f:
+         f.write(data)
+     return hparams
+
+
+ def get_hparams_from_dir(model_dir):
+     config_save_path = os.path.join(model_dir, "config.json")
+     with open(config_save_path, "r") as f:
+         data = f.read()
+     config = json.loads(data)
+
+     hparams = HParams(**config)
+     hparams.model_dir = model_dir
+     return hparams
+
+
+ def get_hparams_from_file(config_path):
+     with open(config_path, "r") as f:
+         data = f.read()
+     config = json.loads(data)
+
+     hparams = HParams(**config)
+     return hparams
+
+
+ def check_git_hash(model_dir):
+     source_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
+     if not os.path.exists(os.path.join(source_dir, ".git")):
+         logger.warn("{} is not a git repository, therefore hash value comparison will be ignored.".format(
+             source_dir
+         ))
+         return
+
+     cur_hash = subprocess.getoutput("git rev-parse HEAD")
+
+     path = os.path.join(model_dir, "githash")
+     if os.path.exists(path):
+         saved_hash = open(path).read()
+         if saved_hash != cur_hash:
+             logger.warn("git hash values are different. {}(saved) != {}(current)".format(
+                 saved_hash[:8], cur_hash[:8]))
+     else:
+         open(path, "w").write(cur_hash)
+
+
+ def get_logger(model_dir, filename="train.log"):
+     global logger
+     logger = logging.getLogger(os.path.basename(model_dir))
+     logger.setLevel(logging.DEBUG)
+
+     formatter = logging.Formatter("%(asctime)s\t%(name)s\t%(levelname)s\t%(message)s")
+     if not os.path.exists(model_dir):
+         os.makedirs(model_dir)
+     h = logging.FileHandler(os.path.join(model_dir, filename))
+     h.setLevel(logging.DEBUG)
+     h.setFormatter(formatter)
+     logger.addHandler(h)
+     return logger
+
+
+ def count_parameters(model):
+     return sum(p.numel() for p in model.parameters() if p.requires_grad) / 1e6
+
+
+ class HParams():
+     def __init__(self, **kwargs):
+         for k, v in kwargs.items():
+             if type(v) == dict:
+                 v = HParams(**v)
+             self[k] = v
+
+     def keys(self):
+         return self.__dict__.keys()
+
+     def items(self):
+         return self.__dict__.items()
+
+     def values(self):
+         return self.__dict__.values()
+
+     def __len__(self):
+         return len(self.__dict__)
+
+     def __getitem__(self, key):
+         return getattr(self, key)
+
+     def __setitem__(self, key, value):
+         return setattr(self, key, value)
+
+     def __contains__(self, key):
+         return key in self.__dict__
+
+     def __repr__(self):
+         return self.__dict__.__repr__()
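Finally, HParams is a thin wrapper that turns nested config dicts into attribute access (nested dicts become nested HParams), which is what lets the rest of the code write hps.train.save_dir instead of indexing dicts. A minimal illustration with invented values:

```python
# Minimal illustration of HParams; the keys and values below are invented for
# the example, not taken from the project's config.json.
from utils.utils import HParams

hps = HParams(**{"data": {"sample_rate": 44100}, "train": {"save_dir": "ckpt"}})
print(hps.data.sample_rate)  # 44100
print(hps.train.save_dir)    # ckpt
print("data" in hps)         # True, via __contains__
```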