Common tensor operations for writing PyTorch models


Expand along a dimension by repeating the tensor: .expand

Pin down a tensor's dtype to avoid errors during training: .type

Add a size-1 dimension: .unsqueeze(0); remove a size-1 dimension: .squeeze(0)

Concatenate tensors: torch.cat (all other dimensions must match)

Swap two dimensions: .transpose(0, 1)

Change the shape: .reshape (a combined sketch of these operations follows)
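
A minimal self-contained sketch covering all of the operations above (resulting shapes noted in the comments):

import torch

x = torch.randn(1, 2, 64)      # shape (1, 2, 64)
y = x.expand(50, 2, 64)        # repeat the size-1 dim 0 fifty times -> (50, 2, 64)
x = x.type(torch.FloatTensor)  # pin the dtype to float32
a = x.unsqueeze(0)             # add a size-1 dimension -> (1, 1, 2, 64)
b = a.squeeze(0)               # remove the size-1 dimension -> (1, 2, 64)
c = torch.cat((x, x), dim=2)   # concatenate along dim 2 -> (1, 2, 128)
d = x.transpose(0, 1)          # swap dims 0 and 1 -> (2, 1, 64)
e = x.reshape(2, 64)           # change the shape -> (2, 64)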

A complete PyTorch development template

import torch
from torch import nn

x = torch.randn(1,2,64)
print(x.shape)
y = x.expand(50,2,64)  # the size-1 first dimension is broadcast from 1 to 50
print(y.shape)
x = x.type(torch.FloatTensor)
# excerpt from the model's forward(): cast the dtype and move to the device first
    def forward(self, x, batch_size):
        x = x.type(torch.FloatTensor)
        x = x.to(device)

# cat along dim=2 requires the remaining dimensions to match:
# print("137", x_input.shape, temp_aspect.shape)
# 137 torch.Size([50, 2, 64]) torch.Size([50, 2, 64])
x_input = torch.cat((x_input, temp_aspect), dim=2)
x_input = x_input.transpose(0, 1)
lstm_out = lstm_out.reshape(batch_size, -1)
# -*- coding: utf-8 -*-
import pandas as pd
import gensim
import jieba
import re
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from gensim.models import KeyedVectors
from gensim.scripts.glove2word2vec import glove2word2vec
import torch
from torch import nn
import torch.utils.data as data
import torch.nn.functional as F
from torch import tensor
from sklearn.metrics import f1_score
from datetime import datetime
from torch.utils.data import Dataset, DataLoader
from torch.utils.data import random_split
import copy  # used to snapshot the best model during training
def data_process():
        data=pd.read_excel("pre_process_level_2_table(1).xlsx")
        data_neirong=list(data['内容'].values)
        data_1_aspect=list(data['1_aspect'].values)
        data_label=list(data['label'].values)

        aspect_vec_dict={}
        with open("ceshi_1_aspect_vec.txt","r") as f:
            for line in f.readlines():
                temp_word=line.split("_||_")[0]
                temp_vec=line.split("_||_")[1].split(" ")[:-1]
                temp_vec=[float(i) for i in temp_vec]  # convert to a list of floats
                aspect_vec_dict[temp_word]=temp_vec
        print(aspect_vec_dict)
        data_neirong_word_list=[]
        text_len=[]
        for line in data_neirong:
            line = line.strip().split(" ")
            while '' in line:  # drop the empty tokens left by consecutive spaces
                line.remove('')
            data_neirong_word_list.append(line)
            text_len.append(len(line))
        print("48-----------------------")
        # print(max(text_len),np.mean(text_len))# 393 14.989528010696924
        # 对句子进行截断重复 设置句子长度是 50
        # pading_data_neirong_word_list=[]
        data_x = []
        temp_data_y=[]
        for idx,line in tqdm(enumerate(data_neirong_word_list)):
            # print("54",idx, len(line),line)
            temp_line = line.copy()
            # some rows contain only spaces, so the repeat-padding loop below could spin forever
            temp_idx = 0  # guard counter for the while loop to handle that case
            if len(line) < 60:
                while 1:
                    line = line + temp_line
                    # print(len(line))
                    temp_idx += 1
                    if len(line) >= 50:
                        break
                    if temp_idx == 50:
                        break
            if temp_idx != 50:
                line = line[:50]
                data_x.append(line + [data_1_aspect[idx]])
                temp_data_y.append(data_label[idx])
        print("62----&#x6570;&#x636E;&#x6570;&#x76EE;&#xFF1A;---------",len(data_x))
        # &#x77E9;&#x9635;&#x751F;&#x6210;
        wd2 = gensim.models.Word2Vec.load("wd2.bin")#print(wd2.wv['hotel'])
        data_x_vec=[]
        # data_x_aspect=[]
        data_y=[]
        for idx,line in tqdm(enumerate(data_x)):
                try:
                    # print(line)
                    temp_vec=[]
                    line_neirong=line[:-1]
                    line_1_aspect=line[-1]
                    for word in line_neirong:
                        temp_vec.append(wd2.wv[word])

                    temp_vec.append(np.array(aspect_vec_dict[line_1_aspect]))
                    data_x_vec.append(temp_vec)
                    data_y.append(temp_data_y[idx])
                except KeyError:
                    pass
        return np.array(data_y),np.array(data_x_vec)#,np.array(data_x_aspect)

class mydataset(Dataset):
    def __init__(self):  # read and load the data
        data_y,data_x=data_process()
        self._x = torch.tensor(np.array(data_x).astype(float))
        self._y = torch.tensor(np.array(data_y).astype(float))
        print(len(data_x),data_y.shape,data_y)
        # self._aspect= torch.tensor(np.array(data_x_aspect).astype(float))
        self._len = len(data_y)
    def __getitem__(self, item):
        return self._x[item], self._y[item]#,self._aspect[item]
    def __len__(self):  # return the total number of samples
        return self._len
mydata = mydataset()
Split into training and test sets
train_data, test_data = random_split(mydata, [round(0.8 * mydata._len), round(0.2 * mydata._len)])
# random_split shuffles before splitting; round() keeps the two sizes integral
# (some versions do not accept the generator=torch.Generator().manual_seed(0) argument)
#
train_loader =DataLoader(train_data, batch_size =2, shuffle = True, num_workers = 0 , drop_last=False)
#
# for step,(train_x,train_y) in enumerate(train_loader):
#     print(step,':',(train_x.shape,train_y.shape),(train_x,train_y))
#     break
#
# test loader
test_loader = DataLoader(test_data, batch_size=2, shuffle=True, num_workers=0, drop_last=False)
# drop_last: keep or discard the final batch when it is smaller than batch_size; num_workers: number of worker processes for loading data
#
# quick check of the test loader
# for step,(test_x,test_y) in enumerate(test_loader):
#     print(step,':',(test_x.shape,test_y.shape),(test_x,test_y))
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class LSTM_attention(nn.Module):  # note: the M in Module must be capitalized
    def __init__(self, ):
        super().__init__()
        input_size = 64
        hidden_size = 64
        output_size = 64
        # input_size: length of each LSTM input vector; hidden_size: length of the LSTM output/hidden vector
        self.lstm = nn.LSTM(input_size, output_size, num_layers=1)  # ,batch_first=True
        self.ReLU = nn.ReLU()
        self.attention = nn.Linear(6400,64)
        self.liner=nn.Linear(128,5)
    def forward(self, x, batch_size):
        x = x.type(torch.FloatTensor)
        x = x.to(device)

        x_input=x[:,:50]
        x_input=x_input.transpose(0,1)

        temp_aspect=x[:,-1]
        temp_aspect=temp_aspect.unsqueeze(0)
        temp_aspect =temp_aspect.expand(50,batch_size, 64)

        #print("137",x_input.shape,temp_aspect.shape)# 137 torch.Size([50, 2, 64]) torch.Size([50, 2, 64])
        x_input=torch.cat((x_input,temp_aspect),dim=2)
        #print("137",x_input.shape,temp_aspect.shape)# 137 torch.Size([50, 2, 128]) torch.Size([50, 2, 64])
        # LSTM input shape: [seq_len, batch_size, feature_dim], i.e. [seq_len, batch, 64]
        # note: this feeds x rather than the concatenated x_input built above
        lstm_out, (h_n, c_n) = self.lstm(x, None)
        lstm_out = self.ReLU(lstm_out)
        last_lstm = lstm_out[:, -1]  # take the last step
        lstm_out = lstm_out[:, :-1]
        lstm_out=lstm_out.transpose(0, 1)
        #print("154",lstm_out.shape,temp_aspect.shape)
        lstm_out=torch.cat((lstm_out,temp_aspect),dim=2)
        lstm_out=lstm_out.transpose(0, 1)
        lstm_out=lstm_out.reshape(batch_size,-1)

        lstm_out = self.ReLU(lstm_out)
        lstm_out  = self.attention(lstm_out)
        lstm_out = self.ReLU(lstm_out)

        # print("157",lstm_out.shape,last_lstm.shape)
        out_sum= torch.cat((lstm_out,last_lstm), dim=1)
        # print(out_sum.shape)
        prediction=self.liner(out_sum)
        return prediction
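
A quick shape check of the forward pass (a sketch: an arbitrary batch of 4, reusing the device defined above):

model_check = LSTM_attention().to(device)
dummy = torch.randn(4, 51, 64)  # 50 word vectors plus 1 aspect vector per sample
out = model_check(dummy, batch_size=4)
print(out.shape)  # expected: torch.Size([4, 5])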

This function evaluates the model on the x_test / y_test data:
def eval_test(model):  # returns the mean loss over the test batches
    test_epoch_loss = []
    with torch.no_grad():
        optimizer.zero_grad()
        for step, (test_x, test_y) in enumerate(test_loader):
            y_pre = model(test_x, batch_size)
            test_y = test_y.to(device)
            test_loss = loss_function(y_pre, test_y.long())
            test_epoch_loss.append(test_loss.item())
    return np.mean(test_epoch_loss)

epochs = 50
batch_size = 128
When testing the model, batch_size stays fixed (e.g. batch_size = 19), while epochs can be set freely.
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=True, num_workers=0, drop_last=True)
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=0, drop_last=True)

Create the LSTM_attention object and define the loss function and optimizer

model = LSTM_attention().to(device)
loss_function = torch.nn.CrossEntropyLoss().to(device)  # cross-entropy loss
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)  # instantiate the optimizer
print(model)

sum_train_epoch_loss = []  # per-epoch training loss
sum_test_epoch_loss = []  # per-epoch test loss
best_test_loss = 10000
for epoch in tqdm(range(epochs)):
    epoch_loss = []
    for step, (train_x, train_y) in enumerate(train_loader):
        optimizer.zero_grad()  # clear gradients left over from the previous step
        y_pred = model(train_x, batch_size)
        # forward pass: compute the network output and its loss against the labels
        # print(y_pred, train_y)
        single_loss = loss_function(y_pred.cpu(), train_y.long())
        # print("single_loss", single_loss)
        single_loss.backward()  # backward() computes the gradients
        optimizer.step()  # optimizer.step() applies the gradients to the network
        epoch_loss.append(single_loss.item())
    train_epoch_loss = np.mean(epoch_loss)
    test_epoch_loss = eval_test(model)  # mean loss on the test data

    if test_epoch_loss < best_test_loss:
        best_test_loss = test_epoch_loss
        print("best_test_loss", best_test_loss)
        best_model = copy.deepcopy(model)  # snapshot the weights; plain assignment would only alias the live model
    sum_train_epoch_loss.append(train_epoch_loss)
    sum_test_epoch_loss.append(test_epoch_loss)
    print("epoch:" + str(epoch) + "  train_epoch_loss&#xFF1A; " + str(train_epoch_loss) + "  test_epoch_loss: " + str(
        test_epoch_loss))

torch.save(best_model, 'best_model.pth')
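
torch.save(best_model, ...) pickles the entire module. A common alternative (a sketch; the filename best_model_state.pth is hypothetical) is to save only the weights:

torch.save(best_model.state_dict(), 'best_model_state.pth')  # weights only
model2 = LSTM_attention()
model2.load_state_dict(torch.load('best_model_state.pth'))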

Plotting
fig = plt.figure(facecolor='white', figsize=(10, 7))
plt.xlabel('epoch')
plt.ylabel('loss')
plt.xlim(xmax=len(sum_train_epoch_loss), xmin=0)
plt.ylim(ymax=max(sum_train_epoch_loss), ymin=0)
Plot the two series and set the x and y axis labels

x1 = [i for i in range(0, len(sum_train_epoch_loss), 1)]  # x coordinates: one point per epoch
y1 = sum_train_epoch_loss  # y coordinates: the per-epoch training losses

x2 = [i for i in range(0, len(sum_test_epoch_loss), 1)]
y2 = sum_test_epoch_loss

colors1 = '#00CED4'  # point colour for the training curve
colors2 = '#DC143C'  # point colour for the test curve
area = np.pi * 4 ** 1  # point area
Draw the scatter plots
plt.scatter(x1, y1, s=area, c=colors1, alpha=0.4, label='train_loss')
plt.scatter(x2, y2, s=area, c=colors2, alpha=0.4, label='val_loss')
plt.plot([0,9.5],[9.5,0],linewidth = '0.5',color='#000000')
plt.legend()
plt.savefig(r'C:\Users\jichao\Desktop\大论文\12345svm.png', dpi=300)
plt.show()

import sklearn
from sklearn.metrics import accuracy_score
Load the model:
model.load_state_dict(torch.load('best_model.pth').cpu().state_dict())
model.eval()
test_pred = []
test_true = []

with torch.no_grad():
    optimizer.zero_grad()
    for step, (test_x, test_y) in enumerate(test_loader):
        y_pre = model(test_x, batch_size).cpu()
        y_pre = torch.argmax(y_pre, dim=1)
        for i in y_pre:
            test_pred.append(i)
        for i in test_y:
            test_true.append(i)

Acc = accuracy_score(test_true, test_pred)  # sklearn's convention is (y_true, y_pred)
print(Acc)
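
f1_score is imported at the top but never used; a small sketch of reporting macro-F1 alongside accuracy:

F1 = f1_score(test_true, test_pred, average='macro')  # macro-average over the 5 classes
print(F1)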


Original: https://blog.csdn.net/qq_38735017/article/details/126469631
Author: 甜辣uu
Title: Common tensor operations for writing PyTorch models



Related Reading 1

Title: Speech recognition — everyday notes

Voice Recognition

March 21, 2021
HowardXue

Evolution of speech models: template matching (DTW) -> statistical models (GMM Gaussian mixtures + hidden Markov models) -> deep learning (DNN-HMM, E2E)

Audio coding: PCM in a WAV container is the common format
Speech sample rates: 8 kHz or 16 kHz
A 6-mic array does sound-source localization; once the source is located, its spatial directivity effectively suppresses interference from other directions (e.g. other people speaking nearby)
Open-source toolkits: HTK, Kaldi, ESPnet (Python)
Phoneme inventories: English has 48 phonemes (20 vowels, 28 consonants); Mandarin has 32 phonemes, 10 of them vowels

The discrete Fourier transform (DFT) maps a time-domain signal to the frequency domain; the inverse transform restores the frequency-domain signal to the time domain.
In practice the fast Fourier transform (FFT) is used to cut the computational complexity.
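
A minimal NumPy round trip illustrating the transform pair (a sketch; the 440 Hz tone is an arbitrary example):

import numpy as np

t = np.linspace(0, 1, 16000, endpoint=False)  # 1 second at 16 kHz
sig = np.sin(2 * np.pi * 440 * t)             # 440 Hz tone in the time domain
spec = np.fft.rfft(sig)                       # FFT: time domain -> frequency domain
recovered = np.fft.irfft(spec, n=len(sig))    # inverse FFT: frequency -> time domain
assert np.allclose(sig, recovered)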
Windowing: the signal is processed frame by frame.
Common acoustic features: MFCC, FBank, and spectrograms.
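
A short sketch of extracting FBank and MFCC features with torchaudio's Kaldi-compatible functions (the filename example.wav is a hypothetical mono 16 kHz file):

import torchaudio

waveform, sample_rate = torchaudio.load("example.wav")
fbank = torchaudio.compliance.kaldi.fbank(waveform, num_mel_bins=80, sample_frequency=sample_rate)
mfcc = torchaudio.compliance.kaldi.mfcc(waveform, num_ceps=13, sample_frequency=sample_rate)
print(fbank.shape, mfcc.shape)  # (num_frames, 80) and (num_frames, 13)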

HMM Markov chain: predicts the next event from the current event alone — a doubly stochastic process.
The HMM is the acoustic model -> speech data.
The RNN is the language model -> text data: word-to-word combination probabilities, based on a statistical language model.
Decoders: the traditional dynamic-network Viterbi decoder -> the WFST static-network decoder.
A WFST compiles the pronunciation lexicon, acoustic model, and language model (the three components) into a single static network -> fast decoding.

The DNN's output nodes correspond one-to-one with HMM states; the DNN output yields each state's observation probability.
The different phonemes (a, e, i, ..., o) are all mapped onto the DNN's output nodes.

Using CNNs in the DNN: treat the spectrogram as an image and extract local time-domain and frequency-domain feature maps.
RNN variants: LSTM, GRU.
TDNN: time-delay neural network.
CNN + TDNN-F hybrid: the CNN first extracts local frequency features, then the TDNN-F extracts contextual temporal features.

An E2E ASR model needs only the input speech features and the output text; it fuses the three traditional ASR components into one network.
Common E2E models: CTC, RNN-T, Transformer.
RNN-T joint modeling: speech recognition + speaker discrimination (the recognized text carries speaker IDs).

The attention mechanism resembles how a human translates a passage: attention is focused on the context corresponding to the part currently being translated.

Sequence-to-sequence (seq2seq) problems model the input features and output results with an encoder/decoder pair.
Adding an attention mechanism improves seq2seq, as the sketch below illustrates.
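
A minimal sketch of dot-product attention over encoder states (illustrative only; the dimensions are arbitrary and this is not any specific toolkit's exact formulation):

import torch
import torch.nn.functional as F

enc_states = torch.randn(30, 256)   # 30 encoder time steps, 256-dim states
dec_state = torch.randn(256)        # current decoder state (the query)
scores = enc_states @ dec_state     # one score per encoder step: shape (30,)
weights = F.softmax(scores, dim=0)  # attention weights summing to 1
context = weights @ enc_states      # weighted sum: a (256,) context vector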

ESPnet feature extraction: uses Kaldi's native scripts directly and can extract MFCC/FBank/PLP features.
After extraction, cepstral mean and variance normalization (CMVN) is applied so the features follow a Gaussian with zero mean and unit variance.
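
A minimal per-utterance CMVN sketch (assumes a (frames, dims) feature matrix such as the FBank output above; the random features are placeholders):

import numpy as np

feats = np.random.randn(200, 80)            # placeholder: 200 frames, 80 dims
mean = feats.mean(axis=0)
std = feats.std(axis=0)
cmvn_feats = (feats - mean) / (std + 1e-8)  # zero mean, unit variance per dimension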

Speech data augmentation: volume perturbation and speed perturbation.
Lexicon generation: numbers mapped to characters.

data2json.sh: the mapping files are all packaged by the data2json.sh script.
Train.yaml: the training configuration, e.g. which acoustic model to use and whether to pick a CTC/Attention/Transformer structure.
Lm_train.py: language-model training; the output is rnnlm.model.best.
Asr_train.py: acoustic-model training.
The default encoder is a BLSTM.
Asr.recog.py: the speech-recognition decoder.

Deploying the model to the edge: compile Kaldi into a shared library (.so/.dll) -> cross-compile and port Kaldi to an embedded ARM Linux platform.

Transformer:

Transformer: attention is used in every decoder and encoder layer; in the encoder especially, the traditional RNN is replaced entirely by attention.
The Transformer is essentially still a seq2seq architecture.

To be continued...

Original: https://blog.csdn.net/HowieXue/article/details/117389549
Author: HowieXue
Title: Speech recognition — everyday notes

Related Reading 2

Title: NSGA-II fast non-dominated sorting in Python

import numpy as np
import matplotlib.pyplot as plt


def compare(p1, p2):
    # returns 0: same front; 1: p1 dominates p2; -1: p2 dominates p1
    # smaller is better in every dimension; D comparisons per pair
    D = len(p1)
    p1_dominate_p2 = True  # p1 is smaller
    p2_dominate_p1 = True
    for i in range(D):
        if p1[i] > p2[i]:
            p1_dominate_p2 = False
        if p1[i] < p2[i]:
            p2_dominate_p1 = False

    if p1_dominate_p2 == p2_dominate_p1:
        return 0
    return 1 if p1_dominate_p2 else -1


def fast_non_dominated_sort(P):
    # members are numbered 0 .. P_size-1
    P_size = len(P)
    # domination count of each member
    n = np.full(shape=P_size, fill_value=0)
    # the members each member dominates
    S = []
    # member indices contained in each front
    f = []  # starts from front 0
    # rank (front index) of each member
    rank = np.full(shape=P_size, fill_value=-1)

    f_0 = []
    for p in range(P_size):
        n_p = 0
        S_p = []
        for q in range(P_size):
            if p == q:
                continue
            cmp = compare(P[p], P[q])
            if cmp == 1:
                S_p.append(q)
            elif cmp == -1:  # p is dominated
                n_p += 1
        S.append(S_p)
        n[p] = n_p
        if n_p == 0:
            rank[p] = 0
            f_0.append(p)

    f.append(f_0)  # f[0] is guaranteed to be non-empty at this point

    i = 0
    while len(f[i]) != 0:  # there may still be a front i+1
        Q = []
        for p in f[i]:  # each member of front i
            for q in S[p]:  # each member dominated by p
                n[q] -= 1
                if n[q] == 0:
                    rank[q] = i + 1
                    Q.append(q)
        i += 1
        f.append(Q)
    return rank, f


if __name__ == '__main__':
    P = np.random.random(size=(200, 2))
    rank, f = fast_non_dominated_sort(P)
    f.pop()  # drop the trailing empty front
    # print(rank)
    # print(f)

    # plot each front
    for t in f:
        x = P[t][:, 0]
        y = P[t][:, 1]
        plt.scatter(x, y, s=15)  # s: marker size; c: colour; alpha: transparency

    plt.show()

Please credit the original source when reposting: https://www.cnblogs.com/Twobox/p/16408840.html


Original: https://www.cnblogs.com/Twobox/p/16408840.html
Author: Wei_Xiong
Title: NSGA-II fast non-dominated sorting in Python

Related Reading 3

Title: [Better Chinese speech recognition: deploying SpeechBrain locally on Win10/11, based on AISHELL]

Environment: Win11 x64 + VSCode + Python 3.7.2 x64 + PyTorch 1.9 (CPU or GPU)
This article assumes Win11; Win10 works just as well, since everything here is backward compatible.

First, set up VSCode (with the Python extension installed) and a working Python environment; the default base environment is fine, though you can also create a dedicated environment with conda.

Then download the model files from https://huggingface.co/speechbrain/asr-transformer-aishell/tree/main.
After downloading, rename the files (names and extensions) to match the repository listing exactly (this is required)!
Then create a Python project folder in VSCode, make a pretrained_models/asr-transformer-aishell folder inside it, and put all the downloaded files there.
Install the environment with pip:
pip install speechbrain
PS: this command installs about 90% of the environment (including the CPU build of PyTorch by default), but one piece is still missing: a torchaudio backend. torchaudio is a thin wrapper API, so install the SoundFile or SoX backend manually (skip this if one is already installed):
pip install SoundFile
or
pip install sox

Then...

For reference, see the Google Colab notebook:
https://colab.research.google.com/drive/1hX5ZI9S4jHIjahFCZnhwwQmFoGAi3tmu?usp=sharing#scrollTo=OKI0SovKtbZm

Now create the Python script:

from speechbrain.pretrained import EncoderDecoderASR
import torch
import torchaudio

# https://huggingface.co/speechbrain/asr-transformer-aishell/tree/main
# https://colab.research.google.com/drive/1hX5ZI9S4jHIjahFCZnhwwQmFoGAi3tmu?usp=sharing#scrollTo=PPB0K9z3B43c

asr_model = EncoderDecoderASR.from_hparams(source="speechbrain/asr-transformer-aishell", savedir="pretrained_models/asr-transformer-aishell")
asr_model.transcribe_file("speechbrain/asr-transformer-aishell/example_mandarin.wav")

audio_1 = "F:/CSharpProject/KaldiDemo/KaldiDemo/bin/x64/Release/妹妹就是爱.flac"
#error: No audio IO backend is available
#fix: install SoundFile with: pip install SoundFile
#or install SoX with: pip install sox
ddd=torchaudio.list_audio_backends()
print(ddd)
snt_1, fs = torchaudio.load(audio_1)
wav_lens=torch.tensor([1.0])
print('snt_1:',snt_1," wav_lens:",wav_lens)
res=asr_model.transcribe_batch(snt_1, wav_lens)

print('res:',res)

# For users running the GPU build of PyTorch, the model can be loaded as follows.
# Uncomment to use another pre-trained model:
#asr_model = EncoderDecoderASR.from_hparams(source="speechbrain/asr-crdnn-rnnlm-librispeech", savedir="pretrained_models/asr-crdnn-rnnlm-librispeech",  run_opts={"device":"cuda"})
#asr_model = EncoderDecoderASR.from_hparams(source="speechbrain/asr-crdnn-transformerlm-librispeech", savedir="pretrained_models/asr-crdnn-transformerlm-librispeech",  run_opts={"device":"cuda"})
#asr_model = EncoderDecoderASR.from_hparams(source="speechbrain/asr-transformer-transformerlm-librispeech", savedir="pretrained_models/asr-transformer-transformerlm-librispeech",  run_opts={"device":"cuda"})

PS: recognition is quite fast, even on CPU; on GPU it should be faster still.
If you see output along those lines, congratulations: it is working.
The complete code and model files are uploaded to the group share and to CSDN; join the group to study them, or buy the download for a few cents:
https://download.csdn.net/download/weixin_44029053/32726942
With PyTorch and the Python environment installed, set the Python working directory to the project root in VSCode and run it directly; no code changes are needed.

Next, we will use this to train our own wake word for a hands-on voice wake-up project; stay tuned to my blog, and remember to like, comment, and subscribe!

PS: I am not a speech professional, just learning too; feel free to join the group to discuss and pool ideas. Group number: 558174476 (Games and AI Life).

Original: https://blog.csdn.net/weixin_44029053/article/details/120057507
Author: superowner001
Title: [Better Chinese speech recognition: deploying SpeechBrain locally on Win10/11, based on AISHELL]