目前正在做一个IM的APP,好友之间可以互相发送语音,需要通过长按实现语音转文字的功能。之前使用的是阿里的NUI.framework,但是它经常出现转出来的文字重复的问题,即使做了多声道控制也无法解决,体验太差。没办法,就决定替换为Apple自己的实现,毕竟Siri那么强大!此实现同时支持本地音频及远程音频,你只需要在数据model中保存对应的path即可,内部会自动识别。
现在来看看实现条件:
在
Info.plist
里面添加两个键值对:
1、Privacy - Speech Recognition Usage Description
(用于请求语音识别)2、
Privacy - Microphone Usage Description
(用于请求麦克风语音输入授权)。并给出相应的文字描述。
导入库文件:
#import <Speech/Speech.h>
以下为实现头文件及逻辑文件:
头文件: NSVoice2Text.h
//
//  NSVoice2Text.h
//  Speech to text
//
//  Created by wise on 2021/10/13.
//

#import <Foundation/Foundation.h>
#import <Speech/Speech.h>

NS_ASSUME_NONNULL_BEGIN

/// Speech-recognition authorization state handed to callers.
///
/// Every case is pinned to the corresponding SFSpeechRecognizerAuthorizationStatus value
/// (the numeric values are unchanged), so a system status converts to this type without
/// depending on the two enums happening to be declared in the same order.
typedef NS_ENUM(NSUInteger, NSVoice2TextAuthorationStatus) {
    /// The user has not been asked for speech-recognition permission yet.
    NSVoice2TextAuthorizationStatusNotDetermined = SFSpeechRecognizerAuthorizationStatusNotDetermined,
    /// The user refused speech recognition.
    NSVoice2TextAuthorizationStatusDenied = SFSpeechRecognizerAuthorizationStatusDenied,
    /// Speech recognition is restricted on this device.
    NSVoice2TextAuthorizationStatusRestricted = SFSpeechRecognizerAuthorizationStatusRestricted,
    /// Speech recognition may be used.
    NSVoice2TextAuthorizationStatusAuthorized = SFSpeechRecognizerAuthorizationStatusAuthorized,
};

/// One queued audio clip to be converted to text.
@interface NSVoiceModel : NSObject

/// Location of the audio: either a local file path or a string starting with "http"
/// (treated as a remote URL). Nil or empty paths are reported as a failure.
@property (nonatomic, copy, nullable) NSString *path;

/// Caller-chosen identifier. It is echoed back in NSVoice2TextFinal and is also the
/// de-duplication key: a model whose taskId is already queued is ignored.
@property (nonatomic, assign) NSInteger taskId;

/// YES once recognition has been started for this model. Managed by NSVoice2Text.
@property (nonatomic, assign) BOOL isRunning;

/// YES once the queue has picked this model up. Managed by NSVoice2Text.
@property (nonatomic, assign) BOOL isInQueue;

@end

/// Outcome of converting one NSVoiceModel.
@interface NSVoice2TextFinal : NSObject

/// The recognized text. Nil when the conversion failed.
@property (nonatomic, copy, nullable) NSString *value;

/// The taskId of the model this outcome belongs to.
@property (nonatomic, assign) NSInteger taskId;

/// The failure reason, or nil on success.
@property (nonatomic, copy, nullable) NSError *error;

@end

/// Serial speech-to-text queue built on the Speech framework.
@interface NSVoice2Text : NSObject

/// Whether the model at the head of the queue is currently being recognized.
+ (BOOL)isRunning;

/// Asks the user for speech-recognition permission and reports the resulting status.
/// @param requestBlock Called with the authorization status once it is known.
+ (void)voice2TextRequestAuthorationStatus:(void (^)(NSVoice2TextAuthorationStatus status))requestBlock;

/// Appends the given models to the conversion queue and starts processing if idle.
/// @param glist Models to convert; entries whose taskId is already queued are skipped.
/// @param runningModelBlock Called with a model when the queue begins working on it.
/// @param resultsBlock Called with the outcome of each model.
/// @param rtaget Owner of the request. It is held weakly; if it has been deallocated by
///        the time recognition finishes, the result is discarded without calling resultsBlock.
+ (void)voice2TextGotter:(NSArray<NSVoiceModel *> *)glist
       runningModelBlock:(void (^__nullable)(NSVoiceModel *amodel))runningModelBlock
            resultsBlock:(void (^)(NSVoice2TextFinal *finalValue))resultsBlock
                  rtaget:(id)rtaget;

@end

NS_ASSUME_NONNULL_END
实现文件:NSVoice2Text.m
//
//  NSVoice2Text.m
//  Speech to text
//
//  Created by wise on 2021/10/13.
//

#import "NSVoice2Text.h"
#import "NSMutableTaskQueue.h"

typedef void (^VoiceConversionResultsBlock)(NSVoice2TextFinal *finalValue);

/// Domain for errors created by this file (errors coming from Speech are forwarded as-is).
static NSString * const kNSVoice2TextErrorDomain = @"NSVoice2TextErrorDomain";

/// Runs `work` right away when already on the main thread, otherwise hops to the main queue.
/// All queue state in this file is touched only through this function, which is what keeps
/// the unsynchronized NSMutableArray safe.
static void NSVoice2TextRunOnMain(dispatch_block_t work) {
    if ([NSThread isMainThread]) {
        work();
    } else {
        dispatch_async(dispatch_get_main_queue(), work);
    }
}

/// Converts the system authorization status explicitly instead of relying on an implicit cast.
static NSVoice2TextAuthorationStatus NSVoice2TextStatusFromSystemStatus(SFSpeechRecognizerAuthorizationStatus status) {
    switch (status) {
        case SFSpeechRecognizerAuthorizationStatusAuthorized:
            return NSVoice2TextAuthorizationStatusAuthorized;
        case SFSpeechRecognizerAuthorizationStatusDenied:
            return NSVoice2TextAuthorizationStatusDenied;
        case SFSpeechRecognizerAuthorizationStatusRestricted:
            return NSVoice2TextAuthorizationStatusRestricted;
        case SFSpeechRecognizerAuthorizationStatusNotDetermined:
        default:
            return NSVoice2TextAuthorizationStatusNotDetermined;
    }
}

@interface NSVoiceModel ()
@property (nonatomic, weak) id taskTarget;
@property (nonatomic, copy) VoiceConversionResultsBlock voiceConversionBlock;
@property (nonatomic, copy) void (^voiceConversionRunningBlock)(NSVoiceModel *md);
@end

@implementation NSVoiceModel
@end

@implementation NSVoice2TextFinal
@end

static NSVoice2Text *v2text = nil;

@interface NSVoice2Text () <SFSpeechRecognizerDelegate> {
    NSMutableArray<NSVoiceModel *> *taskList;
}
@property (nonatomic, assign) NSVoice2TextAuthorationStatus authorationStatus;
@property (nonatomic, strong) SFSpeechRecognizer *speechRecognizer; // shared recognizer, created lazily
@end

@implementation NSVoice2Text

#pragma mark - Lifecycle

- (instancetype)init {
    self = [super init];
    if (self) {
        taskList = [NSMutableArray array];
    }
    return self;
}

/// Returns the shared instance, creating it on first use.
/// Guarded by a lock because callers may arrive from any thread.
+ (instancetype)shareInstance {
    @synchronized ([NSVoice2Text class]) {
        if (!v2text) {
            v2text = [[NSVoice2Text alloc] init];
        }
        return v2text;
    }
}

/// Drops the shared instance; the next -shareInstance call creates a fresh one.
+ (void)releaseInstance {
    @synchronized ([NSVoice2Text class]) {
        v2text = nil;
    }
}

/// Lazily creates the zh-CN recognizer. May return nil when the locale is not supported.
- (SFSpeechRecognizer *)speechRecognizer {
    if (_speechRecognizer == nil) {
        NSLocale *locale = [[NSLocale alloc] initWithLocaleIdentifier:@"zh-CN"];
        _speechRecognizer = [[SFSpeechRecognizer alloc] initWithLocale:locale];
        _speechRecognizer.delegate = self;
    }
    return _speechRecognizer;
}

#pragma mark - Public

+ (BOOL)isRunning {
    NSVoiceModel *md = [[NSVoice2Text shareInstance]->taskList firstObject];
    return md.isRunning;
}

/// Requests speech-recognition permission.
/// @param requestBlock Receives the resulting status; invoked on the main thread.
+ (void)voice2TextRequestAuthorationStatus:(void (^)(NSVoice2TextAuthorationStatus status))requestBlock {
    [SFSpeechRecognizer requestAuthorization:^(SFSpeechRecognizerAuthorizationStatus status) {
        NSVoice2TextAuthorationStatus mapped = NSVoice2TextStatusFromSystemStatus(status);
        // The system may call this handler on an arbitrary queue; callers typically update UI.
        NSVoice2TextRunOnMain(^{
            [[NSVoice2Text shareInstance] setAuthorationStatus:mapped];
            if (requestBlock) {
                requestBlock(mapped);
            }
        });
    }];
}

/// Queues the models for conversion and starts the queue if it is idle.
/// Both blocks are invoked on the main thread.
/// @param glist Models to convert; a model whose taskId is already queued is skipped.
/// @param runningModelBlock Called when the queue starts working on a model.
/// @param resultsBlock Called with the outcome of each model.
/// @param rtaget Held weakly; results are discarded once it has been deallocated.
+ (void)voice2TextGotter:(NSArray<NSVoiceModel *> *)glist
       runningModelBlock:(void (^__nullable)(NSVoiceModel *amodel))runningModelBlock
            resultsBlock:(void (^)(NSVoice2TextFinal *finalValue))resultsBlock
                  rtaget:(id)rtaget {
    NSArray<NSVoiceModel *> *models = [glist copy];
    NSVoice2TextRunOnMain(^{
        NSVoice2Text *shared = [NSVoice2Text shareInstance];
        for (NSVoiceModel *obj in models) {
            [obj setVoiceConversionRunningBlock:runningModelBlock];
            [obj setVoiceConversionBlock:resultsBlock];
            [obj setTaskTarget:rtaget];
            [shared addItToTask:obj];
        }
        [shared resume];
    });
}

#pragma mark - Queue handling (main thread only)

/// Appends `md` unless a model with the same taskId is already queued.
- (void)addItToTask:(NSVoiceModel *)md {
    for (NSVoiceModel *queued in taskList) {
        if (queued.taskId == md.taskId) {
            return;
        }
    }
    [taskList addObject:md];
}

/// Starts the model at the head of the queue if it has not been picked up yet.
- (void)resume {
    NSVoiceModel *md = [taskList firstObject];
    if (!md || md.isInQueue) {
        return;
    }
    md.isInQueue = YES;
    if (md.voiceConversionRunningBlock) {
        md.voiceConversionRunningBlock(md);
    }

    // Check the string length, not the pointer, and reject strings that cannot form a URL.
    NSURL *audioURL = nil;
    if (md.path.length > 0 && !md.isRunning) {
        audioURL = [self audioURLForPath:md.path];
    }
    if (!audioURL) {
        NSVoice2TextFinal *el = [[NSVoice2TextFinal alloc] init];
        // Report the real taskId so the caller can locate the UI that failed.
        el.taskId = md.taskId;
        el.error = [NSError errorWithDomain:@"语音路径错误或为空" code:404 userInfo:nil];
        // Dequeue the broken entry; leaving it at the head would stall every later task.
        [self finishTask:md withResult:el];
        return;
    }

    md.isRunning = YES;
    [self startVoiceConversion:audioURL forTask:md];
}

/// Builds the audio URL: strings starting with "http" are remote, everything else is a file path.
/// NOTE(review): SFSpeechURLRecognitionRequest is documented for recorded audio files — confirm
/// that remote URLs are actually accepted on the deployment targets.
- (NSURL *)audioURLForPath:(NSString *)path {
    NSString *pattern = @"^(http|https)+.*";
    NSPredicate *remoteTest = [NSPredicate predicateWithFormat:@"SELF MATCHES %@", pattern];
    if ([remoteTest evaluateWithObject:path]) {
        return [NSURL URLWithString:path]; // nil for malformed strings
    }
    return [NSURL fileURLWithPath:path];
}

/// Removes `md` from the queue, resets its state, delivers `finalValue` (when non-nil)
/// and moves on to the next model. Calls for a model that is no longer queued are ignored,
/// which protects the following task from late or duplicate recognizer callbacks.
- (void)finishTask:(NSVoiceModel *)md withResult:(NSVoice2TextFinal *)finalValue {
    if ([taskList indexOfObjectIdenticalTo:md] == NSNotFound) {
        return;
    }
    [taskList removeObjectIdenticalTo:md];
    // Reset so the same model object can be submitted again later (e.g. a retry).
    md.isRunning = NO;
    md.isInQueue = NO;

    VoiceConversionResultsBlock deliver = md.voiceConversionBlock;
    if (finalValue && deliver) {
        deliver(finalValue);
    }
    [self resume];
}

#pragma mark - Recognition

/// Starts recognition of `url` on behalf of `md`.
- (void)startVoiceConversion:(NSURL *)url forTask:(NSVoiceModel *)md {
    SFSpeechRecognizer *recognizer = self.speechRecognizer;
    if (!recognizer) {
        // Messaging a nil recognizer would never call back and the queue would hang.
        NSVoice2TextFinal *el = [[NSVoice2TextFinal alloc] init];
        el.taskId = md.taskId;
        el.error = [NSError errorWithDomain:kNSVoice2TextErrorDomain
                                       code:1
                                   userInfo:@{NSLocalizedDescriptionKey: @"Speech recognizer is unavailable for zh-CN"}];
        [self finishTask:md withResult:el];
        return;
    }

    SFSpeechURLRecognitionRequest *request = [[SFSpeechURLRecognitionRequest alloc] initWithURL:url];
    // Only the final transcription is used, so skip intermediate callbacks.
    request.shouldReportPartialResults = NO;

    __weak typeof(self) weakSelf = self;
    [recognizer recognitionTaskWithRequest:request
                             resultHandler:^(SFSpeechRecognitionResult * _Nullable result, NSError * _Nullable error) {
        if (!error && result && ![result isFinal]) {
            return; // wait for the final result
        }
        NSVoice2TextRunOnMain(^{
            [weakSelf completeTask:md result:result error:error];
        });
    }];
}

/// Turns a terminal recognizer callback into an outcome for the model it was started for.
- (void)completeTask:(NSVoiceModel *)md result:(SFSpeechRecognitionResult *)result error:(NSError *)error {
    if (!md.taskTarget) {
        // The requester is gone; drop the result and keep the queue moving.
        [self finishTask:md withResult:nil];
        return;
    }

    NSVoice2TextFinal *el = [[NSVoice2TextFinal alloc] init];
    el.taskId = md.taskId;
    if (error) {
        el.error = error;
    } else if (result) {
        el.value = [[result bestTranscription] formattedString];
    } else {
        // Neither a result nor an error: report a failure rather than a nil "success".
        el.error = [NSError errorWithDomain:kNSVoice2TextErrorDomain
                                       code:2
                                   userInfo:@{NSLocalizedDescriptionKey: @"Speech recognition returned no result"}];
    }
    [self finishTask:md withResult:el];
}

@end
此实现内部已经实现了队列转文字功能,你只需要随时传入数据模型即可。
代码分析:
1、权限请求
+ (void)voice2TextRequestAuthorationStatus:(void (^)(NSVoice2TextAuthorationStatus status))requestBlock;
用于请求隐私权限,只有用户同意后方可使用此功能。否则无法使用此功能。
2、传入音频文件路径
+ (void)voice2TextGotter:(NSArray <NSVoiceModel *>*)glist runningModelBlock:(void (^__nullable)(NSVoiceModel *amodel))runningModelBlock resultsBlock:(void (^)(NSVoice2TextFinal *finalValue))resultsBlock rtaget:(id)rtaget
音频以数据模型NSVoiceModel传入,将你的音频文件与此模型实现映射关系,taskID用于实现绑定,参考头文件的定义及实现。
2.1 runningModelBlock:因为内部以队列方式逐条处理,当前正在处理哪一条,就会通过此回调对外输出该条数据。页面上可据此显示"正在转换中"文字。
2.2 resultsBlock,转换结果文字,以NSVoice2TextFinal对外输出,你只需要处理好这里面的逻辑即可。
3、完整使用:
// Complete usage: request permission, then queue one audio message for conversion.
// `bmodel`, `weakSelf` and `intToStr` come from the surrounding view controller.
[NSVoice2Text voice2TextRequestAuthorationStatus:^(NSVoice2TextAuthorationStatus status)
{
    // The authorization handler may run off the main queue; hop to main before touching UI.
    dispatch_async(dispatch_get_main_queue(), ^{
        if (status != NSVoice2TextAuthorizationStatusAuthorized)
        {
            [weakSelf showToastMessageThenHide:@"未授权使用语音识别功能"];
            return;
        }

        NSVoiceModel *md = [[NSVoiceModel alloc] init];
        [md setTaskId:[bmodel.messageId integerValue]];
        [md setPath:bmodel.audioFilePath];

        // `rtaget:` is the last argument of voice2TextGotter:…, not of the authorization call.
        [NSVoice2Text voice2TextGotter:@[md]
                     runningModelBlock:^(NSVoiceModel * _Nonnull amodel)
        {
            NSString *taskId = intToStr(amodel.taskId);
            // Use taskId to find the matching UI and show "converting…"
            // (make sure UI updates happen on the main queue).
        }
                          resultsBlock:^(NSVoice2TextFinal * _Nonnull finalValue)
        {
            NSString *taskId = intToStr(finalValue.taskId);
            if (!finalValue.error)
            {
                NSString *text = [finalValue value];
                // Conversion finished: use taskId to find the matching UI and show `text`.
            }
            else
            {
                // Conversion failed for this taskId: find the matching UI and show a failure hint.
            }
        }
                                rtaget:weakSelf];
    });
}];