Pub Date : 2023-11-15DOI: 10.1007/s10772-023-10063-8
Nishant Barsainyan, Dileep Kumar Singh
{"title":"Optimized cross-corpus speech emotion recognition framework based on normalized 1D convolutional neural network with data augmentation and feature selection","authors":"Nishant Barsainyan, Dileep Kumar Singh","doi":"10.1007/s10772-023-10063-8","DOIUrl":"https://doi.org/10.1007/s10772-023-10063-8","url":null,"abstract":"","PeriodicalId":14305,"journal":{"name":"International Journal of Speech Technology","volume":"8 4","pages":""},"PeriodicalIF":0.0,"publicationDate":"2023-11-15","publicationTypes":"Journal Article","fieldsOfStudy":null,"isOpenAccess":false,"openAccessPdf":"","citationCount":null,"resultStr":null,"platform":"Semanticscholar","paperid":"139271436","PeriodicalName":null,"FirstCategoryId":null,"ListUrlMain":null,"RegionNum":0,"RegionCategory":"","ArticlePicture":[],"TitleCN":null,"AbstractTextCN":null,"PMCID":"","EPubDate":null,"PubModel":null,"JCR":null,"JCRName":null,"Score":null,"Total":0}
Pub Date : 2023-11-14DOI: 10.1007/s10772-023-10060-x
Salam Nandakishor, Debadatta Pati
{"title":"Usefulness of glottal excitation source information for audio-visual speech recognition system","authors":"Salam Nandakishor, Debadatta Pati","doi":"10.1007/s10772-023-10060-x","DOIUrl":"https://doi.org/10.1007/s10772-023-10060-x","url":null,"abstract":"","PeriodicalId":14305,"journal":{"name":"International Journal of Speech Technology","volume":"26 18","pages":"0"},"PeriodicalIF":0.0,"publicationDate":"2023-11-14","publicationTypes":"Journal Article","fieldsOfStudy":null,"isOpenAccess":false,"openAccessPdf":"","citationCount":null,"resultStr":null,"platform":"Semanticscholar","paperid":"134954312","PeriodicalName":null,"FirstCategoryId":null,"ListUrlMain":null,"RegionNum":0,"RegionCategory":"","ArticlePicture":[],"TitleCN":null,"AbstractTextCN":null,"PMCID":"","EPubDate":null,"PubModel":null,"JCR":null,"JCRName":null,"Score":null,"Total":0}
Pub Date : 2023-11-11DOI: 10.1007/s10772-023-10067-4
Om Prakash Swain, H. Hemanth, Puneet Saran, Mohanaprasad Kothandaraman, Logesh Ravi, Hardik Sailor, K. S. Rajesh
{"title":"Robust and efficient keyword spotting using a bidirectional attention LSTM","authors":"Om Prakash Swain, H. Hemanth, Puneet Saran, Mohanaprasad Kothandaraman, Logesh Ravi, Hardik Sailor, K. S. Rajesh","doi":"10.1007/s10772-023-10067-4","DOIUrl":"https://doi.org/10.1007/s10772-023-10067-4","url":null,"abstract":"","PeriodicalId":14305,"journal":{"name":"International Journal of Speech Technology","volume":"18 9","pages":"0"},"PeriodicalIF":0.0,"publicationDate":"2023-11-11","publicationTypes":"Journal Article","fieldsOfStudy":null,"isOpenAccess":false,"openAccessPdf":"","citationCount":null,"resultStr":null,"platform":"Semanticscholar","paperid":"135043070","PeriodicalName":null,"FirstCategoryId":null,"ListUrlMain":null,"RegionNum":0,"RegionCategory":"","ArticlePicture":[],"TitleCN":null,"AbstractTextCN":null,"PMCID":"","EPubDate":null,"PubModel":null,"JCR":null,"JCRName":null,"Score":null,"Total":0}
Pub Date : 2023-11-11DOI: 10.1007/s10772-023-10053-w
Ghayas Ahmed, Aadil Ahmad Lawaye
{"title":"End-to-end ASR framework for Indian-English accent: using speech CNN-based segmentation","authors":"Ghayas Ahmed, Aadil Ahmad Lawaye","doi":"10.1007/s10772-023-10053-w","DOIUrl":"https://doi.org/10.1007/s10772-023-10053-w","url":null,"abstract":"","PeriodicalId":14305,"journal":{"name":"International Journal of Speech Technology","volume":"19 19","pages":"0"},"PeriodicalIF":0.0,"publicationDate":"2023-11-11","publicationTypes":"Journal Article","fieldsOfStudy":null,"isOpenAccess":false,"openAccessPdf":"","citationCount":null,"resultStr":null,"platform":"Semanticscholar","paperid":"135043271","PeriodicalName":null,"FirstCategoryId":null,"ListUrlMain":null,"RegionNum":0,"RegionCategory":"","ArticlePicture":[],"TitleCN":null,"AbstractTextCN":null,"PMCID":"","EPubDate":null,"PubModel":null,"JCR":null,"JCRName":null,"Score":null,"Total":0}
Pub Date : 2023-11-05DOI: 10.1007/s10772-023-10059-4
Zhor Benhafid, Sid Ahmed Selouani, Abderrahmane Amrouche, Mohammed Sidi Yakoub
{"title":"Attention-based factorized TDNN for a noise-robust and spoof-aware speaker verification system","authors":"Zhor Benhafid, Sid Ahmed Selouani, Abderrahmane Amrouche, Mohammed Sidi Yakoub","doi":"10.1007/s10772-023-10059-4","DOIUrl":"https://doi.org/10.1007/s10772-023-10059-4","url":null,"abstract":"","PeriodicalId":14305,"journal":{"name":"International Journal of Speech Technology","volume":"22 5","pages":"0"},"PeriodicalIF":0.0,"publicationDate":"2023-11-05","publicationTypes":"Journal Article","fieldsOfStudy":null,"isOpenAccess":false,"openAccessPdf":"","citationCount":null,"resultStr":null,"platform":"Semanticscholar","paperid":"135724739","PeriodicalName":null,"FirstCategoryId":null,"ListUrlMain":null,"RegionNum":0,"RegionCategory":"","ArticlePicture":[],"TitleCN":null,"AbstractTextCN":null,"PMCID":"","EPubDate":null,"PubModel":null,"JCR":null,"JCRName":null,"Score":null,"Total":0}
Pub Date : 2023-11-03DOI: 10.1007/s10772-023-10055-8
Aluru V. N. M. Hemateja, Gopikrishnan Kondakath, Susruta Das, Mohanaprasad Kothandaraman, S. Shobha, Abhishek Pandey, Rajin Babu, Abhinav Jain
{"title":"Novel data augmentation for named entity recognition","authors":"Aluru V. N. M. Hemateja, Gopikrishnan Kondakath, Susruta Das, Mohanaprasad Kothandaraman, S. Shobha, Abhishek Pandey, Rajin Babu, Abhinav Jain","doi":"10.1007/s10772-023-10055-8","DOIUrl":"https://doi.org/10.1007/s10772-023-10055-8","url":null,"abstract":"","PeriodicalId":14305,"journal":{"name":"International Journal of Speech Technology","volume":"40 22","pages":"0"},"PeriodicalIF":0.0,"publicationDate":"2023-11-03","publicationTypes":"Journal Article","fieldsOfStudy":null,"isOpenAccess":false,"openAccessPdf":"","citationCount":null,"resultStr":null,"platform":"Semanticscholar","paperid":"135819607","PeriodicalName":null,"FirstCategoryId":null,"ListUrlMain":null,"RegionNum":0,"RegionCategory":"","ArticlePicture":[],"TitleCN":null,"AbstractTextCN":null,"PMCID":"","EPubDate":null,"PubModel":null,"JCR":null,"JCRName":null,"Score":null,"Total":0}
Pub Date : 2023-11-03DOI: 10.1007/s10772-023-10056-7
R. Benazir Begam, M. Palanivelan
{"title":"A speech based diagnostic method for Alzheimer disease using machine learning","authors":"R. Benazir Begam, M. Palanivelan","doi":"10.1007/s10772-023-10056-7","DOIUrl":"https://doi.org/10.1007/s10772-023-10056-7","url":null,"abstract":"","PeriodicalId":14305,"journal":{"name":"International Journal of Speech Technology","volume":"43 9","pages":"0"},"PeriodicalIF":0.0,"publicationDate":"2023-11-03","publicationTypes":"Journal Article","fieldsOfStudy":null,"isOpenAccess":false,"openAccessPdf":"","citationCount":null,"resultStr":null,"platform":"Semanticscholar","paperid":"135820188","PeriodicalName":null,"FirstCategoryId":null,"ListUrlMain":null,"RegionNum":0,"RegionCategory":"","ArticlePicture":[],"TitleCN":null,"AbstractTextCN":null,"PMCID":"","EPubDate":null,"PubModel":null,"JCR":null,"JCRName":null,"Score":null,"Total":0}