Pub Date : 2025-02-27 DOI: 10.1109/JSTSP.2025.3539494
{"title":"IEEE Signal Processing Society Information","authors":"","doi":"10.1109/JSTSP.2025.3539494","DOIUrl":"https://doi.org/10.1109/JSTSP.2025.3539494","url":null,"abstract":"","PeriodicalId":13038,"journal":{"name":"IEEE Journal of Selected Topics in Signal Processing","volume":"19 1","pages":"C3-C3"},"PeriodicalIF":8.7,"publicationDate":"2025-02-27","publicationTypes":"Journal Article","fieldsOfStudy":null,"isOpenAccess":false,"openAccessPdf":"https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=10906681","citationCount":null,"resultStr":null,"platform":"Semanticscholar","paperid":"143521551","PeriodicalName":null,"FirstCategoryId":null,"ListUrlMain":null,"RegionNum":1,"RegionCategory":"工程技术","ArticlePicture":[],"TitleCN":null,"AbstractTextCN":null,"PMCID":"OA","EPubDate":null,"PubModel":null,"JCR":null,"JCRName":null,"Score":null,"Total":0}
Pub Date : 2025-02-27 DOI: 10.1109/JSTSP.2025.3539490
{"title":"IEEE Signal Processing Society Publication Information","authors":"","doi":"10.1109/JSTSP.2025.3539490","DOIUrl":"https://doi.org/10.1109/JSTSP.2025.3539490","url":null,"abstract":"","PeriodicalId":13038,"journal":{"name":"IEEE Journal of Selected Topics in Signal Processing","volume":"19 1","pages":"C2-C2"},"PeriodicalIF":8.7,"publicationDate":"2025-02-27","publicationTypes":"Journal Article","fieldsOfStudy":null,"isOpenAccess":false,"openAccessPdf":"https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=10906684","citationCount":null,"resultStr":null,"platform":"Semanticscholar","paperid":"143512768","PeriodicalName":null,"FirstCategoryId":null,"ListUrlMain":null,"RegionNum":1,"RegionCategory":"工程技术","ArticlePicture":[],"TitleCN":null,"AbstractTextCN":null,"PMCID":"OA","EPubDate":null,"PubModel":null,"JCR":null,"JCRName":null,"Score":null,"Total":0}
Pub Date : 2025-02-11 DOI: 10.1109/JSTSP.2025.3541370
{"title":"2024 Index IEEE Journal of Selected Topics in Signal Processing Vol. 18","authors":"","doi":"10.1109/JSTSP.2025.3541370","DOIUrl":"https://doi.org/10.1109/JSTSP.2025.3541370","url":null,"abstract":"","PeriodicalId":13038,"journal":{"name":"IEEE Journal of Selected Topics in Signal Processing","volume":"18 8","pages":"1562-1590"},"PeriodicalIF":8.7,"publicationDate":"2025-02-11","publicationTypes":"Journal Article","fieldsOfStudy":null,"isOpenAccess":false,"openAccessPdf":"https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=10880692","citationCount":null,"resultStr":null,"platform":"Semanticscholar","paperid":"143388563","PeriodicalName":null,"FirstCategoryId":null,"ListUrlMain":null,"RegionNum":1,"RegionCategory":"工程技术","ArticlePicture":[],"TitleCN":null,"AbstractTextCN":null,"PMCID":"OA","EPubDate":null,"PubModel":null,"JCR":null,"JCRName":null,"Score":null,"Total":0}
Pub Date : 2025-02-05 DOI: 10.1109/JSTSP.2025.3535108
{"title":"IEEE Signal Processing Society Information","authors":"","doi":"10.1109/JSTSP.2025.3535108","DOIUrl":"https://doi.org/10.1109/JSTSP.2025.3535108","url":null,"abstract":"","PeriodicalId":13038,"journal":{"name":"IEEE Journal of Selected Topics in Signal Processing","volume":"18 8","pages":"C2-C2"},"PeriodicalIF":8.7,"publicationDate":"2025-02-05","publicationTypes":"Journal Article","fieldsOfStudy":null,"isOpenAccess":false,"openAccessPdf":"https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=10874832","citationCount":null,"resultStr":null,"platform":"Semanticscholar","paperid":"143184261","PeriodicalName":null,"FirstCategoryId":null,"ListUrlMain":null,"RegionNum":1,"RegionCategory":"工程技术","ArticlePicture":[],"TitleCN":null,"AbstractTextCN":null,"PMCID":"OA","EPubDate":null,"PubModel":null,"JCR":null,"JCRName":null,"Score":null,"Total":0}
Pub Date : 2025-02-05 DOI: 10.1109/JSTSP.2025.3535110
{"title":"IEEE Signal Processing Society Information","authors":"","doi":"10.1109/JSTSP.2025.3535110","DOIUrl":"https://doi.org/10.1109/JSTSP.2025.3535110","url":null,"abstract":"","PeriodicalId":13038,"journal":{"name":"IEEE Journal of Selected Topics in Signal Processing","volume":"18 8","pages":"C3-C3"},"PeriodicalIF":8.7,"publicationDate":"2025-02-05","publicationTypes":"Journal Article","fieldsOfStudy":null,"isOpenAccess":false,"openAccessPdf":"https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=10874836","citationCount":null,"resultStr":null,"platform":"Semanticscholar","paperid":"143184463","PeriodicalName":null,"FirstCategoryId":null,"ListUrlMain":null,"RegionNum":1,"RegionCategory":"工程技术","ArticlePicture":[],"TitleCN":null,"AbstractTextCN":null,"PMCID":"OA","EPubDate":null,"PubModel":null,"JCR":null,"JCRName":null,"Score":null,"Total":0}
Pub Date : 2025-02-05 DOI: 10.1109/JSTSP.2025.3534376
{"title":"List of Reviewers 2024","authors":"","doi":"10.1109/JSTSP.2025.3534376","DOIUrl":"https://doi.org/10.1109/JSTSP.2025.3534376","url":null,"abstract":"","PeriodicalId":13038,"journal":{"name":"IEEE Journal of Selected Topics in Signal Processing","volume":"18 8","pages":"1557-1561"},"PeriodicalIF":8.7,"publicationDate":"2025-02-05","publicationTypes":"Journal Article","fieldsOfStudy":null,"isOpenAccess":false,"openAccessPdf":"https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=10874840","citationCount":null,"resultStr":null,"platform":"Semanticscholar","paperid":"143184262","PeriodicalName":null,"FirstCategoryId":null,"ListUrlMain":null,"RegionNum":1,"RegionCategory":"工程技术","ArticlePicture":[],"TitleCN":null,"AbstractTextCN":null,"PMCID":"OA","EPubDate":null,"PubModel":null,"JCR":null,"JCRName":null,"Score":null,"Total":0}
Pub Date : 2025-01-24 DOI: 10.1109/JSTSP.2025.3526289
{"title":"IEEE Signal Processing Society Information","authors":"","doi":"10.1109/JSTSP.2025.3526289","DOIUrl":"https://doi.org/10.1109/JSTSP.2025.3526289","url":null,"abstract":"","PeriodicalId":13038,"journal":{"name":"IEEE Journal of Selected Topics in Signal Processing","volume":"18 6","pages":"C3-C3"},"PeriodicalIF":8.7,"publicationDate":"2025-01-24","publicationTypes":"Journal Article","fieldsOfStudy":null,"isOpenAccess":false,"openAccessPdf":"https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=10852386","citationCount":null,"resultStr":null,"platform":"Semanticscholar","paperid":"143106605","PeriodicalName":null,"FirstCategoryId":null,"ListUrlMain":null,"RegionNum":1,"RegionCategory":"工程技术","ArticlePicture":[],"TitleCN":null,"AbstractTextCN":null,"PMCID":"OA","EPubDate":null,"PubModel":null,"JCR":null,"JCRName":null,"Score":null,"Total":0}
Pub Date : 2025-01-24 DOI: 10.1109/JSTSP.2025.3530171
Yinghao Aaron Li;Cong Han;Nima Mesgarani
Text-to-Speech (TTS) has recently seen great progress in synthesizing high-quality speech owing to the rapid development of parallel TTS systems. Yet producing speech with naturalistic prosodic variations, speaking styles, and emotional tones remains challenging. In addition, many existing parallel TTS models often struggle with identifying optimal monotonic alignments since speech and duration generation typically occur independently. Here, we propose StyleTTS, a style-based generative model for parallel TTS that can synthesize diverse speech with natural prosody from a reference speech utterance. Using our novel Transferable Monotonic Aligner (TMA) and duration-invariant data augmentation, StyleTTS significantly outperforms other baseline models on both single and multi-speaker datasets in subjective tests of speech naturalness and synthesized speaker similarity. It also demonstrates higher robustness and emotional similarity to the reference speech as indicated by word error rate (WER) and acoustic feature correlations. Through self-supervised learning, StyleTTS can generate speech with the same emotional and prosodic tone as the reference speech without needing explicit labels for these categories. In addition, when trained with a large number of speakers, our model can perform zero-shot speaker adaption. The source code and audio samples can be found on our demo page at https://styletts.github.io/.
{"title":"StyleTTS: A Style-Based Generative Model for Natural and Diverse Text-to-Speech Synthesis","authors":"Yinghao Aaron Li;Cong Han;Nima Mesgarani","doi":"10.1109/JSTSP.2025.3530171","DOIUrl":"https://doi.org/10.1109/JSTSP.2025.3530171","url":null,"abstract":"Text-to-Speech (TTS) has recently seen great progress in synthesizing high-quality speech owing to the rapid development of parallel TTS systems. Yet producing speech with naturalistic prosodic variations, speaking styles, and emotional tones remains challenging. In addition, many existing parallel TTS models often struggle with identifying optimal monotonic alignments since speech and duration generation typically occur independently. Here, we propose StyleTTS, a style-based generative model for parallel TTS that can synthesize diverse speech with natural prosody from a reference speech utterance. Using our novel Transferable Monotonic Aligner (TMA) and duration-invariant data augmentation, StyleTTS significantly outperforms other baseline models on both single and multi-speaker datasets in subjective tests of speech naturalness and synthesized speaker similarity. It also demonstrates higher robustness and emotional similarity to the reference speech as indicated by word error rate (WER) and acoustic feature correlations. Through self-supervised learning, StyleTTS can generate speech with the same emotional and prosodic tone as the reference speech without needing explicit labels for these categories. In addition, when trained with a large number of speakers, our model can perform zero-shot speaker adaption. \nThe source code and audio samples can be found on our demo page at <uri>https://styletts.github.io/</uri>.","PeriodicalId":13038,"journal":{"name":"IEEE Journal of Selected Topics in Signal Processing","volume":"19 1","pages":"283-296"},"PeriodicalIF":8.7,"publicationDate":"2025-01-24","publicationTypes":"Journal Article","fieldsOfStudy":null,"isOpenAccess":false,"openAccessPdf":"","citationCount":null,"resultStr":null,"platform":"Semanticscholar","paperid":"143512981","PeriodicalName":null,"FirstCategoryId":null,"ListUrlMain":null,"RegionNum":1,"RegionCategory":"工程技术","ArticlePicture":[],"TitleCN":null,"AbstractTextCN":null,"PMCID":"","EPubDate":null,"PubModel":null,"JCR":null,"JCRName":null,"Score":null,"Total":0}
Pub Date : 2025-01-24 DOI: 10.1109/JSTSP.2024.3522438
Yi Ma;Yuejie Chi;Ivan Dokmanić;Bihan Wen;John N. Wright;Zhihui Zhu
{"title":"Editorial Introduction to the Special Issue Seeking Low-Dimensionality in Deep Neural Networks (SLowDNN)","authors":"Yi Ma;Yuejie Chi;Ivan Dokmanić;Bihan Wen;John N. Wright;Zhihui Zhu","doi":"10.1109/JSTSP.2024.3522438","DOIUrl":"https://doi.org/10.1109/JSTSP.2024.3522438","url":null,"abstract":"","PeriodicalId":13038,"journal":{"name":"IEEE Journal of Selected Topics in Signal Processing","volume":"18 6","pages":"980-984"},"PeriodicalIF":8.7,"publicationDate":"2025-01-24","publicationTypes":"Journal Article","fieldsOfStudy":null,"isOpenAccess":false,"openAccessPdf":"https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=10852363","citationCount":null,"resultStr":null,"platform":"Semanticscholar","paperid":"143106517","PeriodicalName":null,"FirstCategoryId":null,"ListUrlMain":null,"RegionNum":1,"RegionCategory":"工程技术","ArticlePicture":[],"TitleCN":null,"AbstractTextCN":null,"PMCID":"OA","EPubDate":null,"PubModel":null,"JCR":null,"JCRName":null,"Score":null,"Total":0}