default search action

combined dblp search
author search
venue search
publication search

ask others

BibTeX records: Berrak Sisman

Name: dblp XML data dump
Creator: Schloss Dagstuhl - Leibniz Center for Informatics
Published: 1993
License: https://creativecommons.org/publicdomain/zero/1.0/
Keywords: dblp, XML, computer science, scholarly publications, metadata

> Home > Persons > Berrak Sisman

download as .bib file

@article{DBLP:journals/corr/abs-2601-03115,
  author       = {Xiutian Zhao and
                  Bj{\"{o}}rn W. Schuller and
                  Berrak Sisman},
  title        = {Discovering and Causally Validating Emotion-Sensitive Neurons in Large
                  Audio-Language Models},
  journal      = {CoRR},
  volume       = {abs/2601.03115},
  year         = {2026},
  url          = {https://doi.org/10.48550/arXiv.2601.03115},
  doi          = {10.48550/ARXIV.2601.03115},
  eprinttype   = {arXiv},
  eprint       = {2601.03115},
  timestamp    = {Wed, 11 Feb 2026 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2601-03115.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/spl/LamZCSH25,
  author       = {Perry Lam and
                  Huayun Zhang and
                  Nancy F. Chen and
                  Berrak Sisman and
                  Dorien Herremans},
  title        = {{PRESENT:} Zero-Shot Text-to-Prosody Control},
  journal      = {{IEEE} Signal Process. Lett.},
  volume       = {32},
  pages        = {776--780},
  year         = {2025},
  url          = {https://doi.org/10.1109/LSP.2025.3528359},
  doi          = {10.1109/LSP.2025.3528359},
  timestamp    = {Tue, 14 Oct 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/spl/LamZCSH25.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/taffco/GoncalvesLLSB25,
  author       = {Lucas Goncalves and
                  Seong{-}Gyun Leem and
                  Wei{-}Cheng Lin and
                  Berrak Sisman and
                  Carlos Busso},
  title        = {Versatile Audio-Visual Learning for Emotion Recognition},
  journal      = {{IEEE} Trans. Affect. Comput.},
  volume       = {16},
  number       = {1},
  pages        = {306--318},
  year         = {2025},
  url          = {https://doi.org/10.1109/TAFFC.2024.3433386},
  doi          = {10.1109/TAFFC.2024.3433386},
  timestamp    = {Sun, 15 Jun 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/taffco/GoncalvesLLSB25.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/apsipa/Xian0S025,
  author       = {Huhong Xian and
                  Rui Liu and
                  Berrak Sisman and
                  Haizhou Li},
  title        = {{NE-PADD:} Leveraging Named Entity Knowledge for Robust Partial Audio
                  Deepfake Detection via Attention Aggregation},
  booktitle    = {Asia Pacific Signal and Information Processing Association Annual
                  Summit and Conference, {APSIPA} {ASC} 2025, Singapore, October 22-24,
                  2025},
  pages        = {2199--2204},
  publisher    = {{IEEE}},
  year         = {2025},
  url          = {https://doi.org/10.1109/APSIPAASC65261.2025.11249178},
  doi          = {10.1109/APSIPAASC65261.2025.11249178},
  timestamp    = {Mon, 16 Mar 2026 12:13:26 +0100},
  biburl       = {https://dblp.org/rec/conf/apsipa/Xian0S025.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/emnlp/JiaLSL25,
  author       = {Zhenqi Jia and
                  Rui Liu and
                  Berrak Sisman and
                  Haizhou Li},
  editor       = {Christos Christodoulopoulos and
                  Tanmoy Chakraborty and
                  Carolyn Rose and
                  Violet Peng},
  title        = {Multimodal Fine-grained Context Interaction Graph Modeling for Conversational
                  Speech Synthesis},
  booktitle    = {Proceedings of the 2025 Conference on Empirical Methods in Natural
                  Language Processing, {EMNLP} 2025, Suzhou, China, November 4-9, 2025},
  pages        = {8852--8858},
  publisher    = {Association for Computational Linguistics},
  year         = {2025},
  url          = {https://doi.org/10.18653/v1/2025.emnlp-main.448},
  doi          = {10.18653/V1/2025.EMNLP-MAIN.448},
  timestamp    = {Wed, 04 Feb 2026 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/emnlp/JiaLSL25.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/interspeech/0008GXSB025,
  author       = {Rui Liu and
                  Pu Gao and
                  Jiatian Xi and
                  Berrak Sisman and
                  Carlos Busso and
                  Haizhou Li},
  editor       = {Odette Scharenborg and
                  Catharine Oertel and
                  Khiet Truong},
  title        = {Towards Emotionally Consistent Text-Based Speech Editing: Introducing
                  {EmoCorrector} and The {ECD-TSE} Dataset},
  booktitle    = {26th Annual Conference of the International Speech Communication Association,
                  Interspeech 2025, Rotterdam, The Netherlands, 17-21 August 2025},
  publisher    = {{ISCA}},
  year         = {2025},
  url          = {https://doi.org/10.21437/Interspeech.2025-559},
  doi          = {10.21437/INTERSPEECH.2025-559},
  timestamp    = {Thu, 20 Nov 2025 11:03:39 +0100},
  biburl       = {https://dblp.org/rec/conf/interspeech/0008GXSB025.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/interspeech/ChandraGLBS25,
  author       = {Shreeram Suresh Chandra and
                  Lucas Goncalves and
                  Junchen Lu and
                  Carlos Busso and
                  Berrak Sisman},
  editor       = {Odette Scharenborg and
                  Catharine Oertel and
                  Khiet Truong},
  title        = {{EmotionRankCLAP:} Bridging Natural Language Speaking Styles and Ordinal
                  Speech Emotion via {Rank-N-Contrast}},
  booktitle    = {26th Annual Conference of the International Speech Communication Association,
                  Interspeech 2025, Rotterdam, The Netherlands, 17-21 August 2025},
  publisher    = {{ISCA}},
  year         = {2025},
  url          = {https://doi.org/10.21437/Interspeech.2025-1198},
  doi          = {10.21437/INTERSPEECH.2025-1198},
  timestamp    = {Thu, 20 Nov 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/interspeech/ChandraGLBS25.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/interspeech/MahapatraUNBS25,
  author       = {Aurosweta Mahapatra and
                  Ismail Rasim Ulgen and
                  Abinay Reddy Naini and
                  Carlos Busso and
                  Berrak Sisman},
  editor       = {Odette Scharenborg and
                  Catharine Oertel and
                  Khiet Truong},
  title        = {Can Emotion Fool Anti-spoofing?},
  booktitle    = {26th Annual Conference of the International Speech Communication Association,
                  Interspeech 2025, Rotterdam, The Netherlands, 17-21 August 2025},
  publisher    = {{ISCA}},
  year         = {2025},
  url          = {https://doi.org/10.21437/Interspeech.2025-1234},
  doi          = {10.21437/INTERSPEECH.2025-1234},
  timestamp    = {Thu, 20 Nov 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/interspeech/MahapatraUNBS25.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/interspeech/NainiGSMUTMGDSB25,
  author       = {Abinay Reddy Naini and
                  Lucas Goncalves and
                  Ali N. Salman and
                  Pravin Mote and
                  Ismail Rasim Ulgen and
                  Thomas Thebaud and
                  Laureano Moro{-}Vel{\'{a}}zquez and
                  Leibny Paola Garc{\'{\i}}a and
                  Najim Dehak and
                  Berrak Sisman and
                  Carlos Busso},
  editor       = {Odette Scharenborg and
                  Catharine Oertel and
                  Khiet Truong},
  title        = {The {Interspeech} 2025 Challenge on Speech Emotion Recognition in Naturalistic
                  Conditions},
  booktitle    = {26th Annual Conference of the International Speech Communication Association,
                  Interspeech 2025, Rotterdam, The Netherlands, 17-21 August 2025},
  publisher    = {{ISCA}},
  year         = {2025},
  url          = {https://doi.org/10.21437/Interspeech.2025-1972},
  doi          = {10.21437/INTERSPEECH.2025-1972},
  timestamp    = {Fri, 21 Nov 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/interspeech/NainiGSMUTMGDSB25.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/interspeech/RoseroSCSSKHB25,
  author       = {Karen Rosero and
                  Ali N. Salman and
                  Shreeram Suresh Chandra and
                  Berrak Sisman and
                  Cortney {Van't Slot} and
                  Alex A. Kane and
                  Rami R. Hallac and
                  Carlos Busso},
  editor       = {Odette Scharenborg and
                  Catharine Oertel and
                  Khiet Truong},
  title        = {Advancing Pediatric {ASR:} The Role of Voice Generation in Disordered
                  Speech},
  booktitle    = {26th Annual Conference of the International Speech Communication Association,
                  Interspeech 2025, Rotterdam, The Netherlands, 17-21 August 2025},
  publisher    = {{ISCA}},
  year         = {2025},
  url          = {https://doi.org/10.21437/Interspeech.2025-1890},
  doi          = {10.21437/INTERSPEECH.2025-1890},
  timestamp    = {Thu, 20 Nov 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/interspeech/RoseroSCSSKHB25.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-2505-20341,
  author       = {Rui Liu and
                  Pu Gao and
                  Jiatian Xi and
                  Berrak Sisman and
                  Carlos Busso and
                  Haizhou Li},
  title        = {Towards Emotionally Consistent Text-Based Speech Editing: Introducing
                  {EmoCorrector} and The {ECD-TSE} Dataset},
  journal      = {CoRR},
  volume       = {abs/2505.20341},
  year         = {2025},
  url          = {https://doi.org/10.48550/arXiv.2505.20341},
  doi          = {10.48550/ARXIV.2505.20341},
  eprinttype   = {arXiv},
  eprint       = {2505.20341},
  timestamp    = {Sat, 06 Sep 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2505-20341.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-2505-23732,
  author       = {Shreeram Suresh Chandra and
                  Lucas Goncalves and
                  Junchen Lu and
                  Carlos Busso and
                  Berrak Sisman},
  title        = {{EmotionRankCLAP:} Bridging Natural Language Speaking Styles and Ordinal
                  Speech Emotion via {Rank-N-Contrast}},
  journal      = {CoRR},
  volume       = {abs/2505.23732},
  year         = {2025},
  url          = {https://doi.org/10.48550/arXiv.2505.23732},
  doi          = {10.48550/ARXIV.2505.23732},
  eprinttype   = {arXiv},
  eprint       = {2505.23732},
  timestamp    = {Sun, 29 Jun 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2505-23732.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-2505-23962,
  author       = {Aurosweta Mahapatra and
                  Ismail Rasim Ulgen and
                  Abinay Reddy Naini and
                  Carlos Busso and
                  Berrak Sisman},
  title        = {Can Emotion Fool Anti-spoofing?},
  journal      = {CoRR},
  volume       = {abs/2505.23962},
  year         = {2025},
  url          = {https://doi.org/10.48550/arXiv.2505.23962},
  doi          = {10.48550/ARXIV.2505.23962},
  eprinttype   = {arXiv},
  eprint       = {2505.23962},
  timestamp    = {Sun, 29 Jun 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2505-23962.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-2509-03829,
  author       = {Huhong Xian and
                  Rui Liu and
                  Berrak Sisman and
                  Haizhou Li},
  title        = {{NE-PADD:} Leveraging Named Entity Knowledge for Robust Partial Audio
                  Deepfake Detection via Attention Aggregation},
  journal      = {CoRR},
  volume       = {abs/2509.03829},
  year         = {2025},
  url          = {https://doi.org/10.48550/arXiv.2509.03829},
  doi          = {10.48550/ARXIV.2509.03829},
  eprinttype   = {arXiv},
  eprint       = {2509.03829},
  timestamp    = {Mon, 13 Oct 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2509-03829.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-2509-06074,
  author       = {Zhenqi Jia and
                  Rui Liu and
                  Berrak Sisman and
                  Haizhou Li},
  title        = {Multimodal Fine-grained Context Interaction Graph Modeling for Conversational
                  Speech Synthesis},
  journal      = {CoRR},
  volume       = {abs/2509.06074},
  year         = {2025},
  url          = {https://doi.org/10.48550/arXiv.2509.06074},
  doi          = {10.48550/ARXIV.2509.06074},
  eprinttype   = {arXiv},
  eprint       = {2509.06074},
  timestamp    = {Mon, 13 Oct 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2509-06074.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-2509-20485,
  author       = {Ismail Rasim Ulgen and
                  Zongyang Du and
                  Junchen Lu and
                  Philipp Koehn and
                  Berrak Sisman},
  title        = {Objective Evaluation of Prosody and Intelligibility in Speech Synthesis
                  via Conditional Prediction of Discrete Tokens},
  journal      = {CoRR},
  volume       = {abs/2509.20485},
  year         = {2025},
  url          = {https://doi.org/10.48550/arXiv.2509.20485},
  doi          = {10.48550/ARXIV.2509.20485},
  eprinttype   = {arXiv},
  eprint       = {2509.20485},
  timestamp    = {Wed, 22 Oct 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2509-20485.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-2509-21676,
  author       = {Aurosweta Mahapatra and
                  Ismail Rasim Ulgen and
                  Berrak Sisman},
  title        = {{HuLA:} Prosody-Aware Anti-Spoofing with Multi-Task Learning for Expressive
                  and Emotional Synthetic Speech},
  journal      = {CoRR},
  volume       = {abs/2509.21676},
  year         = {2025},
  url          = {https://doi.org/10.48550/arXiv.2509.21676},
  doi          = {10.48550/ARXIV.2509.21676},
  eprinttype   = {arXiv},
  eprint       = {2509.21676},
  timestamp    = {Wed, 22 Oct 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2509-21676.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-2511-00256,
  author       = {Zongyang Du and
                  Shreeram Suresh Chandra and
                  Ismail Rasim Ulgen and
                  Aurosweta Mahapatra and
                  Ali N. Salman and
                  Carlos Busso and
                  Berrak Sisman},
  title        = {{NaturalVoices:} {A} Large-Scale, Spontaneous and Emotional Podcast
                  Dataset for Voice Conversion},
  journal      = {CoRR},
  volume       = {abs/2511.00256},
  year         = {2025},
  url          = {https://doi.org/10.48550/arXiv.2511.00256},
  doi          = {10.48550/ARXIV.2511.00256},
  eprinttype   = {arXiv},
  eprint       = {2511.00256},
  timestamp    = {Fri, 02 Jan 2026 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2511-00256.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/access/RajapaksheRKSSB24,
  author       = {Thejan Rajapakshe and
                  Rajib Rana and
                  Sara Khalifa and
                  Berrak Sisman and
                  Bj{\"{o}}rn W. Schuller and
                  Carlos Busso},
  title        = {{emoDARTS:} Joint Optimization of {CNN} and Sequential Neural Network
                  Architectures for Superior Speech Emotion Recognition},
  journal      = {{IEEE} Access},
  volume       = {12},
  pages        = {110492--110503},
  year         = {2024},
  url          = {https://doi.org/10.1109/ACCESS.2024.3439604},
  doi          = {10.1109/ACCESS.2024.3439604},
  timestamp    = {Thu, 22 Aug 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/access/RajapaksheRKSSB24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/taslp/LiuSGL24,
  author       = {Rui Liu and
                  Berrak Sisman and
                  Guanglai Gao and
                  Haizhou Li},
  title        = {Controllable Accented Text-to-Speech Synthesis With Fine and Coarse-Grained
                  Intensity Rendering},
  journal      = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  volume       = {32},
  pages        = {2188--2201},
  year         = {2024},
  url          = {https://doi.org/10.1109/TASLP.2024.3378110},
  doi          = {10.1109/TASLP.2024.3378110},
  timestamp    = {Fri, 17 May 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/taslp/LiuSGL24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/fgr/RoseroSSHB24,
  author       = {Karen Rosero and
                  Ali N. Salman and
                  Berrak Sisman and
                  Rami R. Hallac and
                  Carlos Busso},
  title        = {Enhanced Facial Landmarks Detection for Patients with Repaired Cleft
                  Lip and Palate},
  booktitle    = {18th {IEEE} International Conference on Automatic Face and Gesture
                  Recognition, {FG} 2024, Istanbul, Turkey, May 27-31, 2024},
  pages        = {1--10},
  publisher    = {{IEEE}},
  year         = {2024},
  url          = {https://doi.org/10.1109/FG59268.2024.10582022},
  doi          = {10.1109/FG59268.2024.10582022},
  timestamp    = {Wed, 31 Jul 2024 14:00:36 +0200},
  biburl       = {https://dblp.org/rec/conf/fgr/RoseroSSHB24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/icassp/UlgenDBS24,
  author       = {Ismail Rasim Ulgen and
                  Zongyang Du and
                  Carlos Busso and
                  Berrak Sisman},
  title        = {Revealing Emotional Clusters in Speaker Embeddings: {A} Contrastive
                  Learning Strategy for Speech Emotion Recognition},
  booktitle    = {{IEEE} International Conference on Acoustics, Speech and Signal Processing,
                  {ICASSP} 2024, Seoul, Republic of Korea, April 14-19, 2024},
  pages        = {12081--12085},
  publisher    = {{IEEE}},
  year         = {2024},
  url          = {https://doi.org/10.1109/ICASSP48485.2024.10447060},
  doi          = {10.1109/ICASSP48485.2024.10447060},
  timestamp    = {Sun, 19 Jan 2025 13:18:23 +0100},
  biburl       = {https://dblp.org/rec/conf/icassp/UlgenDBS24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/interspeech/MoteSB24,
  author       = {Pravin Mote and
                  Berrak Sisman and
                  Carlos Busso},
  editor       = {Itshak Lapidot and
                  Sharon Gannot},
  title        = {Unsupervised Domain Adaptation for Speech Emotion Recognition using
                  K-Nearest Neighbors Voice Conversion},
  booktitle    = {25th Annual Conference of the International Speech Communication Association,
                  Interspeech 2024, Kos, Greece, September 1-5, 2024},
  publisher    = {{ISCA}},
  year         = {2024},
  url          = {https://doi.org/10.21437/Interspeech.2024-1248},
  doi          = {10.21437/INTERSPEECH.2024-1248},
  timestamp    = {Tue, 20 May 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/MoteSB24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/interspeech/SalmanDCUBS24,
  author       = {Ali N. Salman and
                  Zongyang Du and
                  Shreeram Suresh Chandra and
                  Ismail Rasim {\"{U}}lgen and
                  Carlos Busso and
                  Berrak Sisman},
  editor       = {Itshak Lapidot and
                  Sharon Gannot},
  title        = {Towards Naturalistic Voice Conversion: {NaturalVoices} Dataset with
                  an Automatic Processing Pipeline},
  booktitle    = {25th Annual Conference of the International Speech Communication Association,
                  Interspeech 2024, Kos, Greece, September 1-5, 2024},
  publisher    = {{ISCA}},
  year         = {2024},
  url          = {https://doi.org/10.21437/Interspeech.2024-1256},
  doi          = {10.21437/INTERSPEECH.2024-1256},
  timestamp    = {Tue, 20 May 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/SalmanDCUBS24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/odyssey/DuL0KS24,
  author       = {Zongyang Du and
                  Junchen Lu and
                  Kun Zhou and
                  Lakshmish Kaushik and
                  Berrak Sisman},
  editor       = {Najim Dehak and
                  Patrick Cardinal},
  title        = {Converting Anyone's Voice: End-to-End Expressive Voice Conversion
                  with {A} Conditional Diffusion Model},
  booktitle    = {Odyssey 2024: The Speaker and Language Recognition Workshop, Quebec
                  City, Canada, June 18-21, 2024},
  pages        = {172--179},
  publisher    = {{ISCA}},
  year         = {2024},
  url          = {https://doi.org/10.21437/odyssey.2024-25},
  doi          = {10.21437/ODYSSEY.2024-25},
  timestamp    = {Wed, 31 Jul 2024 15:08:41 +0200},
  biburl       = {https://dblp.org/rec/conf/odyssey/DuL0KS24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/odyssey/0003SB0024,
  author       = {Kun Zhou and
                  Berrak Sisman and
                  Carlos Busso and
                  Bin Ma and
                  Haizhou Li},
  editor       = {Najim Dehak and
                  Patrick Cardinal},
  title        = {{Mixed-EVC:} Mixed Emotion Synthesis and Control in Voice Conversion},
  booktitle    = {Odyssey 2024: The Speaker and Language Recognition Workshop, Quebec
                  City, Canada, June 18-21, 2024},
  pages        = {180--186},
  publisher    = {{ISCA}},
  year         = {2024},
  url          = {https://doi.org/10.21437/odyssey.2024-26},
  doi          = {10.21437/ODYSSEY.2024-26},
  timestamp    = {Tue, 30 Jul 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/odyssey/0003SB0024.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/odyssey/ChandraDS24,
  author       = {Shreeram Suresh Chandra and
                  Zongyang Du and
                  Berrak Sisman},
  editor       = {Najim Dehak and
                  Patrick Cardinal},
  title        = {Exploring speech style spaces with language models: Emotional {TTS}
                  without emotion labels},
  booktitle    = {Odyssey 2024: The Speaker and Language Recognition Workshop, Quebec
                  City, Canada, June 18-21, 2024},
  pages        = {194--200},
  publisher    = {{ISCA}},
  year         = {2024},
  url          = {https://doi.org/10.21437/odyssey.2024-28},
  doi          = {10.21437/ODYSSEY.2024-28},
  timestamp    = {Sun, 06 Oct 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/odyssey/ChandraDS24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/odyssey/GoncalvesSNMT0D24,
  author       = {Lucas Goncalves and
                  Ali N. Salman and
                  Abinay Reddy Naini and
                  Laureano Moro{-}Vel{\'{a}}zquez and
                  Thomas Thebaud and
                  Paola Garc{\'{\i}}a and
                  Najim Dehak and
                  Berrak Sisman and
                  Carlos Busso},
  editor       = {Najim Dehak and
                  Patrick Cardinal},
  title        = {{Odyssey} 2024 - Speech Emotion Recognition Challenge: Dataset, Baseline
                  Framework, and Results},
  booktitle    = {Odyssey 2024: The Speaker and Language Recognition Workshop, Quebec
                  City, Canada, June 18-21, 2024},
  pages        = {247--254},
  publisher    = {{ISCA}},
  year         = {2024},
  url          = {https://doi.org/10.21437/odyssey.2024-35},
  doi          = {10.21437/ODYSSEY.2024-35},
  timestamp    = {Tue, 30 Jul 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/odyssey/GoncalvesSNMT0D24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/slt/LeeUS24,
  author       = {Philip H. Lee and
                  Ismail Rasim Ulgen and
                  Berrak Sisman},
  title        = {Discrete Unit Based Masking For Improving Disentanglement in Voice
                  Conversion},
  booktitle    = {{IEEE} Spoken Language Technology Workshop, {SLT} 2024, Macao, December
                  2-5, 2024},
  pages        = {742--749},
  publisher    = {{IEEE}},
  year         = {2024},
  url          = {https://doi.org/10.1109/SLT61566.2024.10832297},
  doi          = {10.1109/SLT61566.2024.10832297},
  timestamp    = {Fri, 21 Feb 2025 21:48:34 +0100},
  biburl       = {https://dblp.org/rec/conf/slt/LeeUS24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/tencon/LamZCSH24,
  author       = {Perry Lam and
                  Huayun Zhang and
                  Nancy F. Chen and
                  Berrak Sisman and
                  Dorien Herremans},
  title        = {{SNIPER} Training: Single-Shot Sparse Training for Text-to-Speech},
  booktitle    = {{IEEE} Region 10 Conference, {TENCON} 2024, Singapore, December 1-4,
                  2024},
  pages        = {327--330},
  publisher    = {{IEEE}},
  year         = {2024},
  url          = {https://doi.org/10.1109/TENCON61640.2024.10902970},
  doi          = {10.1109/TENCON61640.2024.10902970},
  timestamp    = {Sat, 15 Nov 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/tencon/LamZCSH24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/tencon/MelechovskyMSH24,
  author       = {Jan Melechovsk{\'{y}} and
                  Ambuj Mehrish and
                  Berrak Sisman and
                  Dorien Herremans},
  title        = {Accented Text-to-Speech Synthesis with a Conditional Variational Autoencoder},
  booktitle    = {{IEEE} Region 10 Conference, {TENCON} 2024, Singapore, December 1-4,
                  2024},
  pages        = {343--346},
  publisher    = {{IEEE}},
  year         = {2024},
  url          = {https://doi.org/10.1109/TENCON61640.2024.10902981},
  doi          = {10.1109/TENCON61640.2024.10902981},
  timestamp    = {Sat, 15 Nov 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/tencon/MelechovskyMSH24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/tencon/MelechovskyMSH24a,
  author       = {Jan Melechovsk{\'{y}} and
                  Ambuj Mehrish and
                  Berrak Sisman and
                  Dorien Herremans},
  title        = {Accent Conversion in Text-to-Speech Using Multi-Level {VAE} and Adversarial
                  Training},
  booktitle    = {{IEEE} Region 10 Conference, {TENCON} 2024, Singapore, December 1-4,
                  2024},
  pages        = {473--476},
  publisher    = {{IEEE}},
  year         = {2024},
  url          = {https://doi.org/10.1109/TENCON61640.2024.10902878},
  doi          = {10.1109/TENCON61640.2024.10902878},
  timestamp    = {Sat, 15 Nov 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/tencon/MelechovskyMSH24a.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint 2401.11017, listed by dblp under CoRR.
@article{DBLP:journals/corr/abs-2401-11017,
  author       = {Ismail Rasim Ulgen and
                  Zongyang Du and
                  Carlos Busso and
                  Berrak Sisman},
  title        = {Revealing Emotional Clusters in Speaker Embeddings: {A} Contrastive
                  Learning Strategy for Speech Emotion Recognition},
  journal      = {CoRR},
  volume       = {abs/2401.11017},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2401.11017},
  doi          = {10.48550/ARXIV.2401.11017},
  eprinttype   = {arXiv},
  eprint       = {2401.11017},
  timestamp    = {Wed, 07 Feb 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2401-11017.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint 2403.14083, listed by dblp under CoRR.
% The system name in the title is brace-protected so sentence-casing styles keep its mixed case.
@article{DBLP:journals/corr/abs-2403-14083,
  author       = {Thejan Rajapakshe and
                  Rajib Rana and
                  Sara Khalifa and
                  Berrak Sisman and
                  Bj{\"{o}}rn W. Schuller and
                  Carlos Busso},
  title        = {{emoDARTS:} Joint Optimisation of {CNN} {\&} Sequential Neural Network
                  Architectures for Superior Speech Emotion Recognition},
  journal      = {CoRR},
  volume       = {abs/2403.14083},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2403.14083},
  doi          = {10.48550/ARXIV.2403.14083},
  eprinttype   = {arXiv},
  eprint       = {2403.14083},
  timestamp    = {Tue, 24 Mar 2026 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2403-14083.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint 2405.01730, listed by dblp under CoRR.
@article{DBLP:journals/corr/abs-2405-01730,
  author       = {Zongyang Du and
                  Junchen Lu and
                  Kun Zhou and
                  Lakshmish Kaushik and
                  Berrak Sisman},
  title        = {Converting Anyone's Voice: End-to-End Expressive Voice Conversion
                  with a Conditional Diffusion Model},
  journal      = {CoRR},
  volume       = {abs/2405.01730},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2405.01730},
  doi          = {10.48550/ARXIV.2405.01730},
  eprinttype   = {arXiv},
  eprint       = {2405.01730},
  timestamp    = {Mon, 24 Jun 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2405-01730.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint 2405.11413, listed by dblp under CoRR.
@article{DBLP:journals/corr/abs-2405-11413,
  author       = {Shreeram Suresh Chandra and
                  Zongyang Du and
                  Berrak Sisman},
  title        = {Exploring speech style spaces with language models: Emotional {TTS}
                  without emotion labels},
  journal      = {CoRR},
  volume       = {abs/2405.11413},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2405.11413},
  doi          = {10.48550/ARXIV.2405.11413},
  eprinttype   = {arXiv},
  eprint       = {2405.11413},
  timestamp    = {Mon, 24 Jun 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2405-11413.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint 2406.01018, listed by dblp under CoRR. Same authors and (up to capitalisation) title as
% DBLP:conf/tencon/MelechovskyMSH24a in this file (presumably the published version).
@article{DBLP:journals/corr/abs-2406-01018,
  author       = {Jan Melechovsk{\'{y}} and
                  Ambuj Mehrish and
                  Berrak Sisman and
                  Dorien Herremans},
  title        = {Accent Conversion in Text-To-Speech Using Multi-Level {VAE} and Adversarial
                  Training},
  journal      = {CoRR},
  volume       = {abs/2406.01018},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2406.01018},
  doi          = {10.48550/ARXIV.2406.01018},
  eprinttype   = {arXiv},
  eprint       = {2406.01018},
  timestamp    = {Wed, 24 Jul 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2406-01018.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint 2406.03637, listed by dblp under CoRR.
@article{DBLP:journals/corr/abs-2406-03637,
  author       = {Ahad Jawaid and
                  Shreeram Suresh Chandra and
                  Junchen Lu and
                  Berrak Sisman},
  title        = {Style Mixture of Experts for Expressive Text-To-Speech Synthesis},
  journal      = {CoRR},
  volume       = {abs/2406.03637},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2406.03637},
  doi          = {10.48550/ARXIV.2406.03637},
  eprinttype   = {arXiv},
  eprint       = {2406.03637},
  timestamp    = {Wed, 24 Jul 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2406-03637.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint 2407.04291, listed by dblp under CoRR.
@article{DBLP:journals/corr/abs-2407-04291,
  author       = {Ismail Rasim Ulgen and
                  Carlos Busso and
                  John H. L. Hansen and
                  Berrak Sisman},
  title        = {We Need Variations in Speech Synthesis: Sub-center Modelling for Speaker
                  Embeddings},
  journal      = {CoRR},
  volume       = {abs/2407.04291},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2407.04291},
  doi          = {10.48550/ARXIV.2407.04291},
  eprinttype   = {arXiv},
  eprint       = {2407.04291},
  timestamp    = {Sat, 24 Aug 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2407-04291.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint 2408.06827, listed by dblp under CoRR. Same authors and title as
% DBLP:journals/spl/LamZCSH25 in this file (presumably the published version).
@article{DBLP:journals/corr/abs-2408-06827,
  author       = {Perry Lam and
                  Huayun Zhang and
                  Nancy F. Chen and
                  Berrak Sisman and
                  Dorien Herremans},
  title        = {{PRESENT:} Zero-Shot Text-to-Prosody Control},
  journal      = {CoRR},
  volume       = {abs/2408.06827},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2408.06827},
  doi          = {10.48550/ARXIV.2408.06827},
  eprinttype   = {arXiv},
  eprint       = {2408.06827},
  timestamp    = {Mon, 03 Mar 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2408-06827.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint 2408.17432, listed by dblp under CoRR.
% The system name in the title is brace-protected so sentence-casing styles keep its mixed case.
@article{DBLP:journals/corr/abs-2408-17432,
  author       = {Ismail Rasim Ulgen and
                  Shreeram Suresh Chandra and
                  Junchen Lu and
                  Berrak Sisman},
  title        = {{SelectTTS:} Synthesizing Anyone's Voice via Discrete Unit-Based Frame
                  Selection},
  journal      = {CoRR},
  volume       = {abs/2408.17432},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2408.17432},
  doi          = {10.48550/ARXIV.2408.17432},
  eprinttype   = {arXiv},
  eprint       = {2408.17432},
  timestamp    = {Sat, 28 Sep 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2408-17432.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint 2409.11560, listed by dblp under CoRR.
@article{DBLP:journals/corr/abs-2409-11560,
  author       = {Philip H. Lee and
                  Ismail Rasim Ulgen and
                  Berrak Sisman},
  title        = {Discrete Unit based Masking for Improving Disentanglement in Voice
                  Conversion},
  journal      = {CoRR},
  volume       = {abs/2409.11560},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2409.11560},
  doi          = {10.48550/ARXIV.2409.11560},
  eprinttype   = {arXiv},
  eprint       = {2409.11560},
  timestamp    = {Mon, 21 Oct 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2409-11560.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint 2410.13342, listed by dblp under CoRR.
@article{DBLP:journals/corr/abs-2410-13342,
  author       = {Jan Melechovsk{\'{y}} and
                  Ambuj Mehrish and
                  Berrak Sisman and
                  Dorien Herremans},
  title        = {{DART:} Disentanglement of Accent and Speaker Representation in Multispeaker
                  Text-to-Speech},
  journal      = {CoRR},
  volume       = {abs/2410.13342},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2410.13342},
  doi          = {10.48550/ARXIV.2410.13342},
  eprinttype   = {arXiv},
  eprint       = {2410.13342},
  timestamp    = {Fri, 29 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2410-13342.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Journal article, IEEE Trans. Affect. Comput. 14(1), 2023. Same authors and title as
% DBLP:journals/corr/abs-2201-03967 in this file (presumably its arXiv preprint).
@article{DBLP:journals/taffco/ZhouSRSL23,
  author       = {Kun Zhou and
                  Berrak Sisman and
                  Rajib Rana and
                  Bj{\"{o}}rn W. Schuller and
                  Haizhou Li},
  title        = {Emotion Intensity and its Control for Emotional Voice Conversion},
  journal      = {{IEEE} Trans. Affect. Comput.},
  volume       = {14},
  number       = {1},
  pages        = {31--48},
  year         = {2023},
  url          = {https://doi.org/10.1109/TAFFC.2022.3175578},
  doi          = {10.1109/TAFFC.2022.3175578},
  timestamp    = {Sat, 06 Jan 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/taffco/ZhouSRSL23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Journal article, IEEE Trans. Affect. Comput. 14(4), 2023. Same authors and (up to capitalisation) title as
% DBLP:journals/corr/abs-2208-05890 in this file (presumably its arXiv preprint).
@article{DBLP:journals/taffco/ZhouSRSL23a,
  author       = {Kun Zhou and
                  Berrak Sisman and
                  Rajib Rana and
                  Bj{\"{o}}rn W. Schuller and
                  Haizhou Li},
  title        = {Speech Synthesis With Mixed Emotions},
  journal      = {{IEEE} Trans. Affect. Comput.},
  volume       = {14},
  number       = {4},
  pages        = {3120--3134},
  year         = {2023},
  url          = {https://doi.org/10.1109/TAFFC.2022.3233324},
  doi          = {10.1109/TAFFC.2022.3233324},
  timestamp    = {Fri, 08 Mar 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/taffco/ZhouSRSL23a.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Interspeech 2023 paper. Same authors and title as DBLP:journals/corr/abs-2306-00794 in this file
% (presumably its arXiv preprint). The system name in the title is brace-protected so
% sentence-casing styles keep its mixed case.
@inproceedings{DBLP:conf/interspeech/HaqueSCS0023,
  author       = {Mirazul Haque and
                  Rutvij Shah and
                  Simin Chen and
                  Berrak Sisman and
                  Cong Liu and
                  Wei Yang},
  editor       = {Naomi Harte and
                  Julie Carson{-}Berndsen and
                  Gareth Jones},
  title        = {{SlothSpeech:} Denial-of-service Attack Against Speech Recognition Models},
  booktitle    = {24th Annual Conference of the International Speech Communication Association,
                  Interspeech 2023, Dublin, Ireland, August 20-24, 2023},
  pages        = {1274--1278},
  publisher    = {{ISCA}},
  year         = {2023},
  url          = {https://doi.org/10.21437/Interspeech.2023-1118},
  doi          = {10.21437/INTERSPEECH.2023-1118},
  timestamp    = {Fri, 14 Jun 2024 14:12:12 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/HaqueSCS0023.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Interspeech 2023 paper. Same authors and title as DBLP:journals/corr/abs-2306-17005 in this file
% (presumably its arXiv preprint).
@inproceedings{DBLP:conf/interspeech/LuS0023,
  author       = {Junchen Lu and
                  Berrak Sisman and
                  Mingyang Zhang and
                  Haizhou Li},
  editor       = {Naomi Harte and
                  Julie Carson{-}Berndsen and
                  Gareth Jones},
  title        = {High-Quality Automatic Voice Over with Accurate Alignment: Supervision
                  through Self-Supervised Discrete Speech Units},
  booktitle    = {24th Annual Conference of the International Speech Communication Association,
                  Interspeech 2023, Dublin, Ireland, August 20-24, 2023},
  pages        = {5536--5540},
  publisher    = {{ISCA}},
  year         = {2023},
  url          = {https://doi.org/10.21437/Interspeech.2023-2179},
  doi          = {10.21437/INTERSPEECH.2023-2179},
  timestamp    = {Sun, 04 Aug 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/LuS0023.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint 2305.07216, listed by dblp under CoRR.
@article{DBLP:journals/corr/abs-2305-07216,
  author       = {Lucas Goncalves and
                  Seong{-}Gyun Leem and
                  Wei{-}Cheng Lin and
                  Berrak Sisman and
                  Carlos Busso},
  title        = {Versatile Audio-Visual Learning for Handling Single and Multi Modalities
                  in Emotion Regression and Classification Tasks},
  journal      = {CoRR},
  volume       = {abs/2305.07216},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2305.07216},
  doi          = {10.48550/ARXIV.2305.07216},
  eprinttype   = {arXiv},
  eprint       = {2305.07216},
  timestamp    = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2305-07216.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint 2305.14402, listed by dblp under CoRR.
@article{DBLP:journals/corr/abs-2305-14402,
  author       = {Thejan Rajapakshe and
                  Rajib Rana and
                  Sara Khalifa and
                  Berrak Sisman and
                  Bj{\"{o}}rn W. Schuller},
  title        = {Improving Speech Emotion Recognition Performance using Differentiable
                  Architecture Search},
  journal      = {CoRR},
  volume       = {abs/2305.14402},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2305.14402},
  doi          = {10.48550/ARXIV.2305.14402},
  eprinttype   = {arXiv},
  eprint       = {2305.14402},
  timestamp    = {Tue, 24 Mar 2026 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2305-14402.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint 2306.00794, listed by dblp under CoRR. Same authors and title as
% DBLP:conf/interspeech/HaqueSCS0023 in this file (presumably the published version).
% The system name in the title is brace-protected so sentence-casing styles keep its mixed case.
@article{DBLP:journals/corr/abs-2306-00794,
  author       = {Mirazul Haque and
                  Rutvij Shah and
                  Simin Chen and
                  Berrak Sisman and
                  Cong Liu and
                  Wei Yang},
  title        = {{SlothSpeech:} Denial-of-service Attack Against Speech Recognition Models},
  journal      = {CoRR},
  volume       = {abs/2306.00794},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2306.00794},
  doi          = {10.48550/ARXIV.2306.00794},
  eprinttype   = {arXiv},
  eprint       = {2306.00794},
  timestamp    = {Fri, 25 Aug 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2306-00794.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint 2306.17005, listed by dblp under CoRR. Same authors and title as
% DBLP:conf/interspeech/LuS0023 in this file (presumably the published version).
@article{DBLP:journals/corr/abs-2306-17005,
  author       = {Junchen Lu and
                  Berrak Sisman and
                  Mingyang Zhang and
                  Haizhou Li},
  title        = {High-Quality Automatic Voice Over with Accurate Alignment: Supervision
                  through Self-Supervised Discrete Speech Units},
  journal      = {CoRR},
  volume       = {abs/2306.17005},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2306.17005},
  doi          = {10.48550/ARXIV.2306.17005},
  eprinttype   = {arXiv},
  eprint       = {2306.17005},
  timestamp    = {Mon, 03 Jul 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2306-17005.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Journal article, Speech Commun. 137, 2022.
@article{DBLP:journals/speech/ZhouSLL22,
  author       = {Kun Zhou and
                  Berrak Sisman and
                  Rui Liu and
                  Haizhou Li},
  title        = {Emotional voice conversion: Theory, databases and {ESD}},
  journal      = {Speech Commun.},
  volume       = {137},
  pages        = {1--18},
  year         = {2022},
  url          = {https://doi.org/10.1016/j.specom.2021.11.006},
  doi          = {10.1016/J.SPECOM.2021.11.006},
  timestamp    = {Sat, 06 Jan 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/speech/ZhouSLL22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Journal article, IEEE ACM Trans. Audio Speech Lang. Process. 30, 2022.
@article{DBLP:journals/taslp/LiuSGL22,
  author       = {Rui Liu and
                  Berrak Sisman and
                  Guanglai Gao and
                  Haizhou Li},
  title        = {Decoding Knowledge Transfer for Neural Text-to-Speech Training},
  journal      = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  volume       = {30},
  pages        = {1789--1802},
  year         = {2022},
  url          = {https://doi.org/10.1109/TASLP.2022.3171974},
  doi          = {10.1109/TASLP.2022.3171974},
  timestamp    = {Thu, 27 Jul 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/taslp/LiuSGL22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% ICASSP 2022 paper. The system name is restored to its mixed-case spelling and brace-protected
% so sentence-casing styles do not flatten it.
@inproceedings{DBLP:conf/icassp/LuSLZL22,
  author       = {Junchen Lu and
                  Berrak Sisman and
                  Rui Liu and
                  Mingyang Zhang and
                  Haizhou Li},
  title        = {{VisualTTS:} {TTS} with Accurate Lip-Speech Synchronization for Automatic
                  Voice Over},
  booktitle    = {{IEEE} International Conference on Acoustics, Speech and Signal Processing,
                  {ICASSP} 2022, Virtual and Singapore, 23-27 May 2022},
  pages        = {8032--8036},
  publisher    = {{IEEE}},
  year         = {2022},
  url          = {https://doi.org/10.1109/ICASSP43922.2022.9746421},
  doi          = {10.1109/ICASSP43922.2022.9746421},
  timestamp    = {Sat, 06 Sep 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icassp/LuSLZL22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Interspeech 2022 paper. Same authors and title as DBLP:journals/corr/abs-2209-10890 in this file
% (presumably its arXiv preprint).
@inproceedings{DBLP:conf/interspeech/LamZCS22,
  author       = {Perry Lam and
                  Huayun Zhang and
                  Nancy F. Chen and
                  Berrak Sisman},
  editor       = {Hanseok Ko and
                  John H. L. Hansen},
  title        = {{EPIC} {TTS} Models: Empirical Pruning Investigations Characterizing
                  Text-To-Speech Models},
  booktitle    = {23rd Annual Conference of the International Speech Communication Association,
                  Interspeech 2022, Incheon, Korea, September 18-22, 2022},
  pages        = {823--827},
  publisher    = {{ISCA}},
  year         = {2022},
  url          = {https://doi.org/10.21437/Interspeech.2022-10626},
  doi          = {10.21437/INTERSPEECH.2022-10626},
  timestamp    = {Mon, 03 Mar 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/interspeech/LamZCS22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Interspeech 2022 paper.
@inproceedings{DBLP:conf/interspeech/DuSZ022,
  author       = {Zongyang Du and
                  Berrak Sisman and
                  Kun Zhou and
                  Haizhou Li},
  editor       = {Hanseok Ko and
                  John H. L. Hansen},
  title        = {Disentanglement of Emotional Style and Speaker Identity for Expressive
                  Voice Conversion},
  booktitle    = {23rd Annual Conference of the International Speech Communication Association,
                  Interspeech 2022, Incheon, Korea, September 18-22, 2022},
  pages        = {2603--2607},
  publisher    = {{ISCA}},
  year         = {2022},
  url          = {https://doi.org/10.21437/Interspeech.2022-10249},
  doi          = {10.21437/INTERSPEECH.2022-10249},
  timestamp    = {Sat, 06 Jan 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/interspeech/DuSZ022.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Interspeech 2022 paper. Same authors and title as DBLP:journals/corr/abs-2206-07229 in this file
% (presumably its arXiv preprint).
@inproceedings{DBLP:conf/interspeech/0008SSG022,
  author       = {Rui Liu and
                  Berrak Sisman and
                  Bj{\"{o}}rn W. Schuller and
                  Guanglai Gao and
                  Haizhou Li},
  editor       = {Hanseok Ko and
                  John H. L. Hansen},
  title        = {Accurate Emotion Strength Assessment for Seen and Unseen Speech Based
                  on Data-Driven Deep Learning},
  booktitle    = {23rd Annual Conference of the International Speech Communication Association,
                  Interspeech 2022, Incheon, Korea, September 18-22, 2022},
  pages        = {5493--5497},
  publisher    = {{ISCA}},
  year         = {2022},
  url          = {https://doi.org/10.21437/Interspeech.2022-534},
  doi          = {10.21437/INTERSPEECH.2022-534},
  timestamp    = {Sat, 06 Sep 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/0008SSG022.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% SLT 2022 workshop paper.
% NOTE(review): year is 2022 while the booktitle dates the event January 9-12, 2023 and the DOI
% contains 2023 - confirm which year the bibliography should print before changing it.
@inproceedings{DBLP:conf/slt/MelechovskyMHS22,
  author       = {Jan Melechovsk{\'{y}} and
                  Ambuj Mehrish and
                  Dorien Herremans and
                  Berrak Sisman},
  title        = {Learning Accent Representation with Multi-Level {VAE} Towards Controllable
                  Speech Synthesis},
  booktitle    = {{IEEE} Spoken Language Technology Workshop, {SLT} 2022, Doha, Qatar,
                  January 9-12, 2023},
  pages        = {928--935},
  publisher    = {{IEEE}},
  year         = {2022},
  url          = {https://doi.org/10.1109/SLT54892.2023.10023072},
  doi          = {10.1109/SLT54892.2023.10023072},
  timestamp    = {Sat, 15 Nov 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/slt/MelechovskyMHS22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint 2201.03967, listed by dblp under CoRR. Same authors and title as
% DBLP:journals/taffco/ZhouSRSL23 in this file (presumably the published version).
% The doi and DOI-based url follow the 10.48550/arXiv.<id> pattern used by the other CoRR entries.
@article{DBLP:journals/corr/abs-2201-03967,
  author       = {Kun Zhou and
                  Berrak Sisman and
                  Rajib Rana and
                  Bj{\"{o}}rn W. Schuller and
                  Haizhou Li},
  title        = {Emotion Intensity and its Control for Emotional Voice Conversion},
  journal      = {CoRR},
  volume       = {abs/2201.03967},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2201.03967},
  doi          = {10.48550/ARXIV.2201.03967},
  eprinttype   = {arXiv},
  eprint       = {2201.03967},
  timestamp    = {Sat, 06 Jan 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2201-03967.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint 2206.07229, listed by dblp under CoRR. Same authors and title as
% DBLP:conf/interspeech/0008SSG022 in this file (presumably the published version).
@article{DBLP:journals/corr/abs-2206-07229,
  author       = {Rui Liu and
                  Berrak Sisman and
                  Bj{\"{o}}rn W. Schuller and
                  Guanglai Gao and
                  Haizhou Li},
  title        = {Accurate Emotion Strength Assessment for Seen and Unseen Speech Based
                  on Data-Driven Deep Learning},
  journal      = {CoRR},
  volume       = {abs/2206.07229},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2206.07229},
  doi          = {10.48550/ARXIV.2206.07229},
  eprinttype   = {arXiv},
  eprint       = {2206.07229},
  timestamp    = {Sat, 06 Sep 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2206-07229.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint 2208.05890, listed by dblp under CoRR. Same authors and (up to capitalisation) title as
% DBLP:journals/taffco/ZhouSRSL23a in this file (presumably the published version).
@article{DBLP:journals/corr/abs-2208-05890,
  author       = {Kun Zhou and
                  Berrak Sisman and
                  Rajib Rana and
                  Bj{\"{o}}rn W. Schuller and
                  Haizhou Li},
  title        = {Speech Synthesis with Mixed Emotions},
  journal      = {CoRR},
  volume       = {abs/2208.05890},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2208.05890},
  doi          = {10.48550/ARXIV.2208.05890},
  eprinttype   = {arXiv},
  eprint       = {2208.05890},
  timestamp    = {Sat, 06 Jan 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2208-05890.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint 2209.10804, listed by dblp under CoRR.
@article{DBLP:journals/corr/abs-2209-10804,
  author       = {Rui Liu and
                  Berrak Sisman and
                  Guanglai Gao and
                  Haizhou Li},
  title        = {Controllable Accented Text-to-Speech Synthesis},
  journal      = {CoRR},
  volume       = {abs/2209.10804},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2209.10804},
  doi          = {10.48550/ARXIV.2209.10804},
  eprinttype   = {arXiv},
  eprint       = {2209.10804},
  timestamp    = {Sat, 06 Sep 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2209-10804.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint 2209.10890, listed by dblp under CoRR. Same authors and title as
% DBLP:conf/interspeech/LamZCS22 in this file (presumably the published version).
@article{DBLP:journals/corr/abs-2209-10890,
  author       = {Perry Lam and
                  Huayun Zhang and
                  Nancy F. Chen and
                  Berrak Sisman},
  title        = {{EPIC} {TTS} Models: Empirical Pruning Investigations Characterizing
                  Text-To-Speech Models},
  journal      = {CoRR},
  volume       = {abs/2209.10890},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2209.10890},
  doi          = {10.48550/ARXIV.2209.10890},
  eprinttype   = {arXiv},
  eprint       = {2209.10890},
  timestamp    = {Tue, 07 May 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2209-10890.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint 2210.13756, listed by dblp under CoRR.
@article{DBLP:journals/corr/abs-2210-13756,
  author       = {Kun Zhou and
                  Berrak Sisman and
                  Carlos Busso and
                  Haizhou Li},
  title        = {Mixed Emotion Modelling for Emotional Voice Conversion},
  journal      = {CoRR},
  volume       = {abs/2210.13756},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2210.13756},
  doi          = {10.48550/ARXIV.2210.13756},
  eprinttype   = {arXiv},
  eprint       = {2210.13756},
  timestamp    = {Sat, 06 Jan 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2210-13756.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint 2211.03316, listed by dblp under CoRR. Same authors and title as
% DBLP:conf/tencon/MelechovskyMSH24 in this file (presumably the published version).
@article{DBLP:journals/corr/abs-2211-03316,
  author       = {Jan Melechovsk{\'{y}} and
                  Ambuj Mehrish and
                  Berrak Sisman and
                  Dorien Herremans},
  title        = {Accented Text-to-Speech Synthesis with a Conditional Variational Autoencoder},
  journal      = {CoRR},
  volume       = {abs/2211.03316},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2211.03316},
  doi          = {10.48550/ARXIV.2211.03316},
  eprinttype   = {arXiv},
  eprint       = {2211.03316},
  timestamp    = {Thu, 10 Nov 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2211-03316.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint 2211.07283, listed by dblp under CoRR.
@article{DBLP:journals/corr/abs-2211-07283,
  author       = {Perry Lam and
                  Huayun Zhang and
                  Nancy F. Chen and
                  Berrak Sisman and
                  Dorien Herremans},
  title        = {{SNIPER} Training: Variable Sparsity Rate Training For Text-To-Speech},
  journal      = {CoRR},
  volume       = {abs/2211.07283},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2211.07283},
  doi          = {10.48550/ARXIV.2211.07283},
  eprinttype   = {arXiv},
  eprint       = {2211.07283},
  timestamp    = {Tue, 07 May 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2211-07283.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Journal article, Neural Networks 141, 2021.
% The system name in the title is brace-protected so sentence-casing styles keep its mixed case.
@article{DBLP:journals/nn/LiuSLL21,
  author       = {Rui Liu and
                  Berrak Sisman and
                  Yixing Lin and
                  Haizhou Li},
  title        = {{FastTalker:} {A} neural text-to-speech architecture with shallow and
                  group autoregression},
  journal      = {Neural Networks},
  volume       = {141},
  pages        = {306--314},
  year         = {2021},
  url          = {https://doi.org/10.1016/j.neunet.2021.04.016},
  doi          = {10.1016/J.NEUNET.2021.04.016},
  timestamp    = {Thu, 16 Sep 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/nn/LiuSLL21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Journal article, IEEE ACM Trans. Audio Speech Lang. Process. 29, 2021.
@article{DBLP:journals/taslp/SismanYKL21,
  author       = {Berrak Sisman and
                  Junichi Yamagishi and
                  Simon King and
                  Haizhou Li},
  title        = {An Overview of Voice Conversion and Its Challenges: From Statistical
                  Modeling to Deep Learning},
  journal      = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  volume       = {29},
  pages        = {132--157},
  year         = {2021},
  url          = {https://doi.org/10.1109/TASLP.2020.3038524},
  doi          = {10.1109/TASLP.2020.3038524},
  timestamp    = {Mon, 29 Jan 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/taslp/SismanYKL21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Journal article (IEEE/ACM TASLP 29, 2021). The language name in the title is
% braced so sentence-casing styles do not lowercase it.
@article{DBLP:journals/taslp/LiuSBYGL21,
  author       = {Rui Liu and
                  Berrak Sisman and
                  Feilong Bao and
                  Jichen Yang and
                  Guanglai Gao and
                  Haizhou Li},
  title        = {Exploiting Morphological and Phonological Features to Improve Prosodic
                  Phrasing for {Mongolian} Speech Synthesis},
  journal      = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  volume       = {29},
  pages        = {274--285},
  year         = {2021},
  url          = {https://doi.org/10.1109/TASLP.2020.3040523},
  doi          = {10.1109/TASLP.2020.3040523},
  timestamp    = {Thu, 27 Jul 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/taslp/LiuSBYGL21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Journal article (IEEE/ACM TASLP 29, 2021); url resolves the DOI given in doi.
@article{DBLP:journals/taslp/LiuSGL21,
  author       = {Rui Liu and
                  Berrak Sisman and
                  Guanglai Gao and
                  Haizhou Li},
  title        = {Expressive {TTS} Training With Frame and Style Reconstruction Loss},
  journal      = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  volume       = {29},
  pages        = {1806--1818},
  year         = {2021},
  url          = {https://doi.org/10.1109/TASLP.2021.3076369},
  doi          = {10.1109/TASLP.2021.3076369},
  timestamp    = {Thu, 27 Jul 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/taslp/LiuSGL21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Conference paper (ASRU 2021); url resolves the DOI given in doi.
@inproceedings{DBLP:conf/asru/DuSZL21,
  author       = {Zongyang Du and
                  Berrak Sisman and
                  Kun Zhou and
                  Haizhou Li},
  title        = {Expressive Voice Conversion: {A} Joint Framework for Speaker Identity
                  and Emotional Style Transfer},
  booktitle    = {{IEEE} Automatic Speech Recognition and Understanding Workshop, {ASRU}
                  2021, Cartagena, Colombia, December 13-17, 2021},
  pages        = {594--601},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/ASRU51503.2021.9687906},
  doi          = {10.1109/ASRU51503.2021.9687906},
  timestamp    = {Sat, 06 Jan 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/asru/DuSZL21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Conference paper (ASRU 2021); url resolves the DOI given in doi.
@inproceedings{DBLP:conf/asru/NikonorovSZL21,
  author       = {Sergey Nikonorov and
                  Berrak Sisman and
                  Mingyang Zhang and
                  Haizhou Li},
  title        = {{DEEPA:} {A} Deep Neural Analyzer for Speech and Singing Vocoding},
  booktitle    = {{IEEE} Automatic Speech Recognition and Understanding Workshop, {ASRU}
                  2021, Cartagena, Colombia, December 13-17, 2021},
  pages        = {618--625},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/ASRU51503.2021.9687923},
  doi          = {10.1109/ASRU51503.2021.9687923},
  timestamp    = {Wed, 09 Feb 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/asru/NikonorovSZL21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Workshop paper (Blizzard Challenge 2021). The event name in the title is
% braced so sentence-casing styles keep its capitals.
@inproceedings{DBLP:conf/blizzard/0003ZZ0LS021,
  author       = {Mingyang Zhang and
                  Xuehao Zhou and
                  Kun Zhou and
                  Rui Liu and
                  Perry Lam and
                  Berrak Sisman and
                  Haizhou Li},
  title        = {{SUTD-NUS} System for {Blizzard Challenge} 2021},
  booktitle    = {The Blizzard Challenge 2021, virtual, October 23, 2021},
  publisher    = {{ISCA}},
  year         = {2021},
  url          = {https://doi.org/10.21437/Blizzard.2021-12},
  doi          = {10.21437/BLIZZARD.2021-12},
  timestamp    = {Mon, 30 Sep 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/blizzard/0003ZZ0LS021.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Conference paper (ICASSP 2021). The mid-title article is left unbraced so no
% style renders it as a forced capital "A".
@inproceedings{DBLP:conf/icassp/ZhouS0021,
  author       = {Kun Zhou and
                  Berrak Sisman and
                  Rui Liu and
                  Haizhou Li},
  title        = {Seen and Unseen Emotional Style Transfer for Voice Conversion with
                  a New Emotional Speech Dataset},
  booktitle    = {{IEEE} International Conference on Acoustics, Speech and Signal Processing,
                  {ICASSP} 2021, Toronto, ON, Canada, June 6-11, 2021},
  pages        = {920--924},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/ICASSP39728.2021.9413391},
  doi          = {10.1109/ICASSP39728.2021.9413391},
  timestamp    = {Sat, 06 Sep 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icassp/ZhouS0021.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Conference paper (ICASSP 2021). The system name is written in camel case and
% braced so styles preserve it. NOTE(review): confirm the spelling "GraphSpeech"
% against the published paper.
@inproceedings{DBLP:conf/icassp/0008S021,
  author       = {Rui Liu and
                  Berrak Sisman and
                  Haizhou Li},
  title        = {{GraphSpeech}: Syntax-Aware Graph Attention Network for Neural Speech
                  Synthesis},
  booktitle    = {{IEEE} International Conference on Acoustics, Speech and Signal Processing,
                  {ICASSP} 2021, Toronto, ON, Canada, June 6-11, 2021},
  pages        = {6059--6063},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/ICASSP39728.2021.9413513},
  doi          = {10.1109/ICASSP39728.2021.9413513},
  timestamp    = {Sat, 06 Sep 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icassp/0008S021.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Conference paper (Interspeech 2021). Editor names carry their Czech carons as
% brace-wrapped accent commands. NOTE(review): diacritics restored from the
% editors' usual name spellings; confirm against the proceedings.
@inproceedings{DBLP:conf/interspeech/ZhouSL21,
  author       = {Kun Zhou and
                  Berrak Sisman and
                  Haizhou Li},
  editor       = {Hynek Hermansky and
                  Honza {\v{C}}ernock{\'{y}} and
                  Luk{\'{a}}{\v{s}} Burget and
                  Lori Lamel and
                  Odette Scharenborg and
                  Petr Motl{\'{\i}}{\v{c}}ek},
  title        = {Limited Data Emotional Voice Conversion Leveraging Text-to-Speech:
                  Two-Stage Sequence-to-Sequence Training},
  booktitle    = {22nd Annual Conference of the International Speech Communication Association,
                  Interspeech 2021, Brno, Czechia, August 30 - September 3, 2021},
  pages        = {811--815},
  publisher    = {{ISCA}},
  year         = {2021},
  url          = {https://doi.org/10.21437/Interspeech.2021-781},
  doi          = {10.21437/INTERSPEECH.2021-781},
  timestamp    = {Tue, 11 Jun 2024 16:45:43 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/ZhouSL21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Conference paper (Interspeech 2021). Editor names carry their Czech carons as
% brace-wrapped accent commands. NOTE(review): diacritics restored from the
% editors' usual name spellings; confirm against the proceedings.
@inproceedings{DBLP:conf/interspeech/0008S021,
  author       = {Rui Liu and
                  Berrak Sisman and
                  Haizhou Li},
  editor       = {Hynek Hermansky and
                  Honza {\v{C}}ernock{\'{y}} and
                  Luk{\'{a}}{\v{s}} Burget and
                  Lori Lamel and
                  Odette Scharenborg and
                  Petr Motl{\'{\i}}{\v{c}}ek},
  title        = {Reinforcement Learning for Emotional Text-to-Speech Synthesis with
                  Improved Emotion Discriminability},
  booktitle    = {22nd Annual Conference of the International Speech Communication Association,
                  Interspeech 2021, Brno, Czechia, August 30 - September 3, 2021},
  pages        = {4648--4652},
  publisher    = {{ISCA}},
  year         = {2021},
  url          = {https://doi.org/10.21437/Interspeech.2021-1236},
  doi          = {10.21437/INTERSPEECH.2021-1236},
  timestamp    = {Sat, 06 Sep 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/0008S021.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Front-matter record of the SIGdial 2021 volume: the author list repeats the
% editor list and the title repeats the opening of the booktitle.
@inproceedings{DBLP:conf/sigdial/LiLYGSCVDWL21,
  author       = {Haizhou Li and
                  Gina{-}Anne Levow and
                  Zhou Yu and
                  Chitralekha Gupta and
                  Berrak Sisman and
                  Siqi Cai and
                  David Vandyke and
                  Nina Dethlefs and
                  Yan Wu and
                  Junyi Jessy Li},
  editor       = {Haizhou Li and
                  Gina{-}Anne Levow and
                  Zhou Yu and
                  Chitralekha Gupta and
                  Berrak Sisman and
                  Siqi Cai and
                  David Vandyke and
                  Nina Dethlefs and
                  Yan Wu and
                  Junyi Jessy Li},
  title        = {Proceedings of the 22nd Annual Meeting of the Special Interest Group
                  on Discourse and Dialogue},
  booktitle    = {Proceedings of the 22nd Annual Meeting of the Special Interest Group
                  on Discourse and Dialogue, SIGdial 2021, Singapore and Online, July
                  29-31, 2021},
  publisher    = {Association for Computational Linguistics},
  year         = {2021},
  url          = {https://aclanthology.org/2021.sigdial-1.0},
  timestamp    = {Mon, 07 Jul 2025 20:38:25 +0200},
  biburl       = {https://dblp.org/rec/conf/sigdial/LiLYGSCVDWL21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Conference paper (SLT 2021). The acronym is written in capitals and braced,
% and function words are lowercased, so the title renders correctly whether or
% not the style recases it.
@inproceedings{DBLP:conf/slt/ZhouS021,
  author       = {Kun Zhou and
                  Berrak Sisman and
                  Haizhou Li},
  title        = {{VAW-GAN} for Disentanglement and Recomposition of Emotional Elements
                  in Speech},
  booktitle    = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen,
                  China, January 19-22, 2021},
  pages        = {415--422},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/SLT48900.2021.9383526},
  doi          = {10.1109/SLT48900.2021.9383526},
  timestamp    = {Sat, 06 Jan 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/slt/ZhouS021.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Whole-proceedings record for SIGdial 2021; url points at the ACL Anthology volume page.
@proceedings{DBLP:conf/sigdial/2021,
  editor       = {Haizhou Li and
                  Gina{-}Anne Levow and
                  Zhou Yu and
                  Chitralekha Gupta and
                  Berrak Sisman and
                  Siqi Cai and
                  David Vandyke and
                  Nina Dethlefs and
                  Yan Wu and
                  Junyi Jessy Li},
  title        = {Proceedings of the 22nd Annual Meeting of the Special Interest Group
                  on Discourse and Dialogue, SIGdial 2021, Singapore and Online, July
                  29-31, 2021},
  publisher    = {Association for Computational Linguistics},
  year         = {2021},
  url          = {https://aclanthology.org/volumes/2021.sigdial-1/},
  isbn         = {978-1-954085-81-7},
  timestamp    = {Mon, 07 Jul 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/sigdial/2021.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint (CoRR abs/2103.16809); url points at the arXiv abstract page.
@article{DBLP:journals/corr/abs-2103-16809,
  author       = {Kun Zhou and
                  Berrak Sisman and
                  Haizhou Li},
  title        = {Limited Data Emotional Voice Conversion Leveraging Text-to-Speech:
                  Two-stage Sequence-to-Sequence Training},
  journal      = {CoRR},
  volume       = {abs/2103.16809},
  year         = {2021},
  url          = {https://arxiv.org/abs/2103.16809},
  eprinttype   = {arXiv},
  eprint       = {2103.16809},
  timestamp    = {Sat, 06 Jan 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2103-16809.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint (CoRR abs/2104.01408); url points at the arXiv abstract page.
@article{DBLP:journals/corr/abs-2104-01408,
  author       = {Rui Liu and
                  Berrak Sisman and
                  Haizhou Li},
  title        = {Reinforcement Learning for Emotional Text-to-Speech Synthesis with
                  Improved Emotion Discriminability},
  journal      = {CoRR},
  volume       = {abs/2104.01408},
  year         = {2021},
  url          = {https://arxiv.org/abs/2104.01408},
  eprinttype   = {arXiv},
  eprint       = {2104.01408},
  timestamp    = {Mon, 12 Apr 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2104-01408.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint (CoRR abs/2105.14762); url points at the arXiv abstract page.
@article{DBLP:journals/corr/abs-2105-14762,
  author       = {Kun Zhou and
                  Berrak Sisman and
                  Rui Liu and
                  Haizhou Li},
  title        = {Emotional Voice Conversion: Theory, Databases and {ESD}},
  journal      = {CoRR},
  volume       = {abs/2105.14762},
  year         = {2021},
  url          = {https://arxiv.org/abs/2105.14762},
  eprinttype   = {arXiv},
  eprint       = {2105.14762},
  timestamp    = {Sat, 06 Jan 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2105-14762.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint (CoRR abs/2107.03748); url points at the arXiv abstract page.
@article{DBLP:journals/corr/abs-2107-03748,
  author       = {Zongyang Du and
                  Berrak Sisman and
                  Kun Zhou and
                  Haizhou Li},
  title        = {Expressive Voice Conversion: {A} Joint Framework for Speaker Identity
                  and Emotional Style Transfer},
  journal      = {CoRR},
  volume       = {abs/2107.03748},
  year         = {2021},
  url          = {https://arxiv.org/abs/2107.03748},
  eprinttype   = {arXiv},
  eprint       = {2107.03748},
  timestamp    = {Sat, 06 Jan 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2107-03748.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint (CoRR abs/2110.03156). The model name in the title is braced
% so sentence-casing styles keep its internal capital.
@article{DBLP:journals/corr/abs-2110-03156,
  author       = {Rui Liu and
                  Berrak Sisman and
                  Haizhou Li},
  title        = {{StrengthNet}: Deep Learning-based Emotion Strength Assessment for Emotional
                  Speech Synthesis},
  journal      = {CoRR},
  volume       = {abs/2110.03156},
  year         = {2021},
  url          = {https://arxiv.org/abs/2110.03156},
  eprinttype   = {arXiv},
  eprint       = {2110.03156},
  timestamp    = {Fri, 22 Oct 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2110-03156.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint (CoRR abs/2110.03342). The system name in the title is braced
% so sentence-casing styles keep its capitals.
@article{DBLP:journals/corr/abs-2110-03342,
  author       = {Junchen Lu and
                  Berrak Sisman and
                  Rui Liu and
                  Mingyang Zhang and
                  Haizhou Li},
  title        = {{VisualTTS}: {TTS} with Accurate Lip-Speech Synchronization for Automatic
                  Voice Over},
  journal      = {CoRR},
  volume       = {abs/2110.03342},
  year         = {2021},
  url          = {https://arxiv.org/abs/2110.03342},
  eprinttype   = {arXiv},
  eprint       = {2110.03342},
  timestamp    = {Mon, 25 Oct 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2110-03342.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint (CoRR abs/2110.06434). The system name in the title is braced
% so sentence-casing styles keep its trailing capital.
@article{DBLP:journals/corr/abs-2110-06434,
  author       = {Sergey Nikonorov and
                  Berrak Sisman and
                  Mingyang Zhang and
                  Haizhou Li},
  title        = {{DeepA}: {A} Deep Neural Analyzer For Speech And Singing Vocoding},
  journal      = {CoRR},
  volume       = {abs/2110.06434},
  year         = {2021},
  url          = {https://arxiv.org/abs/2110.06434},
  eprinttype   = {arXiv},
  eprint       = {2110.06434},
  timestamp    = {Mon, 25 Oct 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2110-06434.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint (CoRR abs/2110.10326); url points at the arXiv abstract page.
@article{DBLP:journals/corr/abs-2110-10326,
  author       = {Zongyang Du and
                  Berrak Sisman and
                  Kun Zhou and
                  Haizhou Li},
  title        = {Identity Conversion for Emotional Speakers: {A} Study for Disentanglement
                  of Emotion Style and Speaker Identity},
  journal      = {CoRR},
  volume       = {abs/2110.10326},
  year         = {2021},
  url          = {https://arxiv.org/abs/2110.10326},
  eprinttype   = {arXiv},
  eprint       = {2110.10326},
  timestamp    = {Sat, 06 Jan 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2110-10326.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Journal article (Speech Communication 122, 2020). The system name in the
% title is braced so sentence-casing styles keep its internal capital.
@article{DBLP:journals/speech/ZhangSZL20,
  author       = {Mingyang Zhang and
                  Berrak Sisman and
                  Li Zhao and
                  Haizhou Li},
  title        = {{DeepConversion}: Voice conversion with limited parallel training data},
  journal      = {Speech Commun.},
  volume       = {122},
  pages        = {31--43},
  year         = {2020},
  url          = {https://doi.org/10.1016/j.specom.2020.05.004},
  doi          = {10.1016/J.SPECOM.2020.05.004},
  timestamp    = {Tue, 07 Sep 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/speech/ZhangSZL20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Journal article (IEEE Signal Processing Letters 27, 2020). The model name
% Tacotron is braced in the title so sentence-casing styles keep its capital.
@article{DBLP:journals/spl/LiuSBGL20,
  author       = {Rui Liu and
                  Berrak Sisman and
                  Feilong Bao and
                  Guanglai Gao and
                  Haizhou Li},
  title        = {Modeling Prosodic Phrasing With Multi-Task Learning in {Tacotron}-Based
                  {TTS}},
  journal      = {{IEEE} Signal Process. Lett.},
  volume       = {27},
  pages        = {1470--1474},
  year         = {2020},
  url          = {https://doi.org/10.1109/LSP.2020.3016564},
  doi          = {10.1109/LSP.2020.3016564},
  timestamp    = {Thu, 27 Jul 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/spl/LiuSBGL20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Conference paper (APSIPA 2020); no doi field, url points at the IEEE Xplore record.
% The model name CycleGAN is braced in the title so sentence-casing styles keep its capitals.
@inproceedings{DBLP:conf/apsipa/DuZS020,
  author       = {Zongyang Du and
                  Kun Zhou and
                  Berrak Sisman and
                  Haizhou Li},
  title        = {Spectrum and Prosody Conversion for Cross-lingual Voice Conversion
                  with {CycleGAN}},
  booktitle    = {Asia-Pacific Signal and Information Processing Association Annual
                  Summit and Conference, {APSIPA} 2020, Auckland, New Zealand, December
                  7-10, 2020},
  pages        = {507--513},
  publisher    = {{IEEE}},
  year         = {2020},
  url          = {https://ieeexplore.ieee.org/document/9306487},
  timestamp    = {Sat, 06 Jan 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/apsipa/DuZS020.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Conference paper (APSIPA 2020); no doi field, url points at the IEEE Xplore record.
@inproceedings{DBLP:conf/apsipa/LuZS020,
  author       = {Junchen Lu and
                  Kun Zhou and
                  Berrak Sisman and
                  Haizhou Li},
  title        = {{VAW-GAN} for Singing Voice Conversion with Non-parallel Training
                  Data},
  booktitle    = {Asia-Pacific Signal and Information Processing Association Annual
                  Summit and Conference, {APSIPA} 2020, Auckland, New Zealand, December
                  7-10, 2020},
  pages        = {514--519},
  publisher    = {{IEEE}},
  year         = {2020},
  url          = {https://ieeexplore.ieee.org/document/9306474},
  timestamp    = {Sat, 06 Jan 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/apsipa/LuZS020.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Workshop paper (Blizzard Challenge / Voice Conversion Challenge 2020 joint workshop).
% The event name in the title is braced so sentence-casing styles keep its capitals.
% NOTE(review): sixth author spelled "Rui Liu" to match this co-author's name
% elsewhere in the file; confirm against the paper.
@inproceedings{DBLP:conf/blizzard/0020TZ0LLS020,
  author       = {Yi Zhou and
                  Xiaohai Tian and
                  Xuehao Zhou and
                  Mingyang Zhang and
                  Grandee Lee and
                  Rui Liu and
                  Berrak Sisman and
                  Haizhou Li},
  editor       = {Junichi Yamagishi and
                  Zhenhua Ling and
                  Rohan Kumar Das and
                  Simon King and
                  Tomi Kinnunen and
                  Tomoki Toda and
                  Wen{-}Chin Huang and
                  Xiao Zhou and
                  Xiaohai Tian and
                  Yi Zhao},
  title        = {{NUS-HLT} System for {Blizzard Challenge} 2020},
  booktitle    = {Joint Workshop for the Blizzard Challenge and Voice Conversion Challenge
                  2020, Shanghai, China, October 30, 2020},
  publisher    = {{ISCA}},
  year         = {2020},
  url          = {https://doi.org/10.21437/VCCBC.2020-7},
  doi          = {10.21437/VCCBC.2020-7},
  timestamp    = {Mon, 16 Mar 2026 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/blizzard/0020TZ0LLS020.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Workshop paper (Blizzard Challenge / Voice Conversion Challenge 2020 joint workshop);
% url resolves the DOI given in doi.
@inproceedings{DBLP:conf/blizzard/Tian0YZD00ZS0020,
  author       = {Xiaohai Tian and
                  Zhichao Wang and
                  Shan Yang and
                  Xinyong Zhou and
                  Hongqiang Du and
                  Yi Zhou and
                  Mingyang Zhang and
                  Kun Zhou and
                  Berrak Sisman and
                  Lei Xie and
                  Haizhou Li},
  editor       = {Junichi Yamagishi and
                  Zhenhua Ling and
                  Rohan Kumar Das and
                  Simon King and
                  Tomi Kinnunen and
                  Tomoki Toda and
                  Wen{-}Chin Huang and
                  Xiao Zhou and
                  Xiaohai Tian and
                  Yi Zhao},
  title        = {The {NUS} {\&} {NWPU} system for Voice Conversion Challenge 2020},
  booktitle    = {Joint Workshop for the Blizzard Challenge and Voice Conversion Challenge
                  2020, Shanghai, China, October 30, 2020},
  publisher    = {{ISCA}},
  year         = {2020},
  url          = {https://doi.org/10.21437/VCCBC.2020-26},
  doi          = {10.21437/VCCBC.2020-26},
  timestamp    = {Mon, 26 Jan 2026 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/blizzard/Tian0YZD00ZS0020.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Conference paper (ICASSP 2020). The model name Tacotron is braced in the
% title so sentence-casing styles keep its capital.
@inproceedings{DBLP:conf/icassp/0008SLBG020,
  author       = {Rui Liu and
                  Berrak Sisman and
                  Jingdong Li and
                  Feilong Bao and
                  Guanglai Gao and
                  Haizhou Li},
  title        = {Teacher-Student Training For Robust {Tacotron}-Based {TTS}},
  booktitle    = {2020 {IEEE} International Conference on Acoustics, Speech and Signal
                  Processing, {ICASSP} 2020, Barcelona, Spain, May 4-8, 2020},
  pages        = {6274--6278},
  publisher    = {{IEEE}},
  year         = {2020},
  url          = {https://doi.org/10.1109/ICASSP40776.2020.9054681},
  doi          = {10.1109/ICASSP40776.2020.9054681},
  timestamp    = {Sat, 09 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icassp/0008SLBG020.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Conference paper (Interspeech 2020); url resolves the DOI given in doi.
@inproceedings{DBLP:conf/interspeech/ZhouS0020,
  author       = {Kun Zhou and
                  Berrak Sisman and
                  Mingyang Zhang and
                  Haizhou Li},
  editor       = {Helen Meng and
                  Bo Xu and
                  Thomas Fang Zheng},
  title        = {Converting Anyone's Emotion: Towards Speaker-Independent Emotional
                  Voice Conversion},
  booktitle    = {21st Annual Conference of the International Speech Communication Association,
                  Interspeech 2020, Virtual Event, Shanghai, China, October 25-29, 2020},
  pages        = {3416--3420},
  publisher    = {{ISCA}},
  year         = {2020},
  url          = {https://doi.org/10.21437/Interspeech.2020-2014},
  doi          = {10.21437/INTERSPEECH.2020-2014},
  timestamp    = {Sun, 19 Jan 2025 13:13:53 +0100},
  biburl       = {https://dblp.org/rec/conf/interspeech/ZhouS0020.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Workshop paper (Odyssey 2020); url resolves the DOI given in doi.
@inproceedings{DBLP:conf/odyssey/ZhouS020,
  author       = {Kun Zhou and
                  Berrak Sisman and
                  Haizhou Li},
  editor       = {Kong{-}Aik Lee and
                  Takafumi Koshinaka and
                  Koichi Shinoda},
  title        = {Transforming Spectrum and Prosody for Emotional Voice Conversion with
                  Non-Parallel Training Data},
  booktitle    = {Odyssey 2020: The Speaker and Language Recognition Workshop, 1-5 November
                  2020, Tokyo, Japan},
  pages        = {230--237},
  publisher    = {{ISCA}},
  year         = {2020},
  url          = {https://doi.org/10.21437/Odyssey.2020-33},
  doi          = {10.21437/ODYSSEY.2020-33},
  timestamp    = {Tue, 30 Jul 2024 09:41:52 +0200},
  biburl       = {https://dblp.org/rec/conf/odyssey/ZhouS020.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Workshop paper (Odyssey 2020); url resolves the DOI given in doi.
@inproceedings{DBLP:conf/odyssey/Sisman020,
  author       = {Berrak Sisman and
                  Haizhou Li},
  editor       = {Kong{-}Aik Lee and
                  Takafumi Koshinaka and
                  Koichi Shinoda},
  title        = {Generative Adversarial Networks for Singing Voice Conversion with
                  and without Parallel Data},
  booktitle    = {Odyssey 2020: The Speaker and Language Recognition Workshop, 1-5 November
                  2020, Tokyo, Japan},
  pages        = {238--244},
  publisher    = {{ISCA}},
  year         = {2020},
  url          = {https://doi.org/10.21437/Odyssey.2020-34},
  doi          = {10.21437/ODYSSEY.2020-34},
  timestamp    = {Tue, 30 Jul 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/odyssey/Sisman020.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Workshop paper (Odyssey 2020). The system and model names in the title are
% braced so sentence-casing styles keep their capitals.
@inproceedings{DBLP:conf/odyssey/0008SBG020,
  author       = {Rui Liu and
                  Berrak Sisman and
                  Feilong Bao and
                  Guanglai Gao and
                  Haizhou Li},
  editor       = {Kong{-}Aik Lee and
                  Takafumi Koshinaka and
                  Koichi Shinoda},
  title        = {{WaveTTS}: {Tacotron}-based {TTS} with Joint Time-Frequency Domain Loss},
  booktitle    = {Odyssey 2020: The Speaker and Language Recognition Workshop, 1-5 November
                  2020, Tokyo, Japan},
  pages        = {245--251},
  publisher    = {{ISCA}},
  year         = {2020},
  url          = {https://doi.org/10.21437/Odyssey.2020-35},
  doi          = {10.21437/ODYSSEY.2020-35},
  timestamp    = {Tue, 30 Jul 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/odyssey/0008SBG020.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint (CoRR abs/2002.00198); url points at the arXiv abstract page.
@article{DBLP:journals/corr/abs-2002-00198,
  author       = {Kun Zhou and
                  Berrak Sisman and
                  Haizhou Li},
  title        = {Transforming Spectrum and Prosody for Emotional Voice Conversion with
                  Non-Parallel Training Data},
  journal      = {CoRR},
  volume       = {abs/2002.00198},
  year         = {2020},
  url          = {https://arxiv.org/abs/2002.00198},
  eprinttype   = {arXiv},
  eprint       = {2002.00198},
  timestamp    = {Sat, 06 Jan 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2002-00198.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint (CoRR abs/2002.00417). The system and model names in the
% title are braced so sentence-casing styles keep their capitals.
@article{DBLP:journals/corr/abs-2002-00417,
  author       = {Rui Liu and
                  Berrak Sisman and
                  Feilong Bao and
                  Guanglai Gao and
                  Haizhou Li},
  title        = {{WaveTTS}: {Tacotron}-based {TTS} with Joint Time-Frequency Domain Loss},
  journal      = {CoRR},
  volume       = {abs/2002.00417},
  year         = {2020},
  url          = {https://arxiv.org/abs/2002.00417},
  eprinttype   = {arXiv},
  eprint       = {2002.00417},
  timestamp    = {Mon, 10 Feb 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2002-00417.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint (CoRR abs/2005.07025); url points at the arXiv abstract page.
@article{DBLP:journals/corr/abs-2005-07025,
  author       = {Kun Zhou and
                  Berrak Sisman and
                  Mingyang Zhang and
                  Haizhou Li},
  title        = {Converting Anyone's Emotion: Towards Speaker-Independent Emotional
                  Voice Conversion},
  journal      = {CoRR},
  volume       = {abs/2005.07025},
  year         = {2020},
  url          = {https://arxiv.org/abs/2005.07025},
  eprinttype   = {arXiv},
  eprint       = {2005.07025},
  timestamp    = {Sat, 06 Jan 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2005-07025.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint, CoRR abs/2008.01490 (2020).
@article{DBLP:journals/corr/abs-2008-01490,
  author       = {Liu, Rui and
                  Sisman, Berrak and
                  Gao, Guanglai and
                  Li, Haizhou},
  title        = {Expressive {TTS} Training with Frame and Style Reconstruction Loss},
  journal      = {CoRR},
  volume       = {abs/2008.01490},
  year         = {2020},
  url          = {https://arxiv.org/abs/2008.01490},
  eprinttype   = {arXiv},
  eprint       = {2008.01490},
  timestamp    = {Fri, 07 Aug 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2008-01490.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint, CoRR abs/2008.03648 (2020).
@article{DBLP:journals/corr/abs-2008-03648,
  author       = {Sisman, Berrak and
                  Yamagishi, Junichi and
                  King, Simon and
                  Li, Haizhou},
  title        = {An Overview of Voice Conversion and its Challenges: From Statistical Modeling to Deep Learning},
  journal      = {CoRR},
  volume       = {abs/2008.03648},
  year         = {2020},
  url          = {https://arxiv.org/abs/2008.03648},
  eprinttype   = {arXiv},
  eprint       = {2008.03648},
  timestamp    = {Fri, 02 Feb 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2008-03648.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint, CoRR abs/2008.03992 (2020).
@article{DBLP:journals/corr/abs-2008-03992,
  author       = {Lu, Junchen and
                  Zhou, Kun and
                  Sisman, Berrak and
                  Li, Haizhou},
  title        = {{VAW-GAN} for Singing Voice Conversion with Non-parallel Training Data},
  journal      = {CoRR},
  volume       = {abs/2008.03992},
  year         = {2020},
  url          = {https://arxiv.org/abs/2008.03992},
  eprinttype   = {arXiv},
  eprint       = {2008.03992},
  timestamp    = {Sat, 06 Jan 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2008-03992.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint, CoRR abs/2008.04562 (2020).
% CycleGAN is brace-protected so that styles which sentence-case titles do not
% render it in lowercase.
@article{DBLP:journals/corr/abs-2008-04562,
  author       = {Zongyang Du and
                  Kun Zhou and
                  Berrak Sisman and
                  Haizhou Li},
  title        = {Spectrum and Prosody Conversion for Cross-lingual Voice Conversion
                  with {CycleGAN}},
  journal      = {CoRR},
  volume       = {abs/2008.04562},
  year         = {2020},
  url          = {https://arxiv.org/abs/2008.04562},
  eprinttype   = {arXiv},
  eprint       = {2008.04562},
  timestamp    = {Sat, 06 Jan 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2008-04562.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint, CoRR abs/2008.05284 (2020).
% Tacotron is brace-protected so that styles which sentence-case titles do not
% render it in lowercase.
@article{DBLP:journals/corr/abs-2008-05284,
  author       = {Rui Liu and
                  Berrak Sisman and
                  Feilong Bao and
                  Guanglai Gao and
                  Haizhou Li},
  title        = {Modeling Prosodic Phrasing with Multi-Task Learning in {Tacotron}-based
                  {TTS}},
  journal      = {CoRR},
  volume       = {abs/2008.05284},
  year         = {2020},
  url          = {https://arxiv.org/abs/2008.05284},
  eprinttype   = {arXiv},
  eprint       = {2008.05284},
  timestamp    = {Mon, 17 Aug 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2008-05284.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint, CoRR abs/2010.12423 (2020).
% GraphSpeech is brace-protected: as the first word only its initial letter
% would otherwise survive sentence-casing.
@article{DBLP:journals/corr/abs-2010-12423,
  author       = {Rui Liu and
                  Berrak Sisman and
                  Haizhou Li},
  title        = {{GraphSpeech}: Syntax-Aware Graph Attention Network For Neural Speech
                  Synthesis},
  journal      = {CoRR},
  volume       = {abs/2010.12423},
  year         = {2020},
  url          = {https://arxiv.org/abs/2010.12423},
  eprinttype   = {arXiv},
  eprint       = {2010.12423},
  timestamp    = {Tue, 27 Oct 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2010-12423.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint, CoRR abs/2010.14794 (2020).
@article{DBLP:journals/corr/abs-2010-14794,
  author       = {Zhou, Kun and
                  Sisman, Berrak and
                  Liu, Rui and
                  Li, Haizhou},
  title        = {Seen and Unseen emotional style transfer for voice conversion with a new emotional speech dataset},
  journal      = {CoRR},
  volume       = {abs/2010.14794},
  year         = {2020},
  url          = {https://arxiv.org/abs/2010.14794},
  eprinttype   = {arXiv},
  eprint       = {2010.14794},
  timestamp    = {Sat, 06 Jan 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2010-14794.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint, CoRR abs/2011.02314 (2020).
@article{DBLP:journals/corr/abs-2011-02314,
  author       = {Zhou, Kun and
                  Sisman, Berrak and
                  Li, Haizhou},
  title        = {{VAW-GAN} for Disentanglement and Recomposition of Emotional Elements in Speech},
  journal      = {CoRR},
  volume       = {abs/2011.02314},
  year         = {2020},
  url          = {https://arxiv.org/abs/2011.02314},
  eprinttype   = {arXiv},
  eprint       = {2011.02314},
  timestamp    = {Sat, 06 Jan 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2011-02314.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Journal article, volume 27, number 6 (2019).
% WaveNet is brace-protected so that styles which sentence-case titles do not
% render it in lowercase.
@article{DBLP:journals/taslp/SismanZL19,
  author       = {Berrak Sisman and
                  Mingyang Zhang and
                  Haizhou Li},
  title        = {Group Sparse Representation With {WaveNet} Vocoder Adaptation for Spectrum
                  and Prosody Conversion},
  journal      = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  volume       = {27},
  number       = {6},
  pages        = {1085--1097},
  year         = {2019},
  url          = {https://doi.org/10.1109/TASLP.2019.2910637},
  doi          = {10.1109/TASLP.2019.2910637},
  timestamp    = {Sun, 19 Jan 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/taslp/SismanZL19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Conference paper, APSIPA ASC 2019.
% The colon is kept outside the brace group: BibTeX only preserves the capital
% after a colon it can see at brace depth 0, so with the colon inside the
% braces the subtitle's first word was lowercased by sentence-casing styles.
@inproceedings{DBLP:conf/apsipa/SismanVD019,
  author       = {Berrak Sisman and
                  Karthika Vijayan and
                  Minghui Dong and
                  Haizhou Li},
  title        = {{SINGAN}: Singing Voice Conversion with Generative Adversarial Networks},
  booktitle    = {2019 Asia-Pacific Signal and Information Processing Association Annual
                  Summit and Conference, {APSIPA} {ASC} 2019, Lanzhou, China, November
                  18-21, 2019},
  pages        = {112--118},
  publisher    = {{IEEE}},
  year         = {2019},
  url          = {https://doi.org/10.1109/APSIPAASC47483.2019.9023162},
  doi          = {10.1109/APSIPAASC47483.2019.9023162},
  timestamp    = {Fri, 13 Mar 2020 10:17:58 +0100},
  biburl       = {https://dblp.org/rec/conf/apsipa/SismanVD019.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Conference paper, ASRU 2019.
@inproceedings{DBLP:conf/asru/SismanZDL19,
  author       = {Sisman, Berrak and
                  Zhang, Mingyang and
                  Dong, Minghui and
                  Li, Haizhou},
  title        = {On the Study of Generative Adversarial Networks for Cross-Lingual Voice Conversion},
  booktitle    = {{IEEE} Automatic Speech Recognition and Understanding Workshop, {ASRU} 2019, Singapore, December 14-18, 2019},
  pages        = {144--151},
  publisher    = {{IEEE}},
  year         = {2019},
  url          = {https://doi.org/10.1109/ASRU46091.2019.9003939},
  doi          = {10.1109/ASRU46091.2019.9003939},
  timestamp    = {Sat, 09 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/asru/SismanZDL19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Conference paper, Interspeech 2019.
% Code2Spec and the challenge name are brace-protected so that styles which
% sentence-case titles do not render them in lowercase.
@inproceedings{DBLP:conf/interspeech/TjandraS0S0019,
  author       = {Andros Tjandra and
                  Berrak Sisman and
                  Mingyang Zhang and
                  Sakriani Sakti and
                  Haizhou Li and
                  Satoshi Nakamura},
  editor       = {Gernot Kubin and
                  Zdravko Kacic},
  title        = {{VQVAE} Unsupervised Unit Discovery and Multi-Scale {Code2Spec} Inverter
                  for {Zerospeech} {Challenge} 2019},
  booktitle    = {20th Annual Conference of the International Speech Communication Association,
                  Interspeech 2019, Graz, Austria, September 15-19, 2019},
  pages        = {1118--1122},
  publisher    = {{ISCA}},
  year         = {2019},
  url          = {https://doi.org/10.21437/Interspeech.2019-3232},
  doi          = {10.21437/INTERSPEECH.2019-3232},
  timestamp    = {Tue, 11 Jun 2024 16:45:43 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/TjandraS0S0019.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint, CoRR abs/1905.11449 (2019).
% Code2Spec and the challenge name are brace-protected so that styles which
% sentence-case titles do not render them in lowercase. The URL uses https.
@article{DBLP:journals/corr/abs-1905-11449,
  author       = {Andros Tjandra and
                  Berrak Sisman and
                  Mingyang Zhang and
                  Sakriani Sakti and
                  Haizhou Li and
                  Satoshi Nakamura},
  title        = {{VQVAE} Unsupervised Unit Discovery and Multi-scale {Code2Spec} Inverter
                  for {Zerospeech} {Challenge} 2019},
  journal      = {CoRR},
  volume       = {abs/1905.11449},
  year         = {2019},
  url          = {https://arxiv.org/abs/1905.11449},
  eprinttype   = {arXiv},
  eprint       = {1905.11449},
  timestamp    = {Thu, 06 Feb 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1905-11449.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint, CoRR abs/1911.02839 (2019).
% Tacotron is brace-protected so that styles which sentence-case titles do not
% render it in lowercase. The URL uses https.
@article{DBLP:journals/corr/abs-1911-02839,
  author       = {Rui Liu and
                  Berrak Sisman and
                  Jingdong Li and
                  Feilong Bao and
                  Guanglai Gao and
                  Haizhou Li},
  title        = {Teacher-Student Training for Robust {Tacotron}-based {TTS}},
  journal      = {CoRR},
  volume       = {abs/1911.02839},
  year         = {2019},
  url          = {https://arxiv.org/abs/1911.02839},
  eprinttype   = {arXiv},
  eprint       = {1911.02839},
  timestamp    = {Mon, 11 Nov 2019 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1911-02839.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Conference paper, APSIPA ASC 2018.
% DBLSTM is brace-protected so that styles which sentence-case titles do not
% render the acronym in lowercase.
@inproceedings{DBLP:conf/apsipa/ZhangSR0Z18,
  author       = {Mingyang Zhang and
                  Berrak Sisman and
                  Sai Sirisha Rallabandi and
                  Haizhou Li and
                  Li Zhao},
  title        = {Error Reduction Network for {DBLSTM}-based Voice Conversion},
  booktitle    = {Asia-Pacific Signal and Information Processing Association Annual
                  Summit and Conference, {APSIPA} {ASC} 2018, Honolulu, HI, USA, November
                  12-15, 2018},
  pages        = {823--828},
  publisher    = {{IEEE}},
  year         = {2018},
  url          = {https://doi.org/10.23919/APSIPA.2018.8659543},
  doi          = {10.23919/APSIPA.2018.8659543},
  timestamp    = {Tue, 07 Sep 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/apsipa/ZhangSR0Z18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Workshop paper, The Blizzard Challenge 2018 (the record carries no pages field).
% The challenge name is brace-protected so that styles which sentence-case
% titles do not render it in lowercase.
@inproceedings{DBLP:conf/blizzard/XiaoY0SH0D018,
  author       = {Jinba Xiao and
                  Shan Yang and
                  Mingyang Zhang and
                  Berrak Sisman and
                  Dongyan Huang and
                  Lei Xie and
                  Minghui Dong and
                  Haizhou Li},
  title        = {The {I2R-NWPU-NUS} Text-to-Speech System for {Blizzard} {Challenge} 2018},
  booktitle    = {The Blizzard Challenge 2018, Hyderabad, India, September 8, 2018},
  publisher    = {{ISCA}},
  year         = {2018},
  url          = {https://doi.org/10.21437/Blizzard.2018-4},
  doi          = {10.21437/BLIZZARD.2018-4},
  timestamp    = {Mon, 26 Jan 2026 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/blizzard/XiaoY0SH0D018.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Conference paper, Interspeech 2018.
@inproceedings{DBLP:conf/interspeech/SismanL18,
  author       = {Sisman, Berrak and
                  Li, Haizhou},
  editor       = {Yegnanarayana, B.},
  title        = {Wavelet Analysis of Speaker Dependent and Independent Prosody for Voice Conversion},
  booktitle    = {19th Annual Conference of the International Speech Communication Association, Interspeech 2018, Hyderabad, India, September 2-6, 2018},
  pages        = {52--56},
  publisher    = {{ISCA}},
  year         = {2018},
  url          = {https://doi.org/10.21437/Interspeech.2018-1499},
  doi          = {10.21437/INTERSPEECH.2018-1499},
  timestamp    = {Tue, 11 Jun 2024 16:45:43 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/SismanL18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Conference paper, Interspeech 2018.
% WaveNet is brace-protected so that styles which sentence-case titles do not
% render it in lowercase.
@inproceedings{DBLP:conf/interspeech/SismanZL18,
  author       = {Berrak Sisman and
                  Mingyang Zhang and
                  Haizhou Li},
  editor       = {B. Yegnanarayana},
  title        = {A Voice Conversion Framework with Tandem Feature Sparse Representation
                  and Speaker-Adapted {WaveNet} Vocoder},
  booktitle    = {19th Annual Conference of the International Speech Communication Association,
                  Interspeech 2018, Hyderabad, India, September 2-6, 2018},
  pages        = {1978--1982},
  publisher    = {{ISCA}},
  year         = {2018},
  url          = {https://doi.org/10.21437/Interspeech.2018-1131},
  doi          = {10.21437/INTERSPEECH.2018-1131},
  timestamp    = {Fri, 29 Jan 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/interspeech/SismanZL18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Workshop paper, Odyssey 2018.
@inproceedings{DBLP:conf/odyssey/SismanL018,
  author       = {Sisman, Berrak and
                  Lee, Grandee and
                  Li, Haizhou},
  editor       = {Larcher, Anthony and
                  Bonastre, Jean{-}Fran{\c{c}}ois},
  title        = {Phonetically Aware Exemplar-Based Prosody Transformation},
  booktitle    = {Odyssey 2018: The Speaker and Language Recognition Workshop, 26-29 June 2018, Les Sables d'Olonne, France},
  pages        = {267--274},
  publisher    = {{ISCA}},
  year         = {2018},
  url          = {https://doi.org/10.21437/Odyssey.2018-38},
  doi          = {10.21437/ODYSSEY.2018-38},
  timestamp    = {Tue, 30 Jul 2024 09:37:28 +0200},
  biburl       = {https://dblp.org/rec/conf/odyssey/SismanL018.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Workshop paper, IEEE SLT 2018.
% Wavenet and GAN are brace-protected so that styles which sentence-case
% titles do not render them in lowercase.
@inproceedings{DBLP:conf/slt/SismanZS0018,
  author       = {Berrak Sisman and
                  Mingyang Zhang and
                  Sakriani Sakti and
                  Haizhou Li and
                  Satoshi Nakamura},
  title        = {Adaptive {Wavenet} Vocoder for Residual Compensation in {GAN}-Based Voice
                  Conversion},
  booktitle    = {2018 {IEEE} Spoken Language Technology Workshop, {SLT} 2018, Athens,
                  Greece, December 18-21, 2018},
  pages        = {282--289},
  publisher    = {{IEEE}},
  year         = {2018},
  url          = {https://doi.org/10.1109/SLT.2018.8639507},
  doi          = {10.1109/SLT.2018.8639507},
  timestamp    = {Fri, 27 Mar 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/slt/SismanZS0018.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Conference paper, APSIPA ASC 2017.
@inproceedings{DBLP:conf/apsipa/SismanLT17,
  author       = {Sisman, Berrak and
                  Li, Haizhou and
                  Tan, Kay Chen},
  title        = {Transformation of prosody in voice conversion},
  booktitle    = {2017 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference, {APSIPA} {ASC} 2017, Kuala Lumpur, Malaysia, December 12-15, 2017},
  pages        = {1537--1546},
  publisher    = {{IEEE}},
  year         = {2017},
  url          = {https://doi.org/10.1109/APSIPA.2017.8282288},
  doi          = {10.1109/APSIPA.2017.8282288},
  timestamp    = {Tue, 07 May 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/apsipa/SismanLT17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Workshop paper, IEEE ASRU 2017.
% NOTE(review): the key is spelled "Cicman" here and in the biburl, unlike the
% author field; it is kept unchanged so existing citations keep resolving.
@inproceedings{DBLP:conf/asru/CicmanLT17,
  author       = {Sisman, Berrak and
                  Li, Haizhou and
                  Tan, Kay Chen},
  title        = {Sparse representation of phonetic features for voice conversion with and without parallel data},
  booktitle    = {2017 {IEEE} Automatic Speech Recognition and Understanding Workshop, {ASRU} 2017, Okinawa, Japan, December 16-20, 2017},
  pages        = {677--684},
  publisher    = {{IEEE}},
  year         = {2017},
  url          = {https://doi.org/10.1109/ASRU.2017.8269002},
  doi          = {10.1109/ASRU.2017.8269002},
  timestamp    = {Tue, 07 May 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/asru/CicmanLT17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Conference paper, IALP 2017.
@inproceedings{DBLP:conf/ialp/SismanLLT17,
  author       = {Sisman, Berrak and
                  Lee, Grandee and
                  Li, Haizhou and
                  Tan, Kay Chen},
  editor       = {Tong, Rong and
                  Zhang, Yue and
                  Lu, Yanfeng and
                  Dong, Minghui},
  title        = {On the analysis and evaluation of prosody conversion techniques},
  booktitle    = {2017 International Conference on Asian Language Processing, {IALP} 2017, Singapore, December 5-7, 2017},
  pages        = {44--47},
  publisher    = {{IEEE}},
  year         = {2017},
  url          = {https://doi.org/10.1109/IALP.2017.8300542},
  doi          = {10.1109/IALP.2017.8300542},
  timestamp    = {Tue, 07 May 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/ialp/SismanLLT17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Conference paper, IEEE WCNC 2016.
% Shares title and authors with DBLP:conf/wcnc/GurakanSKU16 (the WCNC Workshops
% record) but has its own DOI and page range, so it is not a duplicate entry.
@inproceedings{DBLP:conf/wcnc/GurakanSKU16a,
  author       = {Gurakan, Berk and
                  Sisman, Berrak and
                  Kaya, Onur and
                  Ulukus, Sennur},
  title        = {Energy and data cooperation in energy harvesting multiple access channel},
  booktitle    = {{IEEE} Wireless Communications and Networking Conference, {WCNC} 2016, Doha, Qatar, April 3-6, 2016},
  pages        = {1--6},
  publisher    = {{IEEE}},
  year         = {2016},
  url          = {https://doi.org/10.1109/WCNC.2016.7564705},
  doi          = {10.1109/WCNC.2016.7564705},
  timestamp    = {Wed, 16 Oct 2019 14:14:50 +0200},
  biburl       = {https://dblp.org/rec/conf/wcnc/GurakanSKU16a.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Workshop paper, IEEE WCNC Workshops 2016.
% Shares title and authors with DBLP:conf/wcnc/GurakanSKU16a (the main WCNC
% record) but has its own DOI and page range, so it is not a duplicate entry.
@inproceedings{DBLP:conf/wcnc/GurakanSKU16,
  author       = {Gurakan, Berk and
                  Sisman, Berrak and
                  Kaya, Onur and
                  Ulukus, Sennur},
  title        = {Energy and data cooperation in energy harvesting multiple access channel},
  booktitle    = {{IEEE} Wireless Communications and Networking Conference Workshops, {WCNC} Workshops 2016, Doha, Qatar, April 3-6, 2016},
  pages        = {410--415},
  publisher    = {{IEEE}},
  year         = {2016},
  url          = {https://doi.org/10.1109/WCNCW.2016.7552734},
  doi          = {10.1109/WCNCW.2016.7552734},
  timestamp    = {Sat, 06 Sep 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/wcnc/GurakanSKU16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

manage site settings

To protect your privacy, all features that rely on external API calls from your browser are turned off by default. You need to opt-in for them to become active. All settings here will be stored as cookies with your web browser. For more information see our F.A.Q.