


% BibTeX records: Berrak Sisman
@article{DBLP:journals/corr/abs-2601-03115,
  author     = {Xiutian Zhao and
                Bj{\"{o}}rn W. Schuller and
                Berrak Sisman},
  title      = {Discovering and Causally Validating Emotion-Sensitive Neurons in Large
                Audio-Language Models},
  journal    = {CoRR},
  volume     = {abs/2601.03115},
  year       = {2026},
  url        = {https://doi.org/10.48550/arXiv.2601.03115},
  doi        = {10.48550/ARXIV.2601.03115},
  eprinttype = {arXiv},
  eprint     = {2601.03115},
  timestamp  = {Wed, 11 Feb 2026 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2601-03115.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/spl/LamZCSH25,
  author     = {Perry Lam and
                Huayun Zhang and
                Nancy F. Chen and
                Berrak Sisman and
                Dorien Herremans},
  title      = {{PRESENT:} Zero-Shot Text-to-Prosody Control},
  journal    = {{IEEE} Signal Process. Lett.},
  volume     = {32},
  pages      = {776--780},
  year       = {2025},
  url        = {https://doi.org/10.1109/LSP.2025.3528359},
  doi        = {10.1109/LSP.2025.3528359},
  timestamp  = {Tue, 14 Oct 2025 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/spl/LamZCSH25.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/taffco/GoncalvesLLSB25,
  author     = {Lucas Goncalves and
                Seong{-}Gyun Leem and
                Wei{-}Cheng Lin and
                Berrak Sisman and
                Carlos Busso},
  title      = {Versatile Audio-Visual Learning for Emotion Recognition},
  journal    = {{IEEE} Trans. Affect. Comput.},
  volume     = {16},
  number     = {1},
  pages      = {306--318},
  year       = {2025},
  url        = {https://doi.org/10.1109/TAFFC.2024.3433386},
  doi        = {10.1109/TAFFC.2024.3433386},
  timestamp  = {Sun, 15 Jun 2025 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/taffco/GoncalvesLLSB25.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/apsipa/Xian0S025,
  author     = {Huhong Xian and
                Rui Liu and
                Berrak Sisman and
                Haizhou Li},
  title      = {{NE-PADD:} Leveraging Named Entity Knowledge for Robust Partial Audio
                Deepfake Detection via Attention Aggregation},
  booktitle  = {Asia Pacific Signal and Information Processing Association Annual
                Summit and Conference, {APSIPA} {ASC} 2025, Singapore, October 22-24,
                2025},
  pages      = {2199--2204},
  publisher  = {{IEEE}},
  year       = {2025},
  url        = {https://doi.org/10.1109/APSIPAASC65261.2025.11249178},
  doi        = {10.1109/APSIPAASC65261.2025.11249178},
  timestamp  = {Mon, 16 Mar 2026 12:13:26 +0100},
  biburl     = {https://dblp.org/rec/conf/apsipa/Xian0S025.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/emnlp/JiaLSL25,
  author     = {Zhenqi Jia and
                Rui Liu and
                Berrak Sisman and
                Haizhou Li},
  editor     = {Christos Christodoulopoulos and
                Tanmoy Chakraborty and
                Carolyn Rose and
                Violet Peng},
  title      = {Multimodal Fine-grained Context Interaction Graph Modeling for Conversational
                Speech Synthesis},
  booktitle  = {Proceedings of the 2025 Conference on Empirical Methods in Natural
                Language Processing, {EMNLP} 2025, Suzhou, China, November 4-9, 2025},
  pages      = {8852--8858},
  publisher  = {Association for Computational Linguistics},
  year       = {2025},
  url        = {https://doi.org/10.18653/v1/2025.emnlp-main.448},
  doi        = {10.18653/V1/2025.EMNLP-MAIN.448},
  timestamp  = {Wed, 04 Feb 2026 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/emnlp/JiaLSL25.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/0008GXSB025,
  author     = {Rui Liu and
                Pu Gao and
                Jiatian Xi and
                Berrak Sisman and
                Carlos Busso and
                Haizhou Li},
  editor     = {Odette Scharenborg and
                Catharine Oertel and
                Khiet Truong},
  title      = {Towards Emotionally Consistent Text-Based Speech Editing: Introducing
                {EmoCorrector} and The {ECD-TSE} Dataset},
  booktitle  = {26th Annual Conference of the International Speech Communication Association,
                Interspeech 2025, Rotterdam, The Netherlands, 17-21 August 2025},
  publisher  = {{ISCA}},
  year       = {2025},
  url        = {https://doi.org/10.21437/Interspeech.2025-559},
  doi        = {10.21437/INTERSPEECH.2025-559},
  timestamp  = {Thu, 20 Nov 2025 11:03:39 +0100},
  biburl     = {https://dblp.org/rec/conf/interspeech/0008GXSB025.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/ChandraGLBS25,
  author     = {Shreeram Suresh Chandra and
                Lucas Goncalves and
                Junchen Lu and
                Carlos Busso and
                Berrak Sisman},
  editor     = {Odette Scharenborg and
                Catharine Oertel and
                Khiet Truong},
  title      = {{EmotionRankCLAP:} Bridging Natural Language Speaking Styles and Ordinal
                Speech Emotion via {Rank-N-Contrast}},
  booktitle  = {26th Annual Conference of the International Speech Communication Association,
                Interspeech 2025, Rotterdam, The Netherlands, 17-21 August 2025},
  publisher  = {{ISCA}},
  year       = {2025},
  url        = {https://doi.org/10.21437/Interspeech.2025-1198},
  doi        = {10.21437/INTERSPEECH.2025-1198},
  timestamp  = {Thu, 20 Nov 2025 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/interspeech/ChandraGLBS25.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/MahapatraUNBS25,
  author     = {Aurosweta Mahapatra and
                Ismail Rasim Ulgen and
                Abinay Reddy Naini and
                Carlos Busso and
                Berrak Sisman},
  editor     = {Odette Scharenborg and
                Catharine Oertel and
                Khiet Truong},
  title      = {Can Emotion Fool Anti-spoofing?},
  booktitle  = {26th Annual Conference of the International Speech Communication Association,
                Interspeech 2025, Rotterdam, The Netherlands, 17-21 August 2025},
  publisher  = {{ISCA}},
  year       = {2025},
  url        = {https://doi.org/10.21437/Interspeech.2025-1234},
  doi        = {10.21437/INTERSPEECH.2025-1234},
  timestamp  = {Thu, 20 Nov 2025 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/interspeech/MahapatraUNBS25.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/NainiGSMUTMGDSB25,
  author     = {Abinay Reddy Naini and
                Lucas Goncalves and
                Ali N. Salman and
                Pravin Mote and
                Ismail Rasim Ulgen and
                Thomas Thebaud and
                Laureano Moro{-}Vel{\'{a}}zquez and
                Leibny Paola Garc{\'{\i}}a and
                Najim Dehak and
                Berrak Sisman and
                Carlos Busso},
  editor     = {Odette Scharenborg and
                Catharine Oertel and
                Khiet Truong},
  title      = {The {Interspeech} 2025 Challenge on Speech Emotion Recognition in Naturalistic
                Conditions},
  booktitle  = {26th Annual Conference of the International Speech Communication Association,
                Interspeech 2025, Rotterdam, The Netherlands, 17-21 August 2025},
  publisher  = {{ISCA}},
  year       = {2025},
  url        = {https://doi.org/10.21437/Interspeech.2025-1972},
  doi        = {10.21437/INTERSPEECH.2025-1972},
  timestamp  = {Fri, 21 Nov 2025 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/interspeech/NainiGSMUTMGDSB25.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/RoseroSCSSKHB25,
  author     = {Karen Rosero and
                Ali N. Salman and
                Shreeram Suresh Chandra and
                Berrak Sisman and
                Van't Slot, Cortney and
                Alex A. Kane and
                Rami R. Hallac and
                Carlos Busso},
  editor     = {Odette Scharenborg and
                Catharine Oertel and
                Khiet Truong},
  title      = {Advancing Pediatric {ASR:} The Role of Voice Generation in Disordered
                Speech},
  booktitle  = {26th Annual Conference of the International Speech Communication Association,
                Interspeech 2025, Rotterdam, The Netherlands, 17-21 August 2025},
  publisher  = {{ISCA}},
  year       = {2025},
  url        = {https://doi.org/10.21437/Interspeech.2025-1890},
  doi        = {10.21437/INTERSPEECH.2025-1890},
  timestamp  = {Thu, 20 Nov 2025 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/interspeech/RoseroSCSSKHB25.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2505-20341,
  author     = {Rui Liu and
                Pu Gao and
                Jiatian Xi and
                Berrak Sisman and
                Carlos Busso and
                Haizhou Li},
  title      = {Towards Emotionally Consistent Text-Based Speech Editing: Introducing
                {EmoCorrector} and The {ECD-TSE} Dataset},
  journal    = {CoRR},
  volume     = {abs/2505.20341},
  year       = {2025},
  url        = {https://doi.org/10.48550/arXiv.2505.20341},
  doi        = {10.48550/ARXIV.2505.20341},
  eprinttype = {arXiv},
  eprint     = {2505.20341},
  timestamp  = {Sat, 06 Sep 2025 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2505-20341.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2505-23732,
  author     = {Shreeram Suresh Chandra and
                Lucas Goncalves and
                Junchen Lu and
                Carlos Busso and
                Berrak Sisman},
  title      = {{EmotionRankCLAP:} Bridging Natural Language Speaking Styles and Ordinal
                Speech Emotion via {Rank-N-Contrast}},
  journal    = {CoRR},
  volume     = {abs/2505.23732},
  year       = {2025},
  url        = {https://doi.org/10.48550/arXiv.2505.23732},
  doi        = {10.48550/ARXIV.2505.23732},
  eprinttype = {arXiv},
  eprint     = {2505.23732},
  timestamp  = {Sun, 29 Jun 2025 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2505-23732.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2505-23962,
  author     = {Aurosweta Mahapatra and
                Ismail Rasim Ulgen and
                Abinay Reddy Naini and
                Carlos Busso and
                Berrak Sisman},
  title      = {Can Emotion Fool Anti-spoofing?},
  journal    = {CoRR},
  volume     = {abs/2505.23962},
  year       = {2025},
  url        = {https://doi.org/10.48550/arXiv.2505.23962},
  doi        = {10.48550/ARXIV.2505.23962},
  eprinttype = {arXiv},
  eprint     = {2505.23962},
  timestamp  = {Sun, 29 Jun 2025 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2505-23962.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2509-03829,
  author     = {Huhong Xian and
                Rui Liu and
                Berrak Sisman and
                Haizhou Li},
  title      = {{NE-PADD:} Leveraging Named Entity Knowledge for Robust Partial Audio
                Deepfake Detection via Attention Aggregation},
  journal    = {CoRR},
  volume     = {abs/2509.03829},
  year       = {2025},
  url        = {https://doi.org/10.48550/arXiv.2509.03829},
  doi        = {10.48550/ARXIV.2509.03829},
  eprinttype = {arXiv},
  eprint     = {2509.03829},
  timestamp  = {Mon, 13 Oct 2025 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2509-03829.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2509-06074,
  author     = {Zhenqi Jia and
                Rui Liu and
                Berrak Sisman and
                Haizhou Li},
  title      = {Multimodal Fine-grained Context Interaction Graph Modeling for Conversational
                Speech Synthesis},
  journal    = {CoRR},
  volume     = {abs/2509.06074},
  year       = {2025},
  url        = {https://doi.org/10.48550/arXiv.2509.06074},
  doi        = {10.48550/ARXIV.2509.06074},
  eprinttype = {arXiv},
  eprint     = {2509.06074},
  timestamp  = {Mon, 13 Oct 2025 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2509-06074.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2509-20485,
  author     = {Ismail Rasim Ulgen and
                Zongyang Du and
                Junchen Lu and
                Philipp Koehn and
                Berrak Sisman},
  title      = {Objective Evaluation of Prosody and Intelligibility in Speech Synthesis
                via Conditional Prediction of Discrete Tokens},
  journal    = {CoRR},
  volume     = {abs/2509.20485},
  year       = {2025},
  url        = {https://doi.org/10.48550/arXiv.2509.20485},
  doi        = {10.48550/ARXIV.2509.20485},
  eprinttype = {arXiv},
  eprint     = {2509.20485},
  timestamp  = {Wed, 22 Oct 2025 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2509-20485.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2509-21676,
  author     = {Aurosweta Mahapatra and
                Ismail Rasim Ulgen and
                Berrak Sisman},
  title      = {{HuLA:} Prosody-Aware Anti-Spoofing with Multi-Task Learning for Expressive
                and Emotional Synthetic Speech},
  journal    = {CoRR},
  volume     = {abs/2509.21676},
  year       = {2025},
  url        = {https://doi.org/10.48550/arXiv.2509.21676},
  doi        = {10.48550/ARXIV.2509.21676},
  eprinttype = {arXiv},
  eprint     = {2509.21676},
  timestamp  = {Wed, 22 Oct 2025 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2509-21676.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2511-00256,
  author     = {Zongyang Du and
                Shreeram Suresh Chandra and
                Ismail Rasim Ulgen and
                Aurosweta Mahapatra and
                Ali N. Salman and
                Carlos Busso and
                Berrak Sisman},
  title      = {{NaturalVoices:} {A} Large-Scale, Spontaneous and Emotional Podcast
                Dataset for Voice Conversion},
  journal    = {CoRR},
  volume     = {abs/2511.00256},
  year       = {2025},
  url        = {https://doi.org/10.48550/arXiv.2511.00256},
  doi        = {10.48550/ARXIV.2511.00256},
  eprinttype = {arXiv},
  eprint     = {2511.00256},
  timestamp  = {Fri, 02 Jan 2026 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2511-00256.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/access/RajapaksheRKSSB24,
  author     = {Thejan Rajapakshe and
                Rajib Rana and
                Sara Khalifa and
                Berrak Sisman and
                Bj{\"{o}}rn W. Schuller and
                Carlos Busso},
  title      = {{emoDARTS:} Joint Optimization of {CNN} and Sequential Neural Network
                Architectures for Superior Speech Emotion Recognition},
  journal    = {{IEEE} Access},
  volume     = {12},
  pages      = {110492--110503},
  year       = {2024},
  url        = {https://doi.org/10.1109/ACCESS.2024.3439604},
  doi        = {10.1109/ACCESS.2024.3439604},
  timestamp  = {Thu, 22 Aug 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/access/RajapaksheRKSSB24.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/taslp/LiuSGL24,
  author     = {Rui Liu and
                Berrak Sisman and
                Guanglai Gao and
                Haizhou Li},
  title      = {Controllable Accented Text-to-Speech Synthesis With Fine and Coarse-Grained
                Intensity Rendering},
  journal    = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  volume     = {32},
  pages      = {2188--2201},
  year       = {2024},
  url        = {https://doi.org/10.1109/TASLP.2024.3378110},
  doi        = {10.1109/TASLP.2024.3378110},
  timestamp  = {Fri, 17 May 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/taslp/LiuSGL24.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/fgr/RoseroSSHB24,
  author     = {Karen Rosero and
                Ali N. Salman and
                Berrak Sisman and
                Rami R. Hallac and
                Carlos Busso},
  title      = {Enhanced Facial Landmarks Detection for Patients with Repaired Cleft
                Lip and Palate},
  booktitle  = {18th {IEEE} International Conference on Automatic Face and Gesture
                Recognition, {FG} 2024, Istanbul, Turkey, May 27-31, 2024},
  pages      = {1--10},
  publisher  = {{IEEE}},
  year       = {2024},
  url        = {https://doi.org/10.1109/FG59268.2024.10582022},
  doi        = {10.1109/FG59268.2024.10582022},
  timestamp  = {Wed, 31 Jul 2024 14:00:36 +0200},
  biburl     = {https://dblp.org/rec/conf/fgr/RoseroSSHB24.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/UlgenDBS24,
  author     = {Ismail Rasim Ulgen and
                Zongyang Du and
                Carlos Busso and
                Berrak Sisman},
  title      = {Revealing Emotional Clusters in Speaker Embeddings: {A} Contrastive
                Learning Strategy for Speech Emotion Recognition},
  booktitle  = {{IEEE} International Conference on Acoustics, Speech and Signal Processing,
                {ICASSP} 2024, Seoul, Republic of Korea, April 14-19, 2024},
  pages      = {12081--12085},
  publisher  = {{IEEE}},
  year       = {2024},
  url        = {https://doi.org/10.1109/ICASSP48485.2024.10447060},
  doi        = {10.1109/ICASSP48485.2024.10447060},
  timestamp  = {Sun, 19 Jan 2025 13:18:23 +0100},
  biburl     = {https://dblp.org/rec/conf/icassp/UlgenDBS24.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/MoteSB24,
  author     = {Pravin Mote and
                Berrak Sisman and
                Carlos Busso},
  editor     = {Itshak Lapidot and
                Sharon Gannot},
  title      = {Unsupervised Domain Adaptation for Speech Emotion Recognition using
                K-Nearest Neighbors Voice Conversion},
  booktitle  = {25th Annual Conference of the International Speech Communication Association,
                Interspeech 2024, Kos, Greece, September 1-5, 2024},
  publisher  = {{ISCA}},
  year       = {2024},
  url        = {https://doi.org/10.21437/Interspeech.2024-1248},
  doi        = {10.21437/INTERSPEECH.2024-1248},
  timestamp  = {Tue, 20 May 2025 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/MoteSB24.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/SalmanDCUBS24,
  author     = {Ali N. Salman and
                Zongyang Du and
                Shreeram Suresh Chandra and
                Ismail Rasim {\"{U}}lgen and
                Carlos Busso and
                Berrak Sisman},
  editor     = {Itshak Lapidot and
                Sharon Gannot},
  title      = {Towards Naturalistic Voice Conversion: {NaturalVoices} Dataset with
                an Automatic Processing Pipeline},
  booktitle  = {25th Annual Conference of the International Speech Communication Association,
                Interspeech 2024, Kos, Greece, September 1-5, 2024},
  publisher  = {{ISCA}},
  year       = {2024},
  url        = {https://doi.org/10.21437/Interspeech.2024-1256},
  doi        = {10.21437/INTERSPEECH.2024-1256},
  timestamp  = {Tue, 20 May 2025 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/SalmanDCUBS24.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/odyssey/DuL0KS24,
  author     = {Zongyang Du and
                Junchen Lu and
                Kun Zhou and
                Lakshmish Kaushik and
                Berrak Sisman},
  editor     = {Najim Dehak and
                Patrick Cardinal},
  title      = {Converting Anyone's Voice: End-to-End Expressive Voice Conversion
                with {A} Conditional Diffusion Model},
  booktitle  = {Odyssey 2024: The Speaker and Language Recognition Workshop, Quebec
                City, Canada, June 18-21, 2024},
  pages      = {172--179},
  publisher  = {{ISCA}},
  year       = {2024},
  url        = {https://doi.org/10.21437/odyssey.2024-25},
  doi        = {10.21437/ODYSSEY.2024-25},
  timestamp  = {Wed, 31 Jul 2024 15:08:41 +0200},
  biburl     = {https://dblp.org/rec/conf/odyssey/DuL0KS24.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/odyssey/0003SB0024,
  author     = {Kun Zhou and
                Berrak Sisman and
                Carlos Busso and
                Bin Ma and
                Haizhou Li},
  editor     = {Najim Dehak and
                Patrick Cardinal},
  title      = {{Mixed-EVC:} Mixed Emotion Synthesis and Control in Voice Conversion},
  booktitle  = {Odyssey 2024: The Speaker and Language Recognition Workshop, Quebec
                City, Canada, June 18-21, 2024},
  pages      = {180--186},
  publisher  = {{ISCA}},
  year       = {2024},
  url        = {https://doi.org/10.21437/odyssey.2024-26},
  doi        = {10.21437/ODYSSEY.2024-26},
  timestamp  = {Tue, 30 Jul 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/odyssey/0003SB0024.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/odyssey/ChandraDS24,
  author     = {Shreeram Suresh Chandra and
                Zongyang Du and
                Berrak Sisman},
  editor     = {Najim Dehak and
                Patrick Cardinal},
  title      = {Exploring speech style spaces with language models: Emotional {TTS}
                without emotion labels},
  booktitle  = {Odyssey 2024: The Speaker and Language Recognition Workshop, Quebec
                City, Canada, June 18-21, 2024},
  pages      = {194--200},
  publisher  = {{ISCA}},
  year       = {2024},
  url        = {https://doi.org/10.21437/odyssey.2024-28},
  doi        = {10.21437/ODYSSEY.2024-28},
  timestamp  = {Sun, 06 Oct 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/odyssey/ChandraDS24.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/odyssey/GoncalvesSNMT0D24,
  author     = {Lucas Goncalves and
                Ali N. Salman and
                Abinay Reddy Naini and
                Laureano Moro{-}Vel{\'{a}}zquez and
                Thomas Thebaud and
                Paola Garc{\'{\i}}a and
                Najim Dehak and
                Berrak Sisman and
                Carlos Busso},
  editor     = {Najim Dehak and
                Patrick Cardinal},
  title      = {Odyssey 2024 - Speech Emotion Recognition Challenge: Dataset, Baseline
                Framework, and Results},
  booktitle  = {Odyssey 2024: The Speaker and Language Recognition Workshop, Quebec
                City, Canada, June 18-21, 2024},
  pages      = {247--254},
  publisher  = {{ISCA}},
  year       = {2024},
  url        = {https://doi.org/10.21437/odyssey.2024-35},
  doi        = {10.21437/ODYSSEY.2024-35},
  timestamp  = {Tue, 30 Jul 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/odyssey/GoncalvesSNMT0D24.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/slt/LeeUS24,
  author     = {Philip H. Lee and
                Ismail Rasim Ulgen and
                Berrak Sisman},
  title      = {Discrete Unit Based Masking For Improving Disentanglement in Voice
                Conversion},
  booktitle  = {{IEEE} Spoken Language Technology Workshop, {SLT} 2024, Macao, December
                2-5, 2024},
  pages      = {742--749},
  publisher  = {{IEEE}},
  year       = {2024},
  url        = {https://doi.org/10.1109/SLT61566.2024.10832297},
  doi        = {10.1109/SLT61566.2024.10832297},
  timestamp  = {Fri, 21 Feb 2025 21:48:34 +0100},
  biburl     = {https://dblp.org/rec/conf/slt/LeeUS24.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/tencon/LamZCSH24,
  author     = {Perry Lam and
                Huayun Zhang and
                Nancy F. Chen and
                Berrak Sisman and
                Dorien Herremans},
  title      = {{SNIPER} Training: Single-Shot Sparse Training for Text-to-Speech},
  booktitle  = {{IEEE} Region 10 Conference, {TENCON} 2024, Singapore, December 1-4,
                2024},
  pages      = {327--330},
  publisher  = {{IEEE}},
  year       = {2024},
  url        = {https://doi.org/10.1109/TENCON61640.2024.10902970},
  doi        = {10.1109/TENCON61640.2024.10902970},
  timestamp  = {Sat, 15 Nov 2025 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/tencon/LamZCSH24.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/tencon/MelechovskyMSH24,
  author     = {Jan Melechovsk{\'{y}} and
                Ambuj Mehrish and
                Berrak Sisman and
                Dorien Herremans},
  title      = {Accented Text-to-Speech Synthesis with a Conditional Variational Autoencoder},
  booktitle  = {{IEEE} Region 10 Conference, {TENCON} 2024, Singapore, December 1-4,
                2024},
  pages      = {343--346},
  publisher  = {{IEEE}},
  year       = {2024},
  url        = {https://doi.org/10.1109/TENCON61640.2024.10902981},
  doi        = {10.1109/TENCON61640.2024.10902981},
  timestamp  = {Sat, 15 Nov 2025 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/tencon/MelechovskyMSH24.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/tencon/MelechovskyMSH24a,
  author     = {Jan Melechovsk{\'{y}} and
                Ambuj Mehrish and
                Berrak Sisman and
                Dorien Herremans},
  title      = {Accent Conversion in Text-to-Speech Using Multi-Level {VAE} and Adversarial
                Training},
  booktitle  = {{IEEE} Region 10 Conference, {TENCON} 2024, Singapore, December 1-4,
                2024},
  pages      = {473--476},
  publisher  = {{IEEE}},
  year       = {2024},
  url        = {https://doi.org/10.1109/TENCON61640.2024.10902878},
  doi        = {10.1109/TENCON61640.2024.10902878},
  timestamp  = {Sat, 15 Nov 2025 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/tencon/MelechovskyMSH24a.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2401-11017,
  author     = {Ismail Rasim Ulgen and
                Zongyang Du and
                Carlos Busso and
                Berrak Sisman},
  title      = {Revealing Emotional Clusters in Speaker Embeddings: {A} Contrastive
                Learning Strategy for Speech Emotion Recognition},
  journal    = {CoRR},
  volume     = {abs/2401.11017},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2401.11017},
  doi        = {10.48550/ARXIV.2401.11017},
  eprinttype = {arXiv},
  eprint     = {2401.11017},
  timestamp  = {Wed, 07 Feb 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2401-11017.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2403-14083,
  author     = {Thejan Rajapakshe and
                Rajib Rana and
                Sara Khalifa and
                Berrak Sisman and
                Bj{\"{o}}rn W. Schuller and
                Carlos Busso},
  title      = {{emoDARTS:} Joint Optimisation of {CNN} {\&} Sequential Neural Network
                Architectures for Superior Speech Emotion Recognition},
  journal    = {CoRR},
  volume     = {abs/2403.14083},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2403.14083},
  doi        = {10.48550/ARXIV.2403.14083},
  eprinttype = {arXiv},
  eprint     = {2403.14083},
  timestamp  = {Tue, 24 Mar 2026 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2403-14083.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2405-01730,
  author     = {Zongyang Du and
                Junchen Lu and
                Kun Zhou and
                Lakshmish Kaushik and
                Berrak Sisman},
  title      = {Converting Anyone's Voice: End-to-End Expressive Voice Conversion
                with a Conditional Diffusion Model},
  journal    = {CoRR},
  volume     = {abs/2405.01730},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2405.01730},
  doi        = {10.48550/ARXIV.2405.01730},
  eprinttype = {arXiv},
  eprint     = {2405.01730},
  timestamp  = {Mon, 24 Jun 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2405-01730.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2405-11413,
  author     = {Shreeram Suresh Chandra and
                Zongyang Du and
                Berrak Sisman},
  title      = {Exploring speech style spaces with language models: Emotional {TTS}
                without emotion labels},
  journal    = {CoRR},
  volume     = {abs/2405.11413},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2405.11413},
  doi        = {10.48550/ARXIV.2405.11413},
  eprinttype = {arXiv},
  eprint     = {2405.11413},
  timestamp  = {Mon, 24 Jun 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2405-11413.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2406-01018,
  author     = {Jan Melechovsk{\'{y}} and
                Ambuj Mehrish and
                Berrak Sisman and
                Dorien Herremans},
  title      = {Accent Conversion in Text-To-Speech Using Multi-Level {VAE} and Adversarial
                Training},
  journal    = {CoRR},
  volume     = {abs/2406.01018},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2406.01018},
  doi        = {10.48550/ARXIV.2406.01018},
  eprinttype = {arXiv},
  eprint     = {2406.01018},
  timestamp  = {Wed, 24 Jul 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2406-01018.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2406-03637,
  author     = {Ahad Jawaid and
                Shreeram Suresh Chandra and
                Junchen Lu and
                Berrak Sisman},
  title      = {Style Mixture of Experts for Expressive Text-To-Speech Synthesis},
  journal    = {CoRR},
  volume     = {abs/2406.03637},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2406.03637},
  doi        = {10.48550/ARXIV.2406.03637},
  eprinttype = {arXiv},
  eprint     = {2406.03637},
  timestamp  = {Wed, 24 Jul 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2406-03637.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2407-04291,
  author     = {Ismail Rasim Ulgen and
                Carlos Busso and
                John H. L. Hansen and
                Berrak Sisman},
  title      = {We Need Variations in Speech Synthesis: Sub-center Modelling for Speaker
                Embeddings},
  journal    = {CoRR},
  volume     = {abs/2407.04291},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2407.04291},
  doi        = {10.48550/ARXIV.2407.04291},
  eprinttype = {arXiv},
  eprint     = {2407.04291},
  timestamp  = {Sat, 24 Aug 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2407-04291.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2408-06827,
  author     = {Perry Lam and
                Huayun Zhang and
                Nancy F. Chen and
                Berrak Sisman and
                Dorien Herremans},
  title      = {{PRESENT:} Zero-Shot Text-to-Prosody Control},
  journal    = {CoRR},
  volume     = {abs/2408.06827},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2408.06827},
  doi        = {10.48550/ARXIV.2408.06827},
  eprinttype = {arXiv},
  eprint     = {2408.06827},
  timestamp  = {Mon, 03 Mar 2025 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2408-06827.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2408-17432,
  author     = {Ismail Rasim Ulgen and
                Shreeram Suresh Chandra and
                Junchen Lu and
                Berrak Sisman},
  title      = {{SelectTTS:} Synthesizing Anyone's Voice via Discrete Unit-Based Frame
                Selection},
  journal    = {CoRR},
  volume     = {abs/2408.17432},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2408.17432},
  doi        = {10.48550/ARXIV.2408.17432},
  eprinttype = {arXiv},
  eprint     = {2408.17432},
  timestamp  = {Sat, 28 Sep 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2408-17432.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2409-11560,
  author     = {Philip H. Lee and
                Ismail Rasim Ulgen and
                Berrak Sisman},
  title      = {Discrete Unit based Masking for Improving Disentanglement in Voice
                Conversion},
  journal    = {CoRR},
  volume     = {abs/2409.11560},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2409.11560},
  doi        = {10.48550/ARXIV.2409.11560},
  eprinttype = {arXiv},
  eprint     = {2409.11560},
  timestamp  = {Mon, 21 Oct 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2409-11560.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record.
@article{DBLP:journals/corr/abs-2410-13342,
  author     = {Melechovsk{\'{y}}, Jan and
                Mehrish, Ambuj and
                Sisman, Berrak and
                Herremans, Dorien},
  title      = {{DART:} Disentanglement of Accent and Speaker Representation in
                Multispeaker Text-to-Speech},
  journal    = {CoRR},
  volume     = {abs/2410.13342},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2410.13342},
  doi        = {10.48550/ARXIV.2410.13342},
  eprinttype = {arXiv},
  eprint     = {2410.13342},
  timestamp  = {Fri, 29 Nov 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2410-13342.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Journal article; an arXiv record with the same title and authors is DBLP:journals/corr/abs-2201-03967.
@article{DBLP:journals/taffco/ZhouSRSL23,
  author     = {Zhou, Kun and
                Sisman, Berrak and
                Rana, Rajib and
                Schuller, Bj{\"{o}}rn W. and
                Li, Haizhou},
  title      = {Emotion Intensity and its Control for Emotional Voice Conversion},
  journal    = {{IEEE} Trans. Affect. Comput.},
  volume     = {14},
  number     = {1},
  pages      = {31--48},
  year       = {2023},
  url        = {https://doi.org/10.1109/TAFFC.2022.3175578},
  doi        = {10.1109/TAFFC.2022.3175578},
  timestamp  = {Sat, 06 Jan 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/taffco/ZhouSRSL23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Journal article; the matching arXiv record appears to be DBLP:journals/corr/abs-2208-05890.
@article{DBLP:journals/taffco/ZhouSRSL23a,
  author     = {Zhou, Kun and
                Sisman, Berrak and
                Rana, Rajib and
                Schuller, Bj{\"{o}}rn W. and
                Li, Haizhou},
  title      = {Speech Synthesis With Mixed Emotions},
  journal    = {{IEEE} Trans. Affect. Comput.},
  volume     = {14},
  number     = {4},
  pages      = {3120--3134},
  year       = {2023},
  url        = {https://doi.org/10.1109/TAFFC.2022.3233324},
  doi        = {10.1109/TAFFC.2022.3233324},
  timestamp  = {Fri, 08 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/taffco/ZhouSRSL23a.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Interspeech 2023 paper. {SlothSpeech} is braced so sentence-casing styles keep its capitals.
@inproceedings{DBLP:conf/interspeech/HaqueSCS0023,
  author     = {Haque, Mirazul and
                Shah, Rutvij and
                Chen, Simin and
                Sisman, Berrak and
                Liu, Cong and
                Yang, Wei},
  editor     = {Harte, Naomi and
                Carson{-}Berndsen, Julie and
                Jones, Gareth},
  title      = {{SlothSpeech}: Denial-of-service Attack Against Speech Recognition Models},
  booktitle  = {24th Annual Conference of the International Speech Communication Association,
                Interspeech 2023, Dublin, Ireland, August 20-24, 2023},
  pages      = {1274--1278},
  publisher  = {{ISCA}},
  year       = {2023},
  url        = {https://doi.org/10.21437/Interspeech.2023-1118},
  doi        = {10.21437/INTERSPEECH.2023-1118},
  timestamp  = {Fri, 14 Jun 2024 14:12:12 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/HaqueSCS0023.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Interspeech 2023 paper; an arXiv record with the same title and authors is DBLP:journals/corr/abs-2306-17005.
@inproceedings{DBLP:conf/interspeech/LuS0023,
  author     = {Lu, Junchen and
                Sisman, Berrak and
                Zhang, Mingyang and
                Li, Haizhou},
  editor     = {Harte, Naomi and
                Carson{-}Berndsen, Julie and
                Jones, Gareth},
  title      = {High-Quality Automatic Voice Over with Accurate Alignment: Supervision
                through Self-Supervised Discrete Speech Units},
  booktitle  = {24th Annual Conference of the International Speech Communication Association,
                Interspeech 2023, Dublin, Ireland, August 20-24, 2023},
  pages      = {5536--5540},
  publisher  = {{ISCA}},
  year       = {2023},
  url        = {https://doi.org/10.21437/Interspeech.2023-2179},
  doi        = {10.21437/INTERSPEECH.2023-2179},
  timestamp  = {Sun, 04 Aug 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/LuS0023.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record.
@article{DBLP:journals/corr/abs-2305-07216,
  author     = {Goncalves, Lucas and
                Leem, Seong{-}Gyun and
                Lin, Wei{-}Cheng and
                Sisman, Berrak and
                Busso, Carlos},
  title      = {Versatile Audio-Visual Learning for Handling Single and Multi Modalities
                in Emotion Regression and Classification Tasks},
  journal    = {CoRR},
  volume     = {abs/2305.07216},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2305.07216},
  doi        = {10.48550/ARXIV.2305.07216},
  eprinttype = {arXiv},
  eprint     = {2305.07216},
  timestamp  = {Sat, 30 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2305-07216.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record.
@article{DBLP:journals/corr/abs-2305-14402,
  author     = {Rajapakshe, Thejan and
                Rana, Rajib and
                Khalifa, Sara and
                Sisman, Berrak and
                Schuller, Bj{\"{o}}rn W.},
  title      = {Improving Speech Emotion Recognition Performance using Differentiable
                Architecture Search},
  journal    = {CoRR},
  volume     = {abs/2305.14402},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2305.14402},
  doi        = {10.48550/ARXIV.2305.14402},
  eprinttype = {arXiv},
  eprint     = {2305.14402},
  timestamp  = {Tue, 24 Mar 2026 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2305-14402.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record; shares title and authors with DBLP:conf/interspeech/HaqueSCS0023.
% {SlothSpeech} is braced so sentence-casing styles keep its capitals.
@article{DBLP:journals/corr/abs-2306-00794,
  author     = {Haque, Mirazul and
                Shah, Rutvij and
                Chen, Simin and
                Sisman, Berrak and
                Liu, Cong and
                Yang, Wei},
  title      = {{SlothSpeech}: Denial-of-service Attack Against Speech Recognition Models},
  journal    = {CoRR},
  volume     = {abs/2306.00794},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2306.00794},
  doi        = {10.48550/ARXIV.2306.00794},
  eprinttype = {arXiv},
  eprint     = {2306.00794},
  timestamp  = {Fri, 25 Aug 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2306-00794.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record; shares title and authors with DBLP:conf/interspeech/LuS0023.
@article{DBLP:journals/corr/abs-2306-17005,
  author     = {Lu, Junchen and
                Sisman, Berrak and
                Zhang, Mingyang and
                Li, Haizhou},
  title      = {High-Quality Automatic Voice Over with Accurate Alignment: Supervision
                through Self-Supervised Discrete Speech Units},
  journal    = {CoRR},
  volume     = {abs/2306.17005},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2306.17005},
  doi        = {10.48550/ARXIV.2306.17005},
  eprinttype = {arXiv},
  eprint     = {2306.17005},
  timestamp  = {Mon, 03 Jul 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2306-17005.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Journal article; the matching arXiv record appears to be DBLP:journals/corr/abs-2105-14762.
@article{DBLP:journals/speech/ZhouSLL22,
  author     = {Zhou, Kun and
                Sisman, Berrak and
                Liu, Rui and
                Li, Haizhou},
  title      = {Emotional voice conversion: Theory, databases and {ESD}},
  journal    = {Speech Commun.},
  volume     = {137},
  pages      = {1--18},
  year       = {2022},
  url        = {https://doi.org/10.1016/j.specom.2021.11.006},
  doi        = {10.1016/J.SPECOM.2021.11.006},
  timestamp  = {Sat, 06 Jan 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/speech/ZhouSLL22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Journal article.
@article{DBLP:journals/taslp/LiuSGL22,
  author     = {Liu, Rui and
                Sisman, Berrak and
                Gao, Guanglai and
                Li, Haizhou},
  title      = {Decoding Knowledge Transfer for Neural Text-to-Speech Training},
  journal    = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  volume     = {30},
  pages      = {1789--1802},
  year       = {2022},
  url        = {https://doi.org/10.1109/TASLP.2022.3171974},
  doi        = {10.1109/TASLP.2022.3171974},
  timestamp  = {Thu, 27 Jul 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/taslp/LiuSGL22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ICASSP 2022 paper; the matching arXiv record appears to be DBLP:journals/corr/abs-2110-03342.
% The system name is spelled {VisualTTS}, as in that arXiv record, and braced against recasing.
@inproceedings{DBLP:conf/icassp/LuSLZL22,
  author     = {Lu, Junchen and
                Sisman, Berrak and
                Liu, Rui and
                Zhang, Mingyang and
                Li, Haizhou},
  title      = {{VisualTTS}: {TTS} with Accurate Lip-Speech Synchronization for Automatic
                Voice Over},
  booktitle  = {{IEEE} International Conference on Acoustics, Speech and Signal Processing,
                {ICASSP} 2022, Virtual and Singapore, 23-27 May 2022},
  pages      = {8032--8036},
  publisher  = {{IEEE}},
  year       = {2022},
  url        = {https://doi.org/10.1109/ICASSP43922.2022.9746421},
  doi        = {10.1109/ICASSP43922.2022.9746421},
  timestamp  = {Sat, 06 Sep 2025 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icassp/LuSLZL22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Interspeech 2022 paper; an arXiv record with the same title and authors is DBLP:journals/corr/abs-2209-10890.
@inproceedings{DBLP:conf/interspeech/LamZCS22,
  author     = {Lam, Perry and
                Zhang, Huayun and
                Chen, Nancy F. and
                Sisman, Berrak},
  editor     = {Ko, Hanseok and
                Hansen, John H. L.},
  title      = {{EPIC} {TTS} Models: Empirical Pruning Investigations Characterizing
                Text-To-Speech Models},
  booktitle  = {23rd Annual Conference of the International Speech Communication Association,
                Interspeech 2022, Incheon, Korea, September 18-22, 2022},
  pages      = {823--827},
  publisher  = {{ISCA}},
  year       = {2022},
  url        = {https://doi.org/10.21437/Interspeech.2022-10626},
  doi        = {10.21437/INTERSPEECH.2022-10626},
  timestamp  = {Mon, 03 Mar 2025 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/interspeech/LamZCS22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Interspeech 2022 paper.
@inproceedings{DBLP:conf/interspeech/DuSZ022,
  author     = {Du, Zongyang and
                Sisman, Berrak and
                Zhou, Kun and
                Li, Haizhou},
  editor     = {Ko, Hanseok and
                Hansen, John H. L.},
  title      = {Disentanglement of Emotional Style and Speaker Identity for Expressive
                Voice Conversion},
  booktitle  = {23rd Annual Conference of the International Speech Communication Association,
                Interspeech 2022, Incheon, Korea, September 18-22, 2022},
  pages      = {2603--2607},
  publisher  = {{ISCA}},
  year       = {2022},
  url        = {https://doi.org/10.21437/Interspeech.2022-10249},
  doi        = {10.21437/INTERSPEECH.2022-10249},
  timestamp  = {Sat, 06 Jan 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/interspeech/DuSZ022.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Interspeech 2022 paper; an arXiv record with the same title and authors is DBLP:journals/corr/abs-2206-07229.
@inproceedings{DBLP:conf/interspeech/0008SSG022,
  author     = {Liu, Rui and
                Sisman, Berrak and
                Schuller, Bj{\"{o}}rn W. and
                Gao, Guanglai and
                Li, Haizhou},
  editor     = {Ko, Hanseok and
                Hansen, John H. L.},
  title      = {Accurate Emotion Strength Assessment for Seen and Unseen Speech Based
                on Data-Driven Deep Learning},
  booktitle  = {23rd Annual Conference of the International Speech Communication Association,
                Interspeech 2022, Incheon, Korea, September 18-22, 2022},
  pages      = {5493--5497},
  publisher  = {{ISCA}},
  year       = {2022},
  url        = {https://doi.org/10.21437/Interspeech.2022-534},
  doi        = {10.21437/INTERSPEECH.2022-534},
  timestamp  = {Sat, 06 Sep 2025 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/0008SSG022.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% SLT 2022 workshop paper. NOTE(review): year is 2022 as exported, while the venue dates in booktitle are January 2023.
@inproceedings{DBLP:conf/slt/MelechovskyMHS22,
  author     = {Melechovsk{\'{y}}, Jan and
                Mehrish, Ambuj and
                Herremans, Dorien and
                Sisman, Berrak},
  title      = {Learning Accent Representation with Multi-Level {VAE} Towards Controllable
                Speech Synthesis},
  booktitle  = {{IEEE} Spoken Language Technology Workshop, {SLT} 2022, Doha, Qatar,
                January 9-12, 2023},
  pages      = {928--935},
  publisher  = {{IEEE}},
  year       = {2022},
  url        = {https://doi.org/10.1109/SLT54892.2023.10023072},
  doi        = {10.1109/SLT54892.2023.10023072},
  timestamp  = {Sat, 15 Nov 2025 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/slt/MelechovskyMHS22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record; shares title and authors with DBLP:journals/taffco/ZhouSRSL23.
% doi follows the arXiv DataCite scheme used by the newer CoRR records in this file.
@article{DBLP:journals/corr/abs-2201-03967,
  author     = {Zhou, Kun and
                Sisman, Berrak and
                Rana, Rajib and
                Schuller, Bj{\"{o}}rn W. and
                Li, Haizhou},
  title      = {Emotion Intensity and its Control for Emotional Voice Conversion},
  journal    = {CoRR},
  volume     = {abs/2201.03967},
  year       = {2022},
  url        = {https://arxiv.org/abs/2201.03967},
  doi        = {10.48550/ARXIV.2201.03967},
  eprinttype = {arXiv},
  eprint     = {2201.03967},
  timestamp  = {Sat, 06 Jan 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2201-03967.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record; shares title and authors with DBLP:conf/interspeech/0008SSG022.
@article{DBLP:journals/corr/abs-2206-07229,
  author     = {Liu, Rui and
                Sisman, Berrak and
                Schuller, Bj{\"{o}}rn W. and
                Gao, Guanglai and
                Li, Haizhou},
  title      = {Accurate Emotion Strength Assessment for Seen and Unseen Speech Based
                on Data-Driven Deep Learning},
  journal    = {CoRR},
  volume     = {abs/2206.07229},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2206.07229},
  doi        = {10.48550/ARXIV.2206.07229},
  eprinttype = {arXiv},
  eprint     = {2206.07229},
  timestamp  = {Sat, 06 Sep 2025 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2206-07229.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record; the matching journal record appears to be DBLP:journals/taffco/ZhouSRSL23a.
@article{DBLP:journals/corr/abs-2208-05890,
  author     = {Zhou, Kun and
                Sisman, Berrak and
                Rana, Rajib and
                Schuller, Bj{\"{o}}rn W. and
                Li, Haizhou},
  title      = {Speech Synthesis with Mixed Emotions},
  journal    = {CoRR},
  volume     = {abs/2208.05890},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2208.05890},
  doi        = {10.48550/ARXIV.2208.05890},
  eprinttype = {arXiv},
  eprint     = {2208.05890},
  timestamp  = {Sat, 06 Jan 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2208-05890.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record.
@article{DBLP:journals/corr/abs-2209-10804,
  author     = {Liu, Rui and
                Sisman, Berrak and
                Gao, Guanglai and
                Li, Haizhou},
  title      = {Controllable Accented Text-to-Speech Synthesis},
  journal    = {CoRR},
  volume     = {abs/2209.10804},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2209.10804},
  doi        = {10.48550/ARXIV.2209.10804},
  eprinttype = {arXiv},
  eprint     = {2209.10804},
  timestamp  = {Sat, 06 Sep 2025 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2209-10804.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record; shares title and authors with DBLP:conf/interspeech/LamZCS22.
@article{DBLP:journals/corr/abs-2209-10890,
  author     = {Lam, Perry and
                Zhang, Huayun and
                Chen, Nancy F. and
                Sisman, Berrak},
  title      = {{EPIC} {TTS} Models: Empirical Pruning Investigations Characterizing
                Text-To-Speech Models},
  journal    = {CoRR},
  volume     = {abs/2209.10890},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2209.10890},
  doi        = {10.48550/ARXIV.2209.10890},
  eprinttype = {arXiv},
  eprint     = {2209.10890},
  timestamp  = {Tue, 07 May 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2209-10890.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record.
@article{DBLP:journals/corr/abs-2210-13756,
  author     = {Zhou, Kun and
                Sisman, Berrak and
                Busso, Carlos and
                Li, Haizhou},
  title      = {Mixed Emotion Modelling for Emotional Voice Conversion},
  journal    = {CoRR},
  volume     = {abs/2210.13756},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2210.13756},
  doi        = {10.48550/ARXIV.2210.13756},
  eprinttype = {arXiv},
  eprint     = {2210.13756},
  timestamp  = {Sat, 06 Jan 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2210-13756.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record.
@article{DBLP:journals/corr/abs-2211-03316,
  author     = {Melechovsk{\'{y}}, Jan and
                Mehrish, Ambuj and
                Sisman, Berrak and
                Herremans, Dorien},
  title      = {Accented Text-to-Speech Synthesis with a Conditional Variational Autoencoder},
  journal    = {CoRR},
  volume     = {abs/2211.03316},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2211.03316},
  doi        = {10.48550/ARXIV.2211.03316},
  eprinttype = {arXiv},
  eprint     = {2211.03316},
  timestamp  = {Thu, 10 Nov 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2211-03316.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record.
@article{DBLP:journals/corr/abs-2211-07283,
  author     = {Lam, Perry and
                Zhang, Huayun and
                Chen, Nancy F. and
                Sisman, Berrak and
                Herremans, Dorien},
  title      = {{SNIPER} Training: Variable Sparsity Rate Training For Text-To-Speech},
  journal    = {CoRR},
  volume     = {abs/2211.07283},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2211.07283},
  doi        = {10.48550/ARXIV.2211.07283},
  eprinttype = {arXiv},
  eprint     = {2211.07283},
  timestamp  = {Tue, 07 May 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2211-07283.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Journal article. {FastTalker} is braced so sentence-casing styles keep its capitals.
@article{DBLP:journals/nn/LiuSLL21,
  author     = {Liu, Rui and
                Sisman, Berrak and
                Lin, Yixing and
                Li, Haizhou},
  title      = {{FastTalker}: {A} neural text-to-speech architecture with shallow and
                group autoregression},
  journal    = {Neural Networks},
  volume     = {141},
  pages      = {306--314},
  year       = {2021},
  url        = {https://doi.org/10.1016/j.neunet.2021.04.016},
  doi        = {10.1016/J.NEUNET.2021.04.016},
  timestamp  = {Thu, 16 Sep 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/nn/LiuSLL21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Journal article.
@article{DBLP:journals/taslp/SismanYKL21,
  author     = {Sisman, Berrak and
                Yamagishi, Junichi and
                King, Simon and
                Li, Haizhou},
  title      = {An Overview of Voice Conversion and Its Challenges: From Statistical
                Modeling to Deep Learning},
  journal    = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  volume     = {29},
  pages      = {132--157},
  year       = {2021},
  url        = {https://doi.org/10.1109/TASLP.2020.3038524},
  doi        = {10.1109/TASLP.2020.3038524},
  timestamp  = {Mon, 29 Jan 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/taslp/SismanYKL21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Journal article. {Mongolian} is braced so sentence-casing styles keep the proper noun capitalised.
@article{DBLP:journals/taslp/LiuSBYGL21,
  author     = {Liu, Rui and
                Sisman, Berrak and
                Bao, Feilong and
                Yang, Jichen and
                Gao, Guanglai and
                Li, Haizhou},
  title      = {Exploiting Morphological and Phonological Features to Improve Prosodic
                Phrasing for {Mongolian} Speech Synthesis},
  journal    = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  volume     = {29},
  pages      = {274--285},
  year       = {2021},
  url        = {https://doi.org/10.1109/TASLP.2020.3040523},
  doi        = {10.1109/TASLP.2020.3040523},
  timestamp  = {Thu, 27 Jul 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/taslp/LiuSBYGL21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Journal article.
@article{DBLP:journals/taslp/LiuSGL21,
  author     = {Liu, Rui and
                Sisman, Berrak and
                Gao, Guanglai and
                Li, Haizhou},
  title      = {Expressive {TTS} Training With Frame and Style Reconstruction Loss},
  journal    = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  volume     = {29},
  pages      = {1806--1818},
  year       = {2021},
  url        = {https://doi.org/10.1109/TASLP.2021.3076369},
  doi        = {10.1109/TASLP.2021.3076369},
  timestamp  = {Thu, 27 Jul 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/taslp/LiuSGL21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ASRU 2021 paper; an arXiv record with the same title and authors is DBLP:journals/corr/abs-2107-03748.
@inproceedings{DBLP:conf/asru/DuSZL21,
  author     = {Du, Zongyang and
                Sisman, Berrak and
                Zhou, Kun and
                Li, Haizhou},
  title      = {Expressive Voice Conversion: {A} Joint Framework for Speaker Identity
                and Emotional Style Transfer},
  booktitle  = {{IEEE} Automatic Speech Recognition and Understanding Workshop, {ASRU}
                2021, Cartagena, Colombia, December 13-17, 2021},
  pages      = {594--601},
  publisher  = {{IEEE}},
  year       = {2021},
  url        = {https://doi.org/10.1109/ASRU51503.2021.9687906},
  doi        = {10.1109/ASRU51503.2021.9687906},
  timestamp  = {Sat, 06 Jan 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/asru/DuSZL21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ASRU 2021 paper; the matching arXiv record appears to be DBLP:journals/corr/abs-2110-06434.
@inproceedings{DBLP:conf/asru/NikonorovSZL21,
  author     = {Nikonorov, Sergey and
                Sisman, Berrak and
                Zhang, Mingyang and
                Li, Haizhou},
  title      = {{DEEPA:} {A} Deep Neural Analyzer for Speech and Singing Vocoding},
  booktitle  = {{IEEE} Automatic Speech Recognition and Understanding Workshop, {ASRU}
                2021, Cartagena, Colombia, December 13-17, 2021},
  pages      = {618--625},
  publisher  = {{IEEE}},
  year       = {2021},
  url        = {https://doi.org/10.1109/ASRU51503.2021.9687923},
  doi        = {10.1109/ASRU51503.2021.9687923},
  timestamp  = {Wed, 09 Feb 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/asru/NikonorovSZL21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Blizzard Challenge 2021 system paper. The event name is braced so sentence-casing styles keep it capitalised.
@inproceedings{DBLP:conf/blizzard/0003ZZ0LS021,
  author     = {Zhang, Mingyang and
                Zhou, Xuehao and
                Zhou, Kun and
                Liu, Rui and
                Lam, Perry and
                Sisman, Berrak and
                Li, Haizhou},
  title      = {{SUTD-NUS} System for {Blizzard Challenge} 2021},
  booktitle  = {The Blizzard Challenge 2021, virtual, October 23, 2021},
  publisher  = {{ISCA}},
  year       = {2021},
  url        = {https://doi.org/10.21437/Blizzard.2021-12},
  doi        = {10.21437/BLIZZARD.2021-12},
  timestamp  = {Mon, 30 Sep 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/blizzard/0003ZZ0LS021.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ICASSP 2021 paper. The mid-title article "A" is left unbraced so sentence-casing styles can lowercase it.
@inproceedings{DBLP:conf/icassp/ZhouS0021,
  author     = {Zhou, Kun and
                Sisman, Berrak and
                Liu, Rui and
                Li, Haizhou},
  title      = {Seen and Unseen Emotional Style Transfer for Voice Conversion with
                A New Emotional Speech Dataset},
  booktitle  = {{IEEE} International Conference on Acoustics, Speech and Signal Processing,
                {ICASSP} 2021, Toronto, ON, Canada, June 6-11, 2021},
  pages      = {920--924},
  publisher  = {{IEEE}},
  year       = {2021},
  url        = {https://doi.org/10.1109/ICASSP39728.2021.9413391},
  doi        = {10.1109/ICASSP39728.2021.9413391},
  timestamp  = {Sat, 06 Sep 2025 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icassp/ZhouS0021.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ICASSP 2021 paper. The system name is restored to {GraphSpeech} and braced against recasing.
@inproceedings{DBLP:conf/icassp/0008S021,
  author     = {Liu, Rui and
                Sisman, Berrak and
                Li, Haizhou},
  title      = {{GraphSpeech}: Syntax-Aware Graph Attention Network for Neural Speech
                Synthesis},
  booktitle  = {{IEEE} International Conference on Acoustics, Speech and Signal Processing,
                {ICASSP} 2021, Toronto, ON, Canada, June 6-11, 2021},
  pages      = {6059--6063},
  publisher  = {{IEEE}},
  year       = {2021},
  url        = {https://doi.org/10.1109/ICASSP39728.2021.9413513},
  doi        = {10.1109/ICASSP39728.2021.9413513},
  timestamp  = {Sat, 06 Sep 2025 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icassp/0008S021.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Interspeech 2021 paper; the matching arXiv record appears to be DBLP:journals/corr/abs-2103-16809.
@inproceedings{DBLP:conf/interspeech/ZhouSL21,
  author     = {Zhou, Kun and
                Sisman, Berrak and
                Li, Haizhou},
  editor     = {Hermansky, Hynek and
                Cernock{\'{y}}, Honza and
                Burget, Luk{\'{a}}s and
                Lamel, Lori and
                Scharenborg, Odette and
                Motl{\'{\i}}cek, Petr},
  title      = {Limited Data Emotional Voice Conversion Leveraging Text-to-Speech:
                Two-Stage Sequence-to-Sequence Training},
  booktitle  = {22nd Annual Conference of the International Speech Communication Association,
                Interspeech 2021, Brno, Czechia, August 30 - September 3, 2021},
  pages      = {811--815},
  publisher  = {{ISCA}},
  year       = {2021},
  url        = {https://doi.org/10.21437/Interspeech.2021-781},
  doi        = {10.21437/INTERSPEECH.2021-781},
  timestamp  = {Tue, 11 Jun 2024 16:45:43 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/ZhouSL21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Interspeech 2021 paper; an arXiv record with the same title and authors is DBLP:journals/corr/abs-2104-01408.
@inproceedings{DBLP:conf/interspeech/0008S021,
  author     = {Liu, Rui and
                Sisman, Berrak and
                Li, Haizhou},
  editor     = {Hermansky, Hynek and
                Cernock{\'{y}}, Honza and
                Burget, Luk{\'{a}}s and
                Lamel, Lori and
                Scharenborg, Odette and
                Motl{\'{\i}}cek, Petr},
  title      = {Reinforcement Learning for Emotional Text-to-Speech Synthesis with
                Improved Emotion Discriminability},
  booktitle  = {22nd Annual Conference of the International Speech Communication Association,
                Interspeech 2021, Brno, Czechia, August 30 - September 3, 2021},
  pages      = {4648--4652},
  publisher  = {{ISCA}},
  year       = {2021},
  url        = {https://doi.org/10.21437/Interspeech.2021-1236},
  doi        = {10.21437/INTERSPEECH.2021-1236},
  timestamp  = {Sat, 06 Sep 2025 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/0008S021.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% NOTE(review): author list equals editor list and title equals the proceedings title, so this
% looks like the volume's front-matter record; the volume itself is DBLP:conf/sigdial/2021.
@inproceedings{DBLP:conf/sigdial/LiLYGSCVDWL21,
  author     = {Li, Haizhou and
                Levow, Gina{-}Anne and
                Yu, Zhou and
                Gupta, Chitralekha and
                Sisman, Berrak and
                Cai, Siqi and
                Vandyke, David and
                Dethlefs, Nina and
                Wu, Yan and
                Li, Junyi Jessy},
  editor     = {Li, Haizhou and
                Levow, Gina{-}Anne and
                Yu, Zhou and
                Gupta, Chitralekha and
                Sisman, Berrak and
                Cai, Siqi and
                Vandyke, David and
                Dethlefs, Nina and
                Wu, Yan and
                Li, Junyi Jessy},
  title      = {Proceedings of the 22nd Annual Meeting of the Special Interest Group
                on Discourse and Dialogue},
  booktitle  = {Proceedings of the 22nd Annual Meeting of the Special Interest Group
                on Discourse and Dialogue, SIGdial 2021, Singapore and Online, July
                29-31, 2021},
  publisher  = {Association for Computational Linguistics},
  year       = {2021},
  url        = {https://aclanthology.org/2021.sigdial-1.0},
  timestamp  = {Mon, 07 Jul 2025 20:38:25 +0200},
  biburl     = {https://dblp.org/rec/conf/sigdial/LiLYGSCVDWL21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% SLT 2021 workshop paper. The acronym is restored to {VAW-GAN} and braced; function words use normal title case.
@inproceedings{DBLP:conf/slt/ZhouS021,
  author     = {Zhou, Kun and
                Sisman, Berrak and
                Li, Haizhou},
  title      = {{VAW-GAN} for Disentanglement and Recomposition of Emotional Elements
                in Speech},
  booktitle  = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen,
                China, January 19-22, 2021},
  pages      = {415--422},
  publisher  = {{IEEE}},
  year       = {2021},
  url        = {https://doi.org/10.1109/SLT48900.2021.9383526},
  doi        = {10.1109/SLT48900.2021.9383526},
  timestamp  = {Sat, 06 Jan 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/slt/ZhouS021.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Whole-proceedings record for SIGdial 2021.
@proceedings{DBLP:conf/sigdial/2021,
  editor     = {Li, Haizhou and
                Levow, Gina{-}Anne and
                Yu, Zhou and
                Gupta, Chitralekha and
                Sisman, Berrak and
                Cai, Siqi and
                Vandyke, David and
                Dethlefs, Nina and
                Wu, Yan and
                Li, Junyi Jessy},
  title      = {Proceedings of the 22nd Annual Meeting of the Special Interest Group
                on Discourse and Dialogue, SIGdial 2021, Singapore and Online, July
                29-31, 2021},
  publisher  = {Association for Computational Linguistics},
  year       = {2021},
  url        = {https://aclanthology.org/volumes/2021.sigdial-1/},
  isbn       = {978-1-954085-81-7},
  timestamp  = {Mon, 07 Jul 2025 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/sigdial/2021.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record; the matching conference record appears to be DBLP:conf/interspeech/ZhouSL21.
% doi follows the arXiv DataCite scheme used by the newer CoRR records in this file.
@article{DBLP:journals/corr/abs-2103-16809,
  author     = {Zhou, Kun and
                Sisman, Berrak and
                Li, Haizhou},
  title      = {Limited Data Emotional Voice Conversion Leveraging Text-to-Speech:
                Two-stage Sequence-to-Sequence Training},
  journal    = {CoRR},
  volume     = {abs/2103.16809},
  year       = {2021},
  url        = {https://arxiv.org/abs/2103.16809},
  doi        = {10.48550/ARXIV.2103.16809},
  eprinttype = {arXiv},
  eprint     = {2103.16809},
  timestamp  = {Sat, 06 Jan 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2103-16809.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record; shares title and authors with DBLP:conf/interspeech/0008S021.
% doi follows the arXiv DataCite scheme used by the newer CoRR records in this file.
@article{DBLP:journals/corr/abs-2104-01408,
  author     = {Liu, Rui and
                Sisman, Berrak and
                Li, Haizhou},
  title      = {Reinforcement Learning for Emotional Text-to-Speech Synthesis with
                Improved Emotion Discriminability},
  journal    = {CoRR},
  volume     = {abs/2104.01408},
  year       = {2021},
  url        = {https://arxiv.org/abs/2104.01408},
  doi        = {10.48550/ARXIV.2104.01408},
  eprinttype = {arXiv},
  eprint     = {2104.01408},
  timestamp  = {Mon, 12 Apr 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2104-01408.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record; the matching journal record appears to be DBLP:journals/speech/ZhouSLL22.
% doi follows the arXiv DataCite scheme used by the newer CoRR records in this file.
@article{DBLP:journals/corr/abs-2105-14762,
  author     = {Zhou, Kun and
                Sisman, Berrak and
                Liu, Rui and
                Li, Haizhou},
  title      = {Emotional Voice Conversion: Theory, Databases and {ESD}},
  journal    = {CoRR},
  volume     = {abs/2105.14762},
  year       = {2021},
  url        = {https://arxiv.org/abs/2105.14762},
  doi        = {10.48550/ARXIV.2105.14762},
  eprinttype = {arXiv},
  eprint     = {2105.14762},
  timestamp  = {Sat, 06 Jan 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2105-14762.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record; shares title and authors with DBLP:conf/asru/DuSZL21.
% doi follows the arXiv DataCite scheme used by the newer CoRR records in this file.
@article{DBLP:journals/corr/abs-2107-03748,
  author     = {Du, Zongyang and
                Sisman, Berrak and
                Zhou, Kun and
                Li, Haizhou},
  title      = {Expressive Voice Conversion: {A} Joint Framework for Speaker Identity
                and Emotional Style Transfer},
  journal    = {CoRR},
  volume     = {abs/2107.03748},
  year       = {2021},
  url        = {https://arxiv.org/abs/2107.03748},
  doi        = {10.48550/ARXIV.2107.03748},
  eprinttype = {arXiv},
  eprint     = {2107.03748},
  timestamp  = {Sat, 06 Jan 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2107-03748.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record. {StrengthNet} is braced so sentence-casing styles keep its capitals.
% doi follows the arXiv DataCite scheme used by the newer CoRR records in this file.
@article{DBLP:journals/corr/abs-2110-03156,
  author     = {Liu, Rui and
                Sisman, Berrak and
                Li, Haizhou},
  title      = {{StrengthNet}: Deep Learning-based Emotion Strength Assessment for
                Emotional Speech Synthesis},
  journal    = {CoRR},
  volume     = {abs/2110.03156},
  year       = {2021},
  url        = {https://arxiv.org/abs/2110.03156},
  doi        = {10.48550/ARXIV.2110.03156},
  eprinttype = {arXiv},
  eprint     = {2110.03156},
  timestamp  = {Fri, 22 Oct 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2110-03156.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record; the matching conference record appears to be DBLP:conf/icassp/LuSLZL22.
% {VisualTTS} is braced so sentence-casing styles keep its capitals; doi follows the arXiv DataCite scheme.
@article{DBLP:journals/corr/abs-2110-03342,
  author     = {Lu, Junchen and
                Sisman, Berrak and
                Liu, Rui and
                Zhang, Mingyang and
                Li, Haizhou},
  title      = {{VisualTTS}: {TTS} with Accurate Lip-Speech Synchronization for Automatic
                Voice Over},
  journal    = {CoRR},
  volume     = {abs/2110.03342},
  year       = {2021},
  url        = {https://arxiv.org/abs/2110.03342},
  doi        = {10.48550/ARXIV.2110.03342},
  eprinttype = {arXiv},
  eprint     = {2110.03342},
  timestamp  = {Mon, 25 Oct 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2110-03342.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2110.06434), 2021.
% The system name in the title is braced so case-changing styles keep its capitals.
@article{DBLP:journals/corr/abs-2110-06434,
  author     = {Sergey Nikonorov and
                Berrak Sisman and
                Mingyang Zhang and
                Haizhou Li},
  title      = {{DeepA}: {A} Deep Neural Analyzer For Speech And Singing Vocoding},
  journal    = {CoRR},
  volume     = {abs/2110.06434},
  year       = {2021},
  url        = {https://arxiv.org/abs/2110.06434},
  eprinttype = {arXiv},
  eprint     = {2110.06434},
  timestamp  = {Mon, 25 Oct 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2110-06434.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2110.10326), 2021.
@article{DBLP:journals/corr/abs-2110-10326,
  author     = {Zongyang Du and
                Berrak Sisman and
                Kun Zhou and
                Haizhou Li},
  title      = {Identity Conversion for Emotional Speakers: {A} Study for Disentanglement
                of Emotion Style and Speaker Identity},
  journal    = {CoRR},
  volume     = {abs/2110.10326},
  year       = {2021},
  url        = {https://arxiv.org/abs/2110.10326},
  eprinttype = {arXiv},
  eprint     = {2110.10326},
  timestamp  = {Sat, 06 Jan 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2110-10326.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, Speech Communication vol. 122 (2020).
% The system name in the title is braced so case-changing styles keep its capitals.
@article{DBLP:journals/speech/ZhangSZL20,
  author     = {Mingyang Zhang and
                Berrak Sisman and
                Li Zhao and
                Haizhou Li},
  title      = {{DeepConversion}: Voice conversion with limited parallel training data},
  journal    = {Speech Commun.},
  volume     = {122},
  pages      = {31--43},
  year       = {2020},
  url        = {https://doi.org/10.1016/j.specom.2020.05.004},
  doi        = {10.1016/J.SPECOM.2020.05.004},
  timestamp  = {Tue, 07 Sep 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/speech/ZhangSZL20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, IEEE Signal Processing Letters vol. 27 (2020).
% Same authors and (up to letter case) title as DBLP:journals/corr/abs-2008-05284.
@article{DBLP:journals/spl/LiuSBGL20,
  author     = {Rui Liu and
                Berrak Sisman and
                Feilong Bao and
                Guanglai Gao and
                Haizhou Li},
  title      = {Modeling Prosodic Phrasing With Multi-Task Learning in {Tacotron}-Based
                {TTS}},
  journal    = {{IEEE} Signal Process. Lett.},
  volume     = {27},
  pages      = {1470--1474},
  year       = {2020},
  url        = {https://doi.org/10.1109/LSP.2020.3016564},
  doi        = {10.1109/LSP.2020.3016564},
  timestamp  = {Thu, 27 Jul 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/spl/LiuSBGL20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, APSIPA 2020. No DOI is recorded; the url points at IEEE Xplore.
% Same authors and title as DBLP:journals/corr/abs-2008-04562.
@inproceedings{DBLP:conf/apsipa/DuZS020,
  author     = {Zongyang Du and
                Kun Zhou and
                Berrak Sisman and
                Haizhou Li},
  title      = {Spectrum and Prosody Conversion for Cross-lingual Voice Conversion
                with {CycleGAN}},
  booktitle  = {Asia-Pacific Signal and Information Processing Association Annual
                Summit and Conference, {APSIPA} 2020, Auckland, New Zealand, December
                7-10, 2020},
  pages      = {507--513},
  publisher  = {{IEEE}},
  year       = {2020},
  url        = {https://ieeexplore.ieee.org/document/9306487},
  timestamp  = {Sat, 06 Jan 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/apsipa/DuZS020.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, APSIPA 2020. No DOI is recorded; the url points at IEEE Xplore.
% Same authors and title as DBLP:journals/corr/abs-2008-03992.
@inproceedings{DBLP:conf/apsipa/LuZS020,
  author     = {Junchen Lu and
                Kun Zhou and
                Berrak Sisman and
                Haizhou Li},
  title      = {{VAW-GAN} for Singing Voice Conversion with Non-parallel Training
                Data},
  booktitle  = {Asia-Pacific Signal and Information Processing Association Annual
                Summit and Conference, {APSIPA} 2020, Auckland, New Zealand, December
                7-10, 2020},
  pages      = {514--519},
  publisher  = {{IEEE}},
  year       = {2020},
  url        = {https://ieeexplore.ieee.org/document/9306474},
  timestamp  = {Sat, 06 Jan 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/apsipa/LuZS020.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Workshop paper, Joint Workshop for the Blizzard Challenge and Voice Conversion Challenge 2020.
% NOTE(review): sixth author was spelled "Riu Liu" in the export; changed to "Rui Liu",
% the spelling used by the other entries in this file -- confirm against the paper.
@inproceedings{DBLP:conf/blizzard/0020TZ0LLS020,
  author     = {Yi Zhou and
                Xiaohai Tian and
                Xuehao Zhou and
                Mingyang Zhang and
                Grandee Lee and
                Rui Liu and
                Berrak Sisman and
                Haizhou Li},
  editor     = {Junichi Yamagishi and
                Zhenhua Ling and
                Rohan Kumar Das and
                Simon King and
                Tomi Kinnunen and
                Tomoki Toda and
                Wen{-}Chin Huang and
                Xiao Zhou and
                Xiaohai Tian and
                Yi Zhao},
  title      = {{NUS-HLT} System for {Blizzard Challenge} 2020},
  booktitle  = {Joint Workshop for the Blizzard Challenge and Voice Conversion Challenge
                2020, Shanghai, China, October 30, 2020},
  publisher  = {{ISCA}},
  year       = {2020},
  url        = {https://doi.org/10.21437/VCCBC.2020-7},
  doi        = {10.21437/VCCBC.2020-7},
  timestamp  = {Mon, 16 Mar 2026 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/blizzard/0020TZ0LLS020.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Workshop paper, Joint Workshop for the Blizzard Challenge and Voice Conversion Challenge 2020.
% The challenge name in the title is braced so case-changing styles keep its capitals.
@inproceedings{DBLP:conf/blizzard/Tian0YZD00ZS0020,
  author     = {Xiaohai Tian and
                Zhichao Wang and
                Shan Yang and
                Xinyong Zhou and
                Hongqiang Du and
                Yi Zhou and
                Mingyang Zhang and
                Kun Zhou and
                Berrak Sisman and
                Lei Xie and
                Haizhou Li},
  editor     = {Junichi Yamagishi and
                Zhenhua Ling and
                Rohan Kumar Das and
                Simon King and
                Tomi Kinnunen and
                Tomoki Toda and
                Wen{-}Chin Huang and
                Xiao Zhou and
                Xiaohai Tian and
                Yi Zhao},
  title      = {The {NUS} {\&} {NWPU} system for {Voice Conversion Challenge} 2020},
  booktitle  = {Joint Workshop for the Blizzard Challenge and Voice Conversion Challenge
                2020, Shanghai, China, October 30, 2020},
  publisher  = {{ISCA}},
  year       = {2020},
  url        = {https://doi.org/10.21437/VCCBC.2020-26},
  doi        = {10.21437/VCCBC.2020-26},
  timestamp  = {Mon, 26 Jan 2026 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/blizzard/Tian0YZD00ZS0020.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, ICASSP 2020.
% Same authors and (up to letter case) title as DBLP:journals/corr/abs-1911-02839.
@inproceedings{DBLP:conf/icassp/0008SLBG020,
  author     = {Rui Liu and
                Berrak Sisman and
                Jingdong Li and
                Feilong Bao and
                Guanglai Gao and
                Haizhou Li},
  title      = {Teacher-Student Training For Robust {Tacotron}-Based {TTS}},
  booktitle  = {2020 {IEEE} International Conference on Acoustics, Speech and Signal
                Processing, {ICASSP} 2020, Barcelona, Spain, May 4-8, 2020},
  pages      = {6274--6278},
  publisher  = {{IEEE}},
  year       = {2020},
  url        = {https://doi.org/10.1109/ICASSP40776.2020.9054681},
  doi        = {10.1109/ICASSP40776.2020.9054681},
  timestamp  = {Sat, 09 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icassp/0008SLBG020.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, Interspeech 2020.
% Same authors and title as DBLP:journals/corr/abs-2005-07025.
@inproceedings{DBLP:conf/interspeech/ZhouS0020,
  author     = {Kun Zhou and
                Berrak Sisman and
                Mingyang Zhang and
                Haizhou Li},
  editor     = {Helen Meng and
                Bo Xu and
                Thomas Fang Zheng},
  title      = {Converting Anyone's Emotion: Towards Speaker-Independent Emotional
                Voice Conversion},
  booktitle  = {21st Annual Conference of the International Speech Communication Association,
                Interspeech 2020, Virtual Event, Shanghai, China, October 25-29, 2020},
  pages      = {3416--3420},
  publisher  = {{ISCA}},
  year       = {2020},
  url        = {https://doi.org/10.21437/Interspeech.2020-2014},
  doi        = {10.21437/INTERSPEECH.2020-2014},
  timestamp  = {Sun, 19 Jan 2025 13:13:53 +0100},
  biburl     = {https://dblp.org/rec/conf/interspeech/ZhouS0020.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Workshop paper, Odyssey 2020.
% Same authors and title as DBLP:journals/corr/abs-2002-00198.
@inproceedings{DBLP:conf/odyssey/ZhouS020,
  author     = {Kun Zhou and
                Berrak Sisman and
                Haizhou Li},
  editor     = {Kong{-}Aik Lee and
                Takafumi Koshinaka and
                Koichi Shinoda},
  title      = {Transforming Spectrum and Prosody for Emotional Voice Conversion with
                Non-Parallel Training Data},
  booktitle  = {Odyssey 2020: The Speaker and Language Recognition Workshop, 1-5 November
                2020, Tokyo, Japan},
  pages      = {230--237},
  publisher  = {{ISCA}},
  year       = {2020},
  url        = {https://doi.org/10.21437/Odyssey.2020-33},
  doi        = {10.21437/ODYSSEY.2020-33},
  timestamp  = {Tue, 30 Jul 2024 09:41:52 +0200},
  biburl     = {https://dblp.org/rec/conf/odyssey/ZhouS020.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Workshop paper, Odyssey 2020.
@inproceedings{DBLP:conf/odyssey/Sisman020,
  author     = {Berrak Sisman and
                Haizhou Li},
  editor     = {Kong{-}Aik Lee and
                Takafumi Koshinaka and
                Koichi Shinoda},
  title      = {Generative Adversarial Networks for Singing Voice Conversion with
                and without Parallel Data},
  booktitle  = {Odyssey 2020: The Speaker and Language Recognition Workshop, 1-5 November
                2020, Tokyo, Japan},
  pages      = {238--244},
  publisher  = {{ISCA}},
  year       = {2020},
  url        = {https://doi.org/10.21437/Odyssey.2020-34},
  doi        = {10.21437/ODYSSEY.2020-34},
  timestamp  = {Tue, 30 Jul 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/odyssey/Sisman020.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Workshop paper, Odyssey 2020.
% Same authors and title as DBLP:journals/corr/abs-2002-00417.
@inproceedings{DBLP:conf/odyssey/0008SBG020,
  author     = {Rui Liu and
                Berrak Sisman and
                Feilong Bao and
                Guanglai Gao and
                Haizhou Li},
  editor     = {Kong{-}Aik Lee and
                Takafumi Koshinaka and
                Koichi Shinoda},
  title      = {{WaveTTS}: {Tacotron}-based {TTS} with Joint Time-Frequency Domain Loss},
  booktitle  = {Odyssey 2020: The Speaker and Language Recognition Workshop, 1-5 November
                2020, Tokyo, Japan},
  pages      = {245--251},
  publisher  = {{ISCA}},
  year       = {2020},
  url        = {https://doi.org/10.21437/Odyssey.2020-35},
  doi        = {10.21437/ODYSSEY.2020-35},
  timestamp  = {Tue, 30 Jul 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/odyssey/0008SBG020.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2002.00198), 2020.
% Same authors and title as DBLP:conf/odyssey/ZhouS020.
@article{DBLP:journals/corr/abs-2002-00198,
  author     = {Kun Zhou and
                Berrak Sisman and
                Haizhou Li},
  title      = {Transforming Spectrum and Prosody for Emotional Voice Conversion with
                Non-Parallel Training Data},
  journal    = {CoRR},
  volume     = {abs/2002.00198},
  year       = {2020},
  url        = {https://arxiv.org/abs/2002.00198},
  eprinttype = {arXiv},
  eprint     = {2002.00198},
  timestamp  = {Sat, 06 Jan 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2002-00198.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2002.00417), 2020.
% Same authors and title as DBLP:conf/odyssey/0008SBG020.
@article{DBLP:journals/corr/abs-2002-00417,
  author     = {Rui Liu and
                Berrak Sisman and
                Feilong Bao and
                Guanglai Gao and
                Haizhou Li},
  title      = {{WaveTTS}: {Tacotron}-based {TTS} with Joint Time-Frequency Domain Loss},
  journal    = {CoRR},
  volume     = {abs/2002.00417},
  year       = {2020},
  url        = {https://arxiv.org/abs/2002.00417},
  eprinttype = {arXiv},
  eprint     = {2002.00417},
  timestamp  = {Mon, 10 Feb 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2002-00417.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2005.07025), 2020.
% Same authors and title as DBLP:conf/interspeech/ZhouS0020.
@article{DBLP:journals/corr/abs-2005-07025,
  author     = {Kun Zhou and
                Berrak Sisman and
                Mingyang Zhang and
                Haizhou Li},
  title      = {Converting Anyone's Emotion: Towards Speaker-Independent Emotional
                Voice Conversion},
  journal    = {CoRR},
  volume     = {abs/2005.07025},
  year       = {2020},
  url        = {https://arxiv.org/abs/2005.07025},
  eprinttype = {arXiv},
  eprint     = {2005.07025},
  timestamp  = {Sat, 06 Jan 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2005-07025.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2008.01490), 2020.
@article{DBLP:journals/corr/abs-2008-01490,
  author     = {Rui Liu and
                Berrak Sisman and
                Guanglai Gao and
                Haizhou Li},
  title      = {Expressive {TTS} Training with Frame and Style Reconstruction Loss},
  journal    = {CoRR},
  volume     = {abs/2008.01490},
  year       = {2020},
  url        = {https://arxiv.org/abs/2008.01490},
  eprinttype = {arXiv},
  eprint     = {2008.01490},
  timestamp  = {Fri, 07 Aug 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2008-01490.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2008.03648), 2020.
@article{DBLP:journals/corr/abs-2008-03648,
  author     = {Berrak Sisman and
                Junichi Yamagishi and
                Simon King and
                Haizhou Li},
  title      = {An Overview of Voice Conversion and its Challenges: From Statistical
                Modeling to Deep Learning},
  journal    = {CoRR},
  volume     = {abs/2008.03648},
  year       = {2020},
  url        = {https://arxiv.org/abs/2008.03648},
  eprinttype = {arXiv},
  eprint     = {2008.03648},
  timestamp  = {Fri, 02 Feb 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2008-03648.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2008.03992), 2020.
% Same authors and title as DBLP:conf/apsipa/LuZS020.
@article{DBLP:journals/corr/abs-2008-03992,
  author     = {Junchen Lu and
                Kun Zhou and
                Berrak Sisman and
                Haizhou Li},
  title      = {{VAW-GAN} for Singing Voice Conversion with Non-parallel Training
                Data},
  journal    = {CoRR},
  volume     = {abs/2008.03992},
  year       = {2020},
  url        = {https://arxiv.org/abs/2008.03992},
  eprinttype = {arXiv},
  eprint     = {2008.03992},
  timestamp  = {Sat, 06 Jan 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2008-03992.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2008.04562), 2020.
% Same authors and title as DBLP:conf/apsipa/DuZS020.
@article{DBLP:journals/corr/abs-2008-04562,
  author     = {Zongyang Du and
                Kun Zhou and
                Berrak Sisman and
                Haizhou Li},
  title      = {Spectrum and Prosody Conversion for Cross-lingual Voice Conversion
                with {CycleGAN}},
  journal    = {CoRR},
  volume     = {abs/2008.04562},
  year       = {2020},
  url        = {https://arxiv.org/abs/2008.04562},
  eprinttype = {arXiv},
  eprint     = {2008.04562},
  timestamp  = {Sat, 06 Jan 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2008-04562.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2008.05284), 2020.
% Same authors and (up to letter case) title as DBLP:journals/spl/LiuSBGL20.
@article{DBLP:journals/corr/abs-2008-05284,
  author     = {Rui Liu and
                Berrak Sisman and
                Feilong Bao and
                Guanglai Gao and
                Haizhou Li},
  title      = {Modeling Prosodic Phrasing with Multi-Task Learning in {Tacotron}-based
                {TTS}},
  journal    = {CoRR},
  volume     = {abs/2008.05284},
  year       = {2020},
  url        = {https://arxiv.org/abs/2008.05284},
  eprinttype = {arXiv},
  eprint     = {2008.05284},
  timestamp  = {Mon, 17 Aug 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2008-05284.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2010.12423), 2020.
% The system name in the title is braced so case-changing styles keep its capitals.
@article{DBLP:journals/corr/abs-2010-12423,
  author     = {Rui Liu and
                Berrak Sisman and
                Haizhou Li},
  title      = {{GraphSpeech}: Syntax-Aware Graph Attention Network For Neural Speech
                Synthesis},
  journal    = {CoRR},
  volume     = {abs/2010.12423},
  year       = {2020},
  url        = {https://arxiv.org/abs/2010.12423},
  eprinttype = {arXiv},
  eprint     = {2010.12423},
  timestamp  = {Tue, 27 Oct 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2010-12423.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2010.14794), 2020.
@article{DBLP:journals/corr/abs-2010-14794,
  author     = {Kun Zhou and
                Berrak Sisman and
                Rui Liu and
                Haizhou Li},
  title      = {Seen and Unseen emotional style transfer for voice conversion with
                a new emotional speech dataset},
  journal    = {CoRR},
  volume     = {abs/2010.14794},
  year       = {2020},
  url        = {https://arxiv.org/abs/2010.14794},
  eprinttype = {arXiv},
  eprint     = {2010.14794},
  timestamp  = {Sat, 06 Jan 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2010-14794.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2011.02314), 2020.
@article{DBLP:journals/corr/abs-2011-02314,
  author     = {Kun Zhou and
                Berrak Sisman and
                Haizhou Li},
  title      = {{VAW-GAN} for Disentanglement and Recomposition of Emotional Elements
                in Speech},
  journal    = {CoRR},
  volume     = {abs/2011.02314},
  year       = {2020},
  url        = {https://arxiv.org/abs/2011.02314},
  eprinttype = {arXiv},
  eprint     = {2011.02314},
  timestamp  = {Sat, 06 Jan 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2011-02314.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, IEEE/ACM Trans. Audio Speech Lang. Process. 27(6), 2019.
% "WaveNet" in the title is braced so case-changing styles keep its capitals.
@article{DBLP:journals/taslp/SismanZL19,
  author     = {Berrak Sisman and
                Mingyang Zhang and
                Haizhou Li},
  title      = {Group Sparse Representation With {WaveNet} Vocoder Adaptation for Spectrum
                and Prosody Conversion},
  journal    = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  volume     = {27},
  number     = {6},
  pages      = {1085--1097},
  year       = {2019},
  url        = {https://doi.org/10.1109/TASLP.2019.2910637},
  doi        = {10.1109/TASLP.2019.2910637},
  timestamp  = {Sun, 19 Jan 2025 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/taslp/SismanZL19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, APSIPA ASC 2019.
@inproceedings{DBLP:conf/apsipa/SismanVD019,
  author     = {Berrak Sisman and
                Karthika Vijayan and
                Minghui Dong and
                Haizhou Li},
  title      = {{SINGAN:} Singing Voice Conversion with Generative Adversarial Networks},
  booktitle  = {2019 Asia-Pacific Signal and Information Processing Association Annual
                Summit and Conference, {APSIPA} {ASC} 2019, Lanzhou, China, November
                18-21, 2019},
  pages      = {112--118},
  publisher  = {{IEEE}},
  year       = {2019},
  url        = {https://doi.org/10.1109/APSIPAASC47483.2019.9023162},
  doi        = {10.1109/APSIPAASC47483.2019.9023162},
  timestamp  = {Fri, 13 Mar 2020 10:17:58 +0100},
  biburl     = {https://dblp.org/rec/conf/apsipa/SismanVD019.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Workshop paper, IEEE ASRU 2019.
@inproceedings{DBLP:conf/asru/SismanZDL19,
  author     = {Berrak Sisman and
                Mingyang Zhang and
                Minghui Dong and
                Haizhou Li},
  title      = {On the Study of Generative Adversarial Networks for Cross-Lingual
                Voice Conversion},
  booktitle  = {{IEEE} Automatic Speech Recognition and Understanding Workshop, {ASRU}
                2019, Singapore, December 14-18, 2019},
  pages      = {144--151},
  publisher  = {{IEEE}},
  year       = {2019},
  url        = {https://doi.org/10.1109/ASRU46091.2019.9003939},
  doi        = {10.1109/ASRU46091.2019.9003939},
  timestamp  = {Sat, 09 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/asru/SismanZDL19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, Interspeech 2019.
% Same authors and (up to letter case) title as DBLP:journals/corr/abs-1905-11449.
@inproceedings{DBLP:conf/interspeech/TjandraS0S0019,
  author     = {Andros Tjandra and
                Berrak Sisman and
                Mingyang Zhang and
                Sakriani Sakti and
                Haizhou Li and
                Satoshi Nakamura},
  editor     = {Gernot Kubin and
                Zdravko Kacic},
  title      = {{VQVAE} Unsupervised Unit Discovery and Multi-Scale {Code2Spec} Inverter
                for {Zerospeech Challenge} 2019},
  booktitle  = {20th Annual Conference of the International Speech Communication Association,
                Interspeech 2019, Graz, Austria, September 15-19, 2019},
  pages      = {1118--1122},
  publisher  = {{ISCA}},
  year       = {2019},
  url        = {https://doi.org/10.21437/Interspeech.2019-3232},
  doi        = {10.21437/INTERSPEECH.2019-3232},
  timestamp  = {Tue, 11 Jun 2024 16:45:43 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/TjandraS0S0019.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1905.11449), 2019.
% Same authors and (up to letter case) title as DBLP:conf/interspeech/TjandraS0S0019.
@article{DBLP:journals/corr/abs-1905-11449,
  author     = {Andros Tjandra and
                Berrak Sisman and
                Mingyang Zhang and
                Sakriani Sakti and
                Haizhou Li and
                Satoshi Nakamura},
  title      = {{VQVAE} Unsupervised Unit Discovery and Multi-scale {Code2Spec} Inverter
                for {Zerospeech Challenge} 2019},
  journal    = {CoRR},
  volume     = {abs/1905.11449},
  year       = {2019},
  url        = {https://arxiv.org/abs/1905.11449},
  eprinttype = {arXiv},
  eprint     = {1905.11449},
  timestamp  = {Thu, 06 Feb 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1905-11449.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/1911.02839), 2019.
% Same authors and (up to letter case) title as DBLP:conf/icassp/0008SLBG020.
@article{DBLP:journals/corr/abs-1911-02839,
  author     = {Rui Liu and
                Berrak Sisman and
                Jingdong Li and
                Feilong Bao and
                Guanglai Gao and
                Haizhou Li},
  title      = {Teacher-Student Training for Robust {Tacotron}-based {TTS}},
  journal    = {CoRR},
  volume     = {abs/1911.02839},
  year       = {2019},
  url        = {https://arxiv.org/abs/1911.02839},
  eprinttype = {arXiv},
  eprint     = {1911.02839},
  timestamp  = {Mon, 11 Nov 2019 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1911-02839.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, APSIPA ASC 2018.
% The acronym "DBLSTM" in the title is braced so case-changing styles keep its capitals.
@inproceedings{DBLP:conf/apsipa/ZhangSR0Z18,
  author     = {Mingyang Zhang and
                Berrak Sisman and
                Sai Sirisha Rallabandi and
                Haizhou Li and
                Li Zhao},
  title      = {Error Reduction Network for {DBLSTM}-based Voice Conversion},
  booktitle  = {Asia-Pacific Signal and Information Processing Association Annual
                Summit and Conference, {APSIPA} {ASC} 2018, Honolulu, HI, USA, November
                12-15, 2018},
  pages      = {823--828},
  publisher  = {{IEEE}},
  year       = {2018},
  url        = {https://doi.org/10.23919/APSIPA.2018.8659543},
  doi        = {10.23919/APSIPA.2018.8659543},
  timestamp  = {Tue, 07 Sep 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/apsipa/ZhangSR0Z18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Workshop paper, The Blizzard Challenge 2018. No pages field is recorded for this entry.
@inproceedings{DBLP:conf/blizzard/XiaoY0SH0D018,
  author     = {Jinba Xiao and
                Shan Yang and
                Mingyang Zhang and
                Berrak Sisman and
                Dongyan Huang and
                Lei Xie and
                Minghui Dong and
                Haizhou Li},
  title      = {The {I2R-NWPU-NUS} Text-to-Speech System for {Blizzard Challenge} 2018},
  booktitle  = {The Blizzard Challenge 2018, Hyderabad, India, September 8, 2018},
  publisher  = {{ISCA}},
  year       = {2018},
  url        = {https://doi.org/10.21437/Blizzard.2018-4},
  doi        = {10.21437/BLIZZARD.2018-4},
  timestamp  = {Mon, 26 Jan 2026 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/blizzard/XiaoY0SH0D018.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, Interspeech 2018.
@inproceedings{DBLP:conf/interspeech/SismanL18,
  author     = {Berrak Sisman and
                Haizhou Li},
  editor     = {B. Yegnanarayana},
  title      = {Wavelet Analysis of Speaker Dependent and Independent Prosody for
                Voice Conversion},
  booktitle  = {19th Annual Conference of the International Speech Communication Association,
                Interspeech 2018, Hyderabad, India, September 2-6, 2018},
  pages      = {52--56},
  publisher  = {{ISCA}},
  year       = {2018},
  url        = {https://doi.org/10.21437/Interspeech.2018-1499},
  doi        = {10.21437/INTERSPEECH.2018-1499},
  timestamp  = {Tue, 11 Jun 2024 16:45:43 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/SismanL18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, Interspeech 2018.
% "WaveNet" in the title is braced so case-changing styles keep its capitals.
@inproceedings{DBLP:conf/interspeech/SismanZL18,
  author     = {Berrak Sisman and
                Mingyang Zhang and
                Haizhou Li},
  editor     = {B. Yegnanarayana},
  title      = {A Voice Conversion Framework with Tandem Feature Sparse Representation
                and Speaker-Adapted {WaveNet} Vocoder},
  booktitle  = {19th Annual Conference of the International Speech Communication Association,
                Interspeech 2018, Hyderabad, India, September 2-6, 2018},
  pages      = {1978--1982},
  publisher  = {{ISCA}},
  year       = {2018},
  url        = {https://doi.org/10.21437/Interspeech.2018-1131},
  doi        = {10.21437/INTERSPEECH.2018-1131},
  timestamp  = {Fri, 29 Jan 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/interspeech/SismanZL18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Workshop paper, Odyssey 2018.
@inproceedings{DBLP:conf/odyssey/SismanL018,
  author     = {Berrak Sisman and
                Grandee Lee and
                Haizhou Li},
  editor     = {Anthony Larcher and
                Jean{-}Fran{\c{c}}ois Bonastre},
  title      = {Phonetically Aware Exemplar-Based Prosody Transformation},
  booktitle  = {Odyssey 2018: The Speaker and Language Recognition Workshop, 26-29
                June 2018, Les Sables d'Olonne, France},
  pages      = {267--274},
  publisher  = {{ISCA}},
  year       = {2018},
  url        = {https://doi.org/10.21437/Odyssey.2018-38},
  doi        = {10.21437/ODYSSEY.2018-38},
  timestamp  = {Tue, 30 Jul 2024 09:37:28 +0200},
  biburl     = {https://dblp.org/rec/conf/odyssey/SismanL018.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Workshop paper, IEEE SLT 2018.
% "Wavenet" (spelling kept as exported) and "GAN" are braced so case-changing styles keep their capitals.
@inproceedings{DBLP:conf/slt/SismanZS0018,
  author     = {Berrak Sisman and
                Mingyang Zhang and
                Sakriani Sakti and
                Haizhou Li and
                Satoshi Nakamura},
  title      = {Adaptive {Wavenet} Vocoder for Residual Compensation in {GAN}-Based Voice
                Conversion},
  booktitle  = {2018 {IEEE} Spoken Language Technology Workshop, {SLT} 2018, Athens,
                Greece, December 18-21, 2018},
  pages      = {282--289},
  publisher  = {{IEEE}},
  year       = {2018},
  url        = {https://doi.org/10.1109/SLT.2018.8639507},
  doi        = {10.1109/SLT.2018.8639507},
  timestamp  = {Fri, 27 Mar 2020 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/slt/SismanZS0018.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, APSIPA ASC 2017.
@inproceedings{DBLP:conf/apsipa/SismanLT17,
  author     = {Berrak Sisman and
                Haizhou Li and
                Kay Chen Tan},
  title      = {Transformation of prosody in voice conversion},
  booktitle  = {2017 Asia-Pacific Signal and Information Processing Association Annual
                Summit and Conference, {APSIPA} {ASC} 2017, Kuala Lumpur, Malaysia,
                December 12-15, 2017},
  pages      = {1537--1546},
  publisher  = {{IEEE}},
  year       = {2017},
  url        = {https://doi.org/10.1109/APSIPA.2017.8282288},
  doi        = {10.1109/APSIPA.2017.8282288},
  timestamp  = {Tue, 07 May 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/apsipa/SismanLT17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Workshop paper, IEEE ASRU 2017.
% The key says "Cicman" while the author field says "Sisman"; the key is kept unchanged
% so existing citations keep resolving.
@inproceedings{DBLP:conf/asru/CicmanLT17,
  author     = {Berrak Sisman and
                Haizhou Li and
                Kay Chen Tan},
  title      = {Sparse representation of phonetic features for voice conversion with
                and without parallel data},
  booktitle  = {2017 {IEEE} Automatic Speech Recognition and Understanding Workshop,
                {ASRU} 2017, Okinawa, Japan, December 16-20, 2017},
  pages      = {677--684},
  publisher  = {{IEEE}},
  year       = {2017},
  url        = {https://doi.org/10.1109/ASRU.2017.8269002},
  doi        = {10.1109/ASRU.2017.8269002},
  timestamp  = {Tue, 07 May 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/asru/CicmanLT17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, IALP 2017.
@inproceedings{DBLP:conf/ialp/SismanLLT17,
  author     = {Berrak Sisman and
                Grandee Lee and
                Haizhou Li and
                Kay Chen Tan},
  editor     = {Rong Tong and
                Yue Zhang and
                Yanfeng Lu and
                Minghui Dong},
  title      = {On the analysis and evaluation of prosody conversion techniques},
  booktitle  = {2017 International Conference on Asian Language Processing, {IALP}
                2017, Singapore, December 5-7, 2017},
  pages      = {44--47},
  publisher  = {{IEEE}},
  year       = {2017},
  url        = {https://doi.org/10.1109/IALP.2017.8300542},
  doi        = {10.1109/IALP.2017.8300542},
  timestamp  = {Tue, 07 May 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/ialp/SismanLLT17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, IEEE WCNC 2016 (main conference record).
% Shares authors and title with DBLP:conf/wcnc/GurakanSKU16, which has a different DOI, pages and booktitle.
@inproceedings{DBLP:conf/wcnc/GurakanSKU16a,
  author     = {Berk Gurakan and
                Berrak Sisman and
                Onur Kaya and
                Sennur Ulukus},
  title      = {Energy and data cooperation in energy harvesting multiple access channel},
  booktitle  = {{IEEE} Wireless Communications and Networking Conference, {WCNC} 2016,
                Doha, Qatar, April 3-6, 2016},
  pages      = {1--6},
  publisher  = {{IEEE}},
  year       = {2016},
  url        = {https://doi.org/10.1109/WCNC.2016.7564705},
  doi        = {10.1109/WCNC.2016.7564705},
  timestamp  = {Wed, 16 Oct 2019 14:14:50 +0200},
  biburl     = {https://dblp.org/rec/conf/wcnc/GurakanSKU16a.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Workshop paper, IEEE WCNC Workshops 2016.
% Shares authors and title with DBLP:conf/wcnc/GurakanSKU16a, which has a different DOI, pages and booktitle.
@inproceedings{DBLP:conf/wcnc/GurakanSKU16,
  author     = {Berk Gurakan and
                Berrak Sisman and
                Onur Kaya and
                Sennur Ulukus},
  title      = {Energy and data cooperation in energy harvesting multiple access channel},
  booktitle  = {{IEEE} Wireless Communications and Networking Conference Workshops,
                {WCNC} Workshops 2016, Doha, Qatar, April 3-6, 2016},
  pages      = {410--415},
  publisher  = {{IEEE}},
  year       = {2016},
  url        = {https://doi.org/10.1109/WCNCW.2016.7552734},
  doi        = {10.1109/WCNCW.2016.7552734},
  timestamp  = {Sat, 06 Sep 2025 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/wcnc/GurakanSKU16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

manage site settings
To protect your privacy, all features that rely on external API calls from your browser are turned off by default. You need to opt in for them to become active. All settings here will be stored as cookies in your web browser. For more information, see our F.A.Q.


Google
Google Scholar
Semantic Scholar
Internet Archive Scholar
CiteSeerX
ORCID













