


% BibTeX records for Siddhant Arora (exported from the dblp computer science bibliography).
% Journal article (IEEE Transactions on Multimedia, vol. 28, 2026).
% The url field is the doi.org resolver link for the doi field.
@article{DBLP:journals/tmm/KimJRMACWR26,
  author       = {Minsu Kim and
                  Jee{-}Weon Jung and
                  Hyeongseop Rha and
                  Soumi Maiti and
                  Siddhant Arora and
                  Xuankai Chang and
                  Shinji Watanabe and
                  Yong Man Ro},
  title        = {{TMT:} Tri-Modal Translation Between Speech, Image, and Text by Processing
                  Different Modalities as Different Languages},
  journal      = {{IEEE} Transactions on Multimedia},
  volume       = {28},
  pages        = {1976--1988},
  year         = {2026},
  url          = {https://doi.org/10.1109/TMM.2026.3659297},
  doi          = {10.1109/TMM.2026.3659297},
  timestamp    = {Wed, 25 Mar 2026 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/tmm/KimJRMACWR26.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2601.02391, 2026).
% WearVox is brace-protected so sentence-casing styles keep its capitalisation.
@article{DBLP:journals/corr/abs-2601-02391,
  author       = {Zhaojiang Lin and
                  Yong Xu and
                  Kai Sun and
                  Jing Zheng and
                  Yin Huang and
                  Surya Teja Appini and
                  Krish Narang and
                  Renjie Tao and
                  Ishan Kapil Jain and
                  Siddhant Arora and
                  Ruizhi Li and
                  Yiteng Huang and
                  Kaushik Patnaik and
                  Wenfang Xu and
                  Suwon Shon and
                  Yue Liu and
                  Ahmed Aly and
                  Anuj Kumar and
                  Florian Metze and
                  Xin Luna Dong},
  title        = {{WearVox}: An Egocentric Multichannel Voice Assistant Benchmark for
                  Wearables},
  journal      = {CoRR},
  volume       = {abs/2601.02391},
  year         = {2026},
  url          = {https://doi.org/10.48550/arXiv.2601.02391},
  doi          = {10.48550/ARXIV.2601.02391},
  eprinttype   = {arXiv},
  eprint       = {2601.02391},
  timestamp    = {Wed, 11 Feb 2026 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2601-02391.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2601.19063, 2026).
@article{DBLP:journals/corr/abs-2601-19063,
  author       = {Siddhant Arora and
                  Jinchuan Tian and
                  Jiatong Shi and
                  Hayato Futami and
                  Yosuke Kashiwagi and
                  Emiru Tsunoo and
                  Shinji Watanabe},
  title        = {Optimizing Conversational Quality in Spoken Dialogue Systems with
                  Reinforcement Learning from {AI} Feedback},
  journal      = {CoRR},
  volume       = {abs/2601.19063},
  year         = {2026},
  url          = {https://doi.org/10.48550/arXiv.2601.19063},
  doi          = {10.48550/ARXIV.2601.19063},
  eprinttype   = {arXiv},
  eprint       = {2601.19063},
  timestamp    = {Thu, 26 Feb 2026 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2601-19063.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2602.05220, 2026).
@article{DBLP:journals/corr/abs-2602-05220,
  author       = {Jinchuan Tian and
                  Haoran Wang and
                  Bo{-}Hao Su and
                  Chien{-}Yu Huang and
                  Qingzheng Wang and
                  Jiatong Shi and
                  William Chen and
                  Xun Gong and
                  Siddhant Arora and
                  Chin{-}Jou Li and
                  Masao Someki and
                  Takashi Maekaku and
                  Keita Goto and
                  Yusuke Shinohara and
                  Jin Sakuma and
                  Chao{-}Han Huck Yang and
                  Shinji Watanabe},
  title        = {Bagpiper: Solving Open-Ended Audio Tasks via Rich Captions},
  journal      = {CoRR},
  volume       = {abs/2602.05220},
  year         = {2026},
  url          = {https://doi.org/10.48550/arXiv.2602.05220},
  doi          = {10.48550/ARXIV.2602.05220},
  eprinttype   = {arXiv},
  eprint       = {2602.05220},
  timestamp    = {Fri, 20 Mar 2026 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2602-05220.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article (Discover Artificial Intelligence, vol. 5, no. 1, 2025).
% The pages field holds the single value 198 as given by dblp (presumably an article number, not a page range).
% UNet is brace-protected so sentence-casing styles keep its capitalisation.
@article{DBLP:journals/dai/AroraBK25,
  author       = {Siddhant Arora and
                  Ahaan Banerjee and
                  Nitish Katal},
  title        = {Enhanced urban driving scene segmentation using modified {UNet} with
                  residual convolutions and attention guided skip connections},
  journal      = {Discover Artificial Intelligence},
  volume       = {5},
  number       = {1},
  pages        = {198},
  year         = {2025},
  url          = {https://doi.org/10.1007/s44163-025-00455-x},
  doi          = {10.1007/S44163-025-00455-X},
  timestamp    = {Wed, 25 Feb 2026 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/dai/AroraBK25.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article (Transactions on Machine Learning Research, 2025).
% No doi field; the url points to the OpenReview forum page.
@article{DBLP:journals/tmlr/AroraCCPWADLLW25,
  author       = {Siddhant Arora and
                  Kai{-}Wei Chang and
                  Chung{-}Ming Chien and
                  Yifan Peng and
                  Haibin Wu and
                  Yossi Adi and
                  Emmanuel Dupoux and
                  Hung{-}yi Lee and
                  Karen Livescu and
                  Shinji Watanabe},
  title        = {On The Landscape of Spoken Language Models: {A} Comprehensive Survey},
  journal      = {Transactions on Machine Learning Research},
  volume       = {2025},
  year         = {2025},
  url          = {https://openreview.net/forum?id=BvxaP3sVbA},
  timestamp    = {Sun, 08 Feb 2026 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/tmlr/AroraCCPWADLLW25.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper (ICASSP 2025).
% MultiTalker is brace-protected so sentence-casing styles keep its capitalisation.
@inproceedings{DBLP:conf/icassp/KashiwagiFTA025,
  author       = {Yosuke Kashiwagi and
                  Hayato Futami and
                  Emiru Tsunoo and
                  Siddhant Arora and
                  Shinji Watanabe},
  title        = {Hypothesis Clustering and Merging: Novel {MultiTalker} Speech Recognition
                  with Speaker Tokens},
  booktitle    = {2025 {IEEE} International Conference on Acoustics, Speech and Signal
                  Processing, {ICASSP} 2025, Hyderabad, India, April 6-11, 2025},
  pages        = {1--5},
  publisher    = {{IEEE}},
  year         = {2025},
  url          = {https://doi.org/10.1109/ICASSP49660.2025.10888002},
  doi          = {10.1109/ICASSP49660.2025.10888002},
  timestamp    = {Thu, 25 Dec 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icassp/KashiwagiFTA025.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper (ICLR 2025). No doi field; the url points to the OpenReview forum page.
@inproceedings{DBLP:conf/iclr/AroraLCP025,
  author       = {Siddhant Arora and
                  Zhiyun Lu and
                  Chung{-}Cheng Chiu and
                  Ruoming Pang and
                  Shinji Watanabe},
  title        = {Talking Turns: Benchmarking Audio Foundation Models on Turn-Taking
                  Dynamics},
  booktitle    = {The Thirteenth International Conference on Learning Representations,
                  {ICLR} 2025, Singapore, April 24-28, 2025},
  publisher    = {OpenReview.net},
  year         = {2025},
  url          = {https://openreview.net/forum?id=2e4ECh0ikn},
  timestamp    = {Thu, 15 May 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/iclr/AroraLCP025.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper (ICLR 2025). No doi field; the url points to the OpenReview forum page.
@inproceedings{DBLP:conf/iclr/SomekiPAMMSL025,
  author       = {Masao Someki and
                  Yifan Peng and
                  Siddhant Arora and
                  Markus M{\"{u}}ller and
                  Athanasios Mouchtaris and
                  Grant P. Strimel and
                  Jing Liu and
                  Shinji Watanabe},
  title        = {Context-aware Dynamic Pruning for Speech Foundation Models},
  booktitle    = {The Thirteenth International Conference on Learning Representations,
                  {ICLR} 2025, Singapore, April 24-28, 2025},
  publisher    = {OpenReview.net},
  year         = {2025},
  url          = {https://openreview.net/forum?id=u2QdCiOgwA},
  timestamp    = {Mon, 30 Jun 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/iclr/SomekiPAMMSL025.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper (Interspeech 2025).
@inproceedings{DBLP:conf/interspeech/AroraTFJSKT025,
  author       = {Siddhant Arora and
                  Jinchuan Tian and
                  Hayato Futami and
                  Jee{-}weon Jung and
                  Jiatong Shi and
                  Yosuke Kashiwagi and
                  Emiru Tsunoo and
                  Shinji Watanabe},
  editor       = {Odette Scharenborg and
                  Catharine Oertel and
                  Khiet Truong},
  title        = {Chain-of-Thought Training for Open {E2E} Spoken Dialogue Systems},
  booktitle    = {26th Annual Conference of the International Speech Communication Association,
                  Interspeech 2025, Rotterdam, The Netherlands, 17-21 August 2025},
  publisher    = {{ISCA}},
  year         = {2025},
  url          = {https://doi.org/10.21437/Interspeech.2025-2339},
  doi          = {10.21437/INTERSPEECH.2025-2339},
  timestamp    = {Thu, 20 Nov 2025 11:03:39 +0100},
  biburl       = {https://dblp.org/rec/conf/interspeech/AroraTFJSKT025.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper (Interspeech 2025).
% LLMs is brace-protected so sentence-casing styles keep its capitalisation.
@inproceedings{DBLP:conf/interspeech/FutamiTKISA025,
  author       = {Hayato Futami and
                  Emiru Tsunoo and
                  Yosuke Kashiwagi and
                  Yuki Ito and
                  Hassan Shahmohammadi and
                  Siddhant Arora and
                  Shinji Watanabe},
  editor       = {Odette Scharenborg and
                  Catharine Oertel and
                  Khiet Truong},
  title        = {Scheduled Interleaved Speech-Text Training for Speech-to-Speech Translation
                  with {LLMs}},
  booktitle    = {26th Annual Conference of the International Speech Communication Association,
                  Interspeech 2025, Rotterdam, The Netherlands, 17-21 August 2025},
  publisher    = {{ISCA}},
  year         = {2025},
  url          = {https://doi.org/10.21437/Interspeech.2025-1595},
  doi          = {10.21437/INTERSPEECH.2025-1595},
  timestamp    = {Thu, 20 Nov 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/interspeech/FutamiTKISA025.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper (Interspeech 2025).
@inproceedings{DBLP:conf/interspeech/SheikhSASCL025,
  author       = {Zaid Sheikh and
                  Shuichiro Shimizu and
                  Siddhant Arora and
                  Jiatong Shi and
                  Samuele Cornell and
                  Xinjian Li and
                  Shinji Watanabe},
  editor       = {Odette Scharenborg and
                  Catharine Oertel and
                  Khiet Truong},
  title        = {Scalable Spontaneous Speech Dataset {(SSSD):} Crowdsourcing Data Collection
                  to Promote Dialogue Research},
  booktitle    = {26th Annual Conference of the International Speech Communication Association,
                  Interspeech 2025, Rotterdam, The Netherlands, 17-21 August 2025},
  publisher    = {{ISCA}},
  year         = {2025},
  url          = {https://doi.org/10.21437/Interspeech.2025-2362},
  doi          = {10.21437/INTERSPEECH.2025-2362},
  timestamp    = {Thu, 20 Nov 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/interspeech/SheikhSASCL025.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper (Interspeech 2025).
% OpusLM is brace-protected so sentence-casing styles keep its capitalisation.
@inproceedings{DBLP:conf/interspeech/TianC0SABMSGYY025,
  author       = {Jinchuan Tian and
                  William Chen and
                  Yifan Peng and
                  Jiatong Shi and
                  Siddhant Arora and
                  Shikhar Bharadwaj and
                  Takashi Maekaku and
                  Yusuke Shinohara and
                  Keita Goto and
                  Xiang Yue and
                  Huck Yang and
                  Shinji Watanabe},
  editor       = {Odette Scharenborg and
                  Catharine Oertel and
                  Khiet Truong},
  title        = {{OpusLM}: {A} Family of Open Unified Speech Language Models},
  booktitle    = {26th Annual Conference of the International Speech Communication Association,
                  Interspeech 2025, Rotterdam, The Netherlands, 17-21 August 2025},
  publisher    = {{ISCA}},
  year         = {2025},
  url          = {https://doi.org/10.21437/Interspeech.2025-1184},
  doi          = {10.21437/INTERSPEECH.2025-1184},
  timestamp    = {Thu, 25 Dec 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/interspeech/TianC0SABMSGYY025.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper (NAACL 2025, System Demonstrations).
% ESPnet-SpeechLM is brace-protected so sentence-casing styles keep its capitalisation.
@inproceedings{DBLP:conf/naacl/TianSCAMMWPBZC025,
  author       = {Jinchuan Tian and
                  Jiatong Shi and
                  William Chen and
                  Siddhant Arora and
                  Yoshiki Masuyama and
                  Takashi Maekaku and
                  Yihan Wu and
                  Junyi Peng and
                  Shikhar Bharadwaj and
                  Yiwen Zhao and
                  Samuele Cornell and
                  Yifan Peng and
                  Xiang Yue and
                  Chao{-}Han Huck Yang and
                  Graham Neubig and
                  Shinji Watanabe},
  editor       = {Nouha Dziri and
                  Sean (Xiang) Ren and
                  Shizhe Diao},
  title        = {{ESPnet-SpeechLM}: An Open Speech Language Model Toolkit},
  booktitle    = {Proceedings of the 2025 Conference of the Nations of the Americas
                  Chapter of the Association for Computational Linguistics: Human Language
                  Technologies, {NAACL} 2025 - System Demonstrations, Albuquerque, New
                  Mexico, USA, April 29 - May 4, 2025},
  pages        = {116--124},
  publisher    = {Association for Computational Linguistics},
  year         = {2025},
  url          = {https://doi.org/10.18653/v1/2025.naacl-demo.12},
  doi          = {10.18653/V1/2025.NAACL-DEMO.12},
  timestamp    = {Wed, 25 Feb 2026 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/naacl/TianSCAMMWPBZC025.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper (NAACL 2025, System Demonstrations).
@inproceedings{DBLP:conf/naacl/ShiSTAWPYZTZAHS25,
  author       = {Jiatong Shi and
                  Hye{-}jin Shim and
                  Jinchuan Tian and
                  Siddhant Arora and
                  Haibin Wu and
                  Darius Petermann and
                  Jia Qi Yip and
                  You Zhang and
                  Yuxun Tang and
                  Wangyou Zhang and
                  Dareen Alharthi and
                  Yichen Huang and
                  Koichi Saito and
                  Jionghao Han and
                  Yiwen Zhao and
                  Chris Donahue and
                  Shinji Watanabe},
  editor       = {Nouha Dziri and
                  Sean (Xiang) Ren and
                  Shizhe Diao},
  title        = {{VERSA:} {A} Versatile Evaluation Toolkit for Speech, Audio, and Music},
  booktitle    = {Proceedings of the 2025 Conference of the Nations of the Americas
                  Chapter of the Association for Computational Linguistics: Human Language
                  Technologies, {NAACL} 2025 - System Demonstrations, Albuquerque, New
                  Mexico, USA, April 29 - May 4, 2025},
  pages        = {191--209},
  publisher    = {Association for Computational Linguistics},
  year         = {2025},
  url          = {https://doi.org/10.18653/v1/2025.naacl-demo.19},
  doi          = {10.18653/V1/2025.NAACL-DEMO.19},
  timestamp    = {Tue, 03 Feb 2026 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/naacl/ShiSTAWPYZTZAHS25.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper (NAACL 2025, System Demonstrations).
% ESPnet-SDS is brace-protected so sentence-casing styles keep its capitalisation.
@inproceedings{DBLP:conf/naacl/Arora0STCBFKTSS25,
  author       = {Siddhant Arora and
                  Yifan Peng and
                  Jiatong Shi and
                  Jinchuan Tian and
                  William Chen and
                  Shikhar Bharadwaj and
                  Hayato Futami and
                  Yosuke Kashiwagi and
                  Emiru Tsunoo and
                  Shuichiro Shimizu and
                  Vaibhav Srivastav and
                  Shinji Watanabe},
  editor       = {Nouha Dziri and
                  Sean (Xiang) Ren and
                  Shizhe Diao},
  title        = {{ESPnet-SDS}: Unified Toolkit and Demo for Spoken Dialogue Systems},
  booktitle    = {Proceedings of the 2025 Conference of the Nations of the Americas
                  Chapter of the Association for Computational Linguistics: Human Language
                  Technologies, {NAACL} 2025 - System Demonstrations, Albuquerque, New
                  Mexico, USA, April 29 - May 4, 2025},
  pages        = {248--259},
  publisher    = {Association for Computational Linguistics},
  year         = {2025},
  url          = {https://doi.org/10.18653/v1/2025.naacl-demo.21},
  doi          = {10.18653/V1/2025.NAACL-DEMO.21},
  timestamp    = {Tue, 03 Feb 2026 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/naacl/Arora0STCBFKTSS25.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2502.15218, 2025).
% Same title as the NAACL 2025 demo entry DBLP:conf/naacl/TianSCAMMWPBZC025 in this file;
% cite only one of the two to avoid a duplicated reference.
@article{DBLP:journals/corr/abs-2502-15218,
  author       = {Jinchuan Tian and
                  Jiatong Shi and
                  William Chen and
                  Siddhant Arora and
                  Yoshiki Masuyama and
                  Takashi Maekaku and
                  Yihan Wu and
                  Junyi Peng and
                  Shikhar Bharadwaj and
                  Yiwen Zhao and
                  Samuele Cornell and
                  Yifan Peng and
                  Xiang Yue and
                  Chao{-}Han Huck Yang and
                  Graham Neubig and
                  Shinji Watanabe},
  title        = {{ESPnet-SpeechLM}: An Open Speech Language Model Toolkit},
  journal      = {CoRR},
  volume       = {abs/2502.15218},
  year         = {2025},
  url          = {https://doi.org/10.48550/arXiv.2502.15218},
  doi          = {10.48550/ARXIV.2502.15218},
  eprinttype   = {arXiv},
  eprint       = {2502.15218},
  timestamp    = {Sat, 07 Feb 2026 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2502-15218.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2503.01174, 2025).
% Same title as the ICLR 2025 entry DBLP:conf/iclr/AroraLCP025 in this file;
% cite only one of the two to avoid a duplicated reference.
@article{DBLP:journals/corr/abs-2503-01174,
  author       = {Siddhant Arora and
                  Zhiyun Lu and
                  Chung{-}Cheng Chiu and
                  Ruoming Pang and
                  Shinji Watanabe},
  title        = {Talking Turns: Benchmarking Audio Foundation Models on Turn-Taking
                  Dynamics},
  journal      = {CoRR},
  volume       = {abs/2503.01174},
  year         = {2025},
  url          = {https://doi.org/10.48550/arXiv.2503.01174},
  doi          = {10.48550/ARXIV.2503.01174},
  eprinttype   = {arXiv},
  eprint       = {2503.01174},
  timestamp    = {Thu, 25 Dec 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2503-01174.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2503.08533, 2025).
% Same title as the NAACL 2025 demo entry DBLP:conf/naacl/Arora0STCBFKTSS25 in this file;
% cite only one of the two to avoid a duplicated reference.
@article{DBLP:journals/corr/abs-2503-08533,
  author       = {Siddhant Arora and
                  Yifan Peng and
                  Jiatong Shi and
                  Jinchuan Tian and
                  William Chen and
                  Shikhar Bharadwaj and
                  Hayato Futami and
                  Yosuke Kashiwagi and
                  Emiru Tsunoo and
                  Shuichiro Shimizu and
                  Vaibhav Srivastav and
                  Shinji Watanabe},
  title        = {{ESPnet-SDS}: Unified Toolkit and Demo for Spoken Dialogue Systems},
  journal      = {CoRR},
  volume       = {abs/2503.08533},
  year         = {2025},
  url          = {https://doi.org/10.48550/arXiv.2503.08533},
  doi          = {10.48550/ARXIV.2503.08533},
  eprinttype   = {arXiv},
  eprint       = {2503.08533},
  timestamp    = {Thu, 25 Dec 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2503-08533.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2504.08528, 2025).
% Same title as the TMLR entry DBLP:journals/tmlr/AroraCCPWADLLW25 in this file;
% cite only one of the two to avoid a duplicated reference.
@article{DBLP:journals/corr/abs-2504-08528,
  author       = {Siddhant Arora and
                  Kai{-}Wei Chang and
                  Chung{-}Ming Chien and
                  Yifan Peng and
                  Haibin Wu and
                  Yossi Adi and
                  Emmanuel Dupoux and
                  Hung{-}Yi Lee and
                  Karen Livescu and
                  Shinji Watanabe},
  title        = {On The Landscape of Spoken Language Models: {A} Comprehensive Survey},
  journal      = {CoRR},
  volume       = {abs/2504.08528},
  year         = {2025},
  url          = {https://doi.org/10.48550/arXiv.2504.08528},
  doi          = {10.48550/ARXIV.2504.08528},
  eprinttype   = {arXiv},
  eprint       = {2504.08528},
  timestamp    = {Sun, 04 Jan 2026 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2504-08528.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2505.03054, 2025).
@article{DBLP:journals/corr/abs-2505-03054,
  author       = {Orevaoghene Ahia and
                  Martijn Bartelds and
                  Kabir Ahuja and
                  Hila Gonen and
                  Valentin Hofmann and
                  Siddhant Arora and
                  Shuyue Stella Li and
                  Vishal Puttagunta and
                  Mofetoluwa Adeyemi and
                  Charishma Buchireddy and
                  Ben Walls and
                  Noah Bennett and
                  Shinji Watanabe and
                  Noah A. Smith and
                  Yulia Tsvetkov and
                  Sachin Kumar},
  title        = {{BLAB:} Brutally Long Audio Bench},
  journal      = {CoRR},
  volume       = {abs/2505.03054},
  year         = {2025},
  url          = {https://doi.org/10.48550/arXiv.2505.03054},
  doi          = {10.48550/ARXIV.2505.03054},
  eprinttype   = {arXiv},
  eprint       = {2505.03054},
  timestamp    = {Thu, 25 Dec 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2505-03054.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2505.24518, 2025).
@article{DBLP:journals/corr/abs-2505-24518,
  author       = {Jiatong Shi and
                  Yifan Cheng and
                  Bo{-}Hao Su and
                  Hye{-}jin Shim and
                  Jinchuan Tian and
                  Samuele Cornell and
                  Yiwen Zhao and
                  Siddhant Arora and
                  Shinji Watanabe},
  title        = {{ARECHO:} Autoregressive Evaluation via Chain-Based Hypothesis Optimization
                  for Speech Multi-Metric Estimation},
  journal      = {CoRR},
  volume       = {abs/2505.24518},
  year         = {2025},
  url          = {https://doi.org/10.48550/arXiv.2505.24518},
  doi          = {10.48550/ARXIV.2505.24518},
  eprinttype   = {arXiv},
  eprint       = {2505.24518},
  timestamp    = {Thu, 25 Dec 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2505-24518.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2506.00722, 2025).
% Same title as the Interspeech 2025 entry DBLP:conf/interspeech/AroraTFJSKT025 in this file;
% cite only one of the two to avoid a duplicated reference.
@article{DBLP:journals/corr/abs-2506-00722,
  author       = {Siddhant Arora and
                  Jinchuan Tian and
                  Hayato Futami and
                  Jee{-}weon Jung and
                  Jiatong Shi and
                  Yosuke Kashiwagi and
                  Emiru Tsunoo and
                  Shinji Watanabe},
  title        = {Chain-of-Thought Training for Open {E2E} Spoken Dialogue Systems},
  journal      = {CoRR},
  volume       = {abs/2506.00722},
  year         = {2025},
  url          = {https://doi.org/10.48550/arXiv.2506.00722},
  doi          = {10.48550/ARXIV.2506.00722},
  eprinttype   = {arXiv},
  eprint       = {2506.00722},
  timestamp    = {Thu, 25 Dec 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2506-00722.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2506.10299, 2025).
% Same title as the Interspeech 2025 entry DBLP:conf/interspeech/FutamiTKISA025 in this file;
% cite only one of the two to avoid a duplicated reference.
% LLMs is brace-protected so sentence-casing styles keep its capitalisation.
@article{DBLP:journals/corr/abs-2506-10299,
  author       = {Hayato Futami and
                  Emiru Tsunoo and
                  Yosuke Kashiwagi and
                  Yuki Ito and
                  Hassan Shahmohammadi and
                  Siddhant Arora and
                  Shinji Watanabe},
  title        = {Scheduled Interleaved Speech-Text Training for Speech-to-Speech Translation
                  with {LLMs}},
  journal      = {CoRR},
  volume       = {abs/2506.10299},
  year         = {2025},
  url          = {https://doi.org/10.48550/arXiv.2506.10299},
  doi          = {10.48550/ARXIV.2506.10299},
  eprinttype   = {arXiv},
  eprint       = {2506.10299},
  timestamp    = {Thu, 25 Dec 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2506-10299.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2506.17611, 2025).
% Same title as the Interspeech 2025 entry DBLP:conf/interspeech/TianC0SABMSGYY025 in this file;
% cite only one of the two to avoid a duplicated reference.
% OpusLM is brace-protected so sentence-casing styles keep its capitalisation.
@article{DBLP:journals/corr/abs-2506-17611,
  author       = {Jinchuan Tian and
                  William Chen and
                  Yifan Peng and
                  Jiatong Shi and
                  Siddhant Arora and
                  Shikhar Bharadwaj and
                  Takashi Maekaku and
                  Yusuke Shinohara and
                  Keita Goto and
                  Xiang Yue and
                  Huck Yang and
                  Shinji Watanabe},
  title        = {{OpusLM}: {A} Family of Open Unified Speech Language Models},
  journal      = {CoRR},
  volume       = {abs/2506.17611},
  year         = {2025},
  url          = {https://doi.org/10.48550/arXiv.2506.17611},
  doi          = {10.48550/ARXIV.2506.17611},
  eprinttype   = {arXiv},
  eprint       = {2506.17611},
  timestamp    = {Thu, 25 Dec 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2506-17611.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2506.23049, 2025).
@article{DBLP:journals/corr/abs-2506-23049,
  author       = {Leander Melroy Maben and
                  Gayathri Ganesh Lakshmy and
                  Srijith Radhakrishnan and
                  Siddhant Arora and
                  Shinji Watanabe},
  title        = {{AURA:} Agent for Understanding, Reasoning, and Automated Tool Use
                  in Voice-Driven Tasks},
  journal      = {CoRR},
  volume       = {abs/2506.23049},
  year         = {2025},
  url          = {https://doi.org/10.48550/arXiv.2506.23049},
  doi          = {10.48550/ARXIV.2506.23049},
  eprinttype   = {arXiv},
  eprint       = {2506.23049},
  timestamp    = {Thu, 25 Dec 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2506-23049.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2508.16576, 2025).
% "Van hamme, Hugo" uses the comma form so the two-word surname is kept together;
% in First-Last order BibTeX would take only "hamme" as the last name.
% ESPnet is brace-protected so sentence-casing styles keep its capitalisation.
@article{DBLP:journals/corr/abs-2508-16576,
  author       = {Anyu Ying and
                  Natarajan Balaji Shankar and
                  Chyi{-}Jiunn Lin and
                  Mohan Shi and
                  Pu Wang and
                  Hye{-}jin Shim and
                  Siddhant Arora and
                  Van hamme, Hugo and
                  Abeer Alwan and
                  Shinji Watanabe},
  title        = {Benchmarking Training Paradigms, Dataset Composition, and Model Scaling
                  for Child {ASR} in {ESPnet}},
  journal      = {CoRR},
  volume       = {abs/2508.16576},
  year         = {2025},
  url          = {https://doi.org/10.48550/arXiv.2508.16576},
  doi          = {10.48550/ARXIV.2508.16576},
  eprinttype   = {arXiv},
  eprint       = {2508.16576},
  timestamp    = {Thu, 25 Dec 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2508-16576.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2510.00982, 2025).
@article{DBLP:journals/corr/abs-2510-00982,
  author       = {Emiru Tsunoo and
                  Hayato Futami and
                  Yosuke Kashiwagi and
                  Siddhant Arora and
                  Shinji Watanabe},
  title        = {Spiralformer: Low Latency Encoder for Streaming Speech Recognition
                  with Circular Layer Skipping and Early Exiting},
  journal      = {CoRR},
  volume       = {abs/2510.00982},
  year         = {2025},
  url          = {https://doi.org/10.48550/arXiv.2510.00982},
  doi          = {10.48550/ARXIV.2510.00982},
  eprinttype   = {arXiv},
  eprint       = {2510.00982},
  timestamp    = {Mon, 17 Nov 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2510-00982.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2510.02044, 2025).
@article{DBLP:journals/corr/abs-2510-02044,
  author       = {Siddhant Arora and
                  Haidar Khan and
                  Kai Sun and
                  Xin Luna Dong and
                  Sajal Choudhary and
                  Seungwhan Moon and
                  Xinyuan Zhang and
                  Adithya Sagar and
                  Surya Teja Appini and
                  Kaushik Patnaik and
                  Sanat Sharma and
                  Shinji Watanabe and
                  Anuj Kumar and
                  Ahmed Aly and
                  Yue Liu and
                  Florian Metze and
                  Zhaojiang Lin},
  title        = {Stream {RAG:} Instant and Accurate Spoken Dialogue Systems with Streaming
                  Tool Usage},
  journal      = {CoRR},
  volume       = {abs/2510.02044},
  year         = {2025},
  url          = {https://doi.org/10.48550/arXiv.2510.02044},
  doi          = {10.48550/ARXIV.2510.02044},
  eprinttype   = {arXiv},
  eprint       = {2510.02044},
  timestamp    = {Mon, 10 Nov 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2510-02044.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR abs/2510.02066, 2025).
@article{DBLP:journals/corr/abs-2510-02066,
  author       = {Siddhant Arora and
                  Jinchuan Tian and
                  Hayato Futami and
                  Jiatong Shi and
                  Yosuke Kashiwagi and
                  Emiru Tsunoo and
                  Shinji Watanabe},
  title        = {Chain-of-Thought Reasoning in Streaming Full-Duplex End-to-End Spoken
                  Dialogue Systems},
  journal      = {CoRR},
  volume       = {abs/2510.02066},
  year         = {2025},
  url          = {https://doi.org/10.48550/arXiv.2510.02066},
  doi          = {10.48550/ARXIV.2510.02066},
  eprinttype   = {arXiv},
  eprint       = {2510.02066},
  timestamp    = {Sun, 09 Nov 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2510-02066.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper (Findings of ACL 2024).
@inproceedings{DBLP:conf/acl/AroraPCHSJDCSLL24,
  author       = {Siddhant Arora and
                  Ankita Pasad and
                  Chung{-}Ming Chien and
                  Jionghao Han and
                  Roshan S. Sharma and
                  Jee{-}weon Jung and
                  Hira Dhamyal and
                  William Chen and
                  Suwon Shon and
                  Hung{-}yi Lee and
                  Karen Livescu and
                  Shinji Watanabe},
  editor       = {Lun{-}Wei Ku and
                  Andre Martins and
                  Vivek Srikumar},
  title        = {On the Evaluation of Speech Foundation Models for Spoken Language
                  Understanding},
  booktitle    = {Findings of the Association for Computational Linguistics, {ACL} 2024,
                  Bangkok, Thailand and virtual meeting, August 11-16, 2024},
  series       = {Findings of {ACL}},
  pages        = {11923--11938},
  publisher    = {Association for Computational Linguistics},
  year         = {2024},
  url          = {https://doi.org/10.18653/v1/2024.findings-acl.709},
  doi          = {10.18653/V1/2024.FINDINGS-ACL.709},
  timestamp    = {Tue, 27 Jan 2026 20:26:47 +0100},
  biburl       = {https://dblp.org/rec/conf/acl/AroraPCHSJDCSLL24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper (LREC/COLING 2024). No doi field; the url points to the ACL Anthology page.
@inproceedings{DBLP:conf/coling/GuptaGBPOANSW24,
  author       = {Sonu Gupta and
                  Geetika Gopi and
                  Harish Balaji and
                  Ellen Poplavska and
                  Nora O'Toole and
                  Siddhant Arora and
                  Thomas B. Norton and
                  Norman M. Sadeh and
                  Shomir Wilson},
  editor       = {Nicoletta Calzolari and
                  Min{-}Yen Kan and
                  V{\'{e}}ronique Hoste and
                  Alessandro Lenci and
                  Sakriani Sakti and
                  Nianwen Xue},
  title        = {Creation and Analysis of an International Corpus of Privacy Laws},
  booktitle    = {Proceedings of the 2024 Joint International Conference on Computational
                  Linguistics, Language Resources and Evaluation, {LREC/COLING} 2024,
                  20-25 May, 2024, Torino, Italy},
  pages        = {4092--4105},
  publisher    = {{ELRA} and {ICCL}},
  year         = {2024},
  url          = {https://aclanthology.org/2024.lrec-main.365},
  timestamp    = {Thu, 23 May 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/coling/GuptaGBPOANSW24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper (ICASSP 2024).
@inproceedings{DBLP:conf/icassp/FutamiTKOA024,
  author       = {Hayato Futami and
                  Emiru Tsunoo and
                  Yosuke Kashiwagi and
                  Hiroaki Ogawa and
                  Siddhant Arora and
                  Shinji Watanabe},
  title        = {Phoneme-Aware Encoding for Prefix-Tree-Based Contextual {ASR}},
  booktitle    = {{IEEE} International Conference on Acoustics, Speech and Signal Processing,
                  {ICASSP} 2024, Seoul, Republic of Korea, April 14-19, 2024},
  pages        = {10641--10645},
  publisher    = {{IEEE}},
  year         = {2024},
  url          = {https://doi.org/10.1109/ICASSP48485.2024.10447652},
  doi          = {10.1109/ICASSP48485.2024.10447652},
  timestamp    = {Thu, 25 Dec 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icassp/FutamiTKOA024.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper (ICASSP 2024).
@inproceedings{DBLP:conf/icassp/AroraS0K24,
  author       = {Siddhant Arora and
                  George Saon and
                  Shinji Watanabe and
                  Brian Kingsbury},
  title        = {Semi-Autoregressive Streaming {ASR} with Label Context},
  booktitle    = {{IEEE} International Conference on Acoustics, Speech and Signal Processing,
                  {ICASSP} 2024, Seoul, Republic of Korea, April 14-19, 2024},
  pages        = {11681--11685},
  publisher    = {{IEEE}},
  year         = {2024},
  url          = {https://doi.org/10.1109/ICASSP48485.2024.10446807},
  doi          = {10.1109/ICASSP48485.2024.10446807},
  timestamp    = {Thu, 25 Dec 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icassp/AroraS0K24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper (ICASSP 2024).
% Dynamic-Superb is brace-protected so sentence-casing styles keep the capital after the hyphen.
@inproceedings{DBLP:conf/icassp/HuangLWHKWACSPS24,
  author       = {Chien{-}Yu Huang and
                  Ke{-}Han Lu and
                  Shih{-}Heng Wang and
                  Chi{-}Yuan Hsiao and
                  Chun{-}Yi Kuan and
                  Haibin Wu and
                  Siddhant Arora and
                  Kai{-}Wei Chang and
                  Jiatong Shi and
                  Yifan Peng and
                  Roshan S. Sharma and
                  Shinji Watanabe and
                  Bhiksha Ramakrishnan and
                  Shady Shehata and
                  Hung{-}Yi Lee},
  title        = {{Dynamic-Superb}: Towards a Dynamic, Collaborative, and Comprehensive
                  Instruction-Tuning Benchmark For Speech},
  booktitle    = {{IEEE} International Conference on Acoustics, Speech and Signal Processing,
                  {ICASSP} 2024, Seoul, Republic of Korea, April 14-19, 2024},
  pages        = {12136--12140},
  publisher    = {{IEEE}},
  year         = {2024},
  url          = {https://doi.org/10.1109/ICASSP48485.2024.10448257},
  doi          = {10.1109/ICASSP48485.2024.10448257},
  timestamp    = {Sun, 04 Jan 2026 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icassp/HuangLWHKWACSPS24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper (Interspeech 2024).
@inproceedings{DBLP:conf/interspeech/FutamiAKT024,
  author       = {Hayato Futami and
                  Siddhant Arora and
                  Yosuke Kashiwagi and
                  Emiru Tsunoo and
                  Shinji Watanabe},
  editor       = {Itshak Lapidot and
                  Sharon Gannot},
  title        = {Finding Task-specific Subnetworks in Multi-task Spoken Language Understanding
                  Model},
  booktitle    = {25th Annual Conference of the International Speech Communication Association,
                  Interspeech 2024, Kos, Greece, September 1-5, 2024},
  publisher    = {{ISCA}},
  year         = {2024},
  url          = {https://doi.org/10.21437/Interspeech.2024-712},
  doi          = {10.21437/INTERSPEECH.2024-712},
  timestamp    = {Thu, 25 Dec 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/interspeech/FutamiAKT024.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper (Interspeech 2024).
@inproceedings{DBLP:conf/interspeech/Jung0E0STAYC24,
  author       = {Jee{-}weon Jung and
                  Xin Wang and
                  Nicholas W. D. Evans and
                  Shinji Watanabe and
                  Hye{-}jin Shim and
                  Hemlata Tak and
                  Siddhant Arora and
                  Junichi Yamagishi and
                  Joon Son Chung},
  editor       = {Itshak Lapidot and
                  Sharon Gannot},
  title        = {To what extent can {ASV} systems naturally defend against spoofing
                  attacks?},
  booktitle    = {25th Annual Conference of the International Speech Communication Association,
                  Interspeech 2024, Kos, Greece, September 1-5, 2024},
  publisher    = {{ISCA}},
  year         = {2024},
  url          = {https://doi.org/10.21437/Interspeech.2024-1354},
  doi          = {10.21437/INTERSPEECH.2024-1354},
  timestamp    = {Thu, 25 Dec 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/interspeech/Jung0E0STAYC24.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Interspeech 2024 paper; same title and authors as the CoRR entry DBLP:journals/corr/abs-2406-12611.
@inproceedings{DBLP:conf/interspeech/KashiwagiFTA024,
author = {Yosuke Kashiwagi and
Hayato Futami and
Emiru Tsunoo and
Siddhant Arora and
Shinji Watanabe},
editor = {Itshak Lapidot and
Sharon Gannot},
title = {Rapid Language Adaptation for Multilingual {E2E} Speech Recognition
Using Encoder Prompting},
booktitle = {25th Annual Conference of the International Speech Communication Association,
Interspeech 2024, Kos, Greece, September 1-5, 2024},
publisher = {{ISCA}},
year = {2024},
url = {https://doi.org/10.21437/Interspeech.2024-702},
doi = {10.21437/INTERSPEECH.2024-702},
timestamp = {Thu, 25 Dec 2025 00:00:00 +0100},
biburl = {https://dblp.org/rec/conf/interspeech/KashiwagiFTA024.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Interspeech 2024 paper; same title and authors as the CoRR entry DBLP:journals/corr/abs-2401-16658.
% Protective braces keep the model names Whisper and E-Branchformer capitalised under sentence-casing styles.
@inproceedings{DBLP:conf/interspeech/PengTCAYS0CSCJ024,
author = {Yifan Peng and
Jinchuan Tian and
William Chen and
Siddhant Arora and
Brian Yan and
Yui Sudo and
Muhammad Shakeel and
Kwanghee Choi and
Jiatong Shi and
Xuankai Chang and
Jee{-}weon Jung and
Shinji Watanabe},
editor = {Itshak Lapidot and
Sharon Gannot},
title = {{OWSM} v3.1: Better and Faster Open {Whisper}-Style Speech Models based
on {E-Branchformer}},
booktitle = {25th Annual Conference of the International Speech Communication Association,
Interspeech 2024, Kos, Greece, September 1-5, 2024},
publisher = {{ISCA}},
year = {2024},
url = {https://doi.org/10.21437/Interspeech.2024-1194},
doi = {10.21437/INTERSPEECH.2024-1194},
timestamp = {Thu, 25 Dec 2025 00:00:00 +0100},
biburl = {https://dblp.org/rec/conf/interspeech/PengTCAYS0CSCJ024.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Interspeech 2024 paper; same title and authors as the CoRR entry DBLP:journals/corr/abs-2406-16107.
@inproceedings{DBLP:conf/interspeech/TsunooFKA024,
author = {Emiru Tsunoo and
Hayato Futami and
Yosuke Kashiwagi and
Siddhant Arora and
Shinji Watanabe},
editor = {Itshak Lapidot and
Sharon Gannot},
title = {Decoder-only Architecture for Streaming End-to-end Speech Recognition},
booktitle = {25th Annual Conference of the International Speech Communication Association,
Interspeech 2024, Kos, Greece, September 1-5, 2024},
publisher = {{ISCA}},
year = {2024},
url = {https://doi.org/10.21437/Interspeech.2024-705},
doi = {10.21437/INTERSPEECH.2024-705},
timestamp = {Thu, 25 Dec 2025 00:00:00 +0100},
biburl = {https://dblp.org/rec/conf/interspeech/TsunooFKA024.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
% NAACL 2024 long paper (Arora et al.).
% Protective braces keep the mixed-case model name UniverSLU intact under sentence-casing styles.
@inproceedings{DBLP:conf/naacl/AroraFJPSKTL024,
author = {Siddhant Arora and
Hayato Futami and
Jee{-}weon Jung and
Yifan Peng and
Roshan S. Sharma and
Yosuke Kashiwagi and
Emiru Tsunoo and
Karen Livescu and
Shinji Watanabe},
editor = {Kevin Duh and
Helena G{\'{o}}mez{-}Adorno and
Steven Bethard},
title = {{UniverSLU}: Universal Spoken Language Understanding for Diverse Tasks
with Natural Language Instructions},
booktitle = {Proceedings of the 2024 Conference of the North American Chapter of
the Association for Computational Linguistics: Human Language Technologies
(Volume 1: Long Papers), {NAACL} 2024, Mexico City, Mexico, June 16-21,
2024},
pages = {2754--2774},
publisher = {Association for Computational Linguistics},
year = {2024},
url = {https://doi.org/10.18653/v1/2024.naacl-long.151},
doi = {10.18653/V1/2024.NAACL-LONG.151},
timestamp = {Thu, 25 Dec 2025 00:00:00 +0100},
biburl = {https://dblp.org/rec/conf/naacl/AroraFJPSKTL024.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
% IEEE SLT 2024 paper; same authors and (up to letter case) title as the CoRR entry DBLP:journals/corr/abs-2409-09506.
% Protective braces keep ESPnet-EZ, Python and ESPnet correctly cased under sentence-casing styles.
@inproceedings{DBLP:conf/slt/SomekiCACCHPSSW24,
author = {Masao Someki and
Kwanghee Choi and
Siddhant Arora and
William Chen and
Samuele Cornell and
Jionghao Han and
Yifan Peng and
Jiatong Shi and
Vaibhav Srivastav and
Shinji Watanabe},
title = {{ESPnet-EZ}: {Python}-Only {ESPnet} For Easy Fine-Tuning And Integration},
booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2024, Macao, December
2-5, 2024},
pages = {863--870},
publisher = {{IEEE}},
year = {2024},
url = {https://doi.org/10.1109/SLT61566.2024.10832148},
doi = {10.1109/SLT61566.2024.10832148},
timestamp = {Thu, 25 Dec 2025 00:00:00 +0100},
biburl = {https://dblp.org/rec/conf/slt/SomekiCACCHPSSW24.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) entry; same title and authors as the Interspeech entry DBLP:conf/interspeech/PengTCAYS0CSCJ024.
% Protective braces keep the model names Whisper and E-Branchformer capitalised under sentence-casing styles.
@article{DBLP:journals/corr/abs-2401-16658,
author = {Yifan Peng and
Jinchuan Tian and
William Chen and
Siddhant Arora and
Brian Yan and
Yui Sudo and
Muhammad Shakeel and
Kwanghee Choi and
Jiatong Shi and
Xuankai Chang and
Jee{-}weon Jung and
Shinji Watanabe},
title = {{OWSM} v3.1: Better and Faster Open {Whisper}-Style Speech Models based
on {E-Branchformer}},
journal = {CoRR},
volume = {abs/2401.16658},
year = {2024},
url = {https://doi.org/10.48550/arXiv.2401.16658},
doi = {10.48550/ARXIV.2401.16658},
eprinttype = {arXiv},
eprint = {2401.16658},
timestamp = {Fri, 27 Jun 2025 01:00:00 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-2401-16658.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) entry; same authors and (up to letter case) title as the journal entry DBLP:journals/tmm/KimJRMACWR26.
% The colon sits outside the protective braces so BibTeX sees it and keeps the capital on the first subtitle word.
@article{DBLP:journals/corr/abs-2402-16021,
author = {Minsu Kim and
Jee{-}weon Jung and
Hyeongseop Rha and
Soumi Maiti and
Siddhant Arora and
Xuankai Chang and
Shinji Watanabe and
Yong Man Ro},
title = {{TMT}: Tri-Modal Translation between Speech, Image, and Text by Processing
Different Modalities as Different Languages},
journal = {CoRR},
volume = {abs/2402.16021},
year = {2024},
url = {https://doi.org/10.48550/arXiv.2402.16021},
doi = {10.48550/ARXIV.2402.16021},
eprinttype = {arXiv},
eprint = {2402.16021},
timestamp = {Mon, 03 Mar 2025 00:00:00 +0100},
biburl = {https://dblp.org/rec/journals/corr/abs-2402-16021.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) entry, arXiv identifier 2406.10083.
@article{DBLP:journals/corr/abs-2406-10083,
author = {Siddhant Arora and
Ankita Pasad and
Chung{-}Ming Chien and
Jionghao Han and
Roshan S. Sharma and
Jee{-}weon Jung and
Hira Dhamyal and
William Chen and
Suwon Shon and
Hung{-}yi Lee and
Karen Livescu and
Shinji Watanabe},
title = {On the Evaluation of Speech Foundation Models for Spoken Language
Understanding},
journal = {CoRR},
volume = {abs/2406.10083},
year = {2024},
url = {https://doi.org/10.48550/arXiv.2406.10083},
doi = {10.48550/ARXIV.2406.10083},
eprinttype = {arXiv},
eprint = {2406.10083},
timestamp = {Thu, 25 Dec 2025 00:00:00 +0100},
biburl = {https://dblp.org/rec/journals/corr/abs-2406-10083.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) entry; same title and authors as the Interspeech entry DBLP:conf/interspeech/FutamiAKT024.
@article{DBLP:journals/corr/abs-2406-12317,
author = {Hayato Futami and
Siddhant Arora and
Yosuke Kashiwagi and
Emiru Tsunoo and
Shinji Watanabe},
title = {Finding Task-specific Subnetworks in Multi-task Spoken Language Understanding
Model},
journal = {CoRR},
volume = {abs/2406.12317},
year = {2024},
url = {https://doi.org/10.48550/arXiv.2406.12317},
doi = {10.48550/ARXIV.2406.12317},
eprinttype = {arXiv},
eprint = {2406.12317},
timestamp = {Thu, 25 Dec 2025 00:00:00 +0100},
biburl = {https://dblp.org/rec/journals/corr/abs-2406-12317.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) entry; same title and authors as the Interspeech entry DBLP:conf/interspeech/KashiwagiFTA024.
@article{DBLP:journals/corr/abs-2406-12611,
author = {Yosuke Kashiwagi and
Hayato Futami and
Emiru Tsunoo and
Siddhant Arora and
Shinji Watanabe},
title = {Rapid Language Adaptation for Multilingual {E2E} Speech Recognition
Using Encoder Prompting},
journal = {CoRR},
volume = {abs/2406.12611},
year = {2024},
url = {https://doi.org/10.48550/arXiv.2406.12611},
doi = {10.48550/ARXIV.2406.12611},
eprinttype = {arXiv},
eprint = {2406.12611},
timestamp = {Thu, 25 Dec 2025 00:00:00 +0100},
biburl = {https://dblp.org/rec/journals/corr/abs-2406-12611.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) entry; same title and authors as the Interspeech entry DBLP:conf/interspeech/TsunooFKA024.
@article{DBLP:journals/corr/abs-2406-16107,
author = {Emiru Tsunoo and
Hayato Futami and
Yosuke Kashiwagi and
Siddhant Arora and
Shinji Watanabe},
title = {Decoder-only Architecture for Streaming End-to-end Speech Recognition},
journal = {CoRR},
volume = {abs/2406.16107},
year = {2024},
url = {https://doi.org/10.48550/arXiv.2406.16107},
doi = {10.48550/ARXIV.2406.16107},
eprinttype = {arXiv},
eprint = {2406.16107},
timestamp = {Thu, 25 Dec 2025 00:00:00 +0100},
biburl = {https://dblp.org/rec/journals/corr/abs-2406-16107.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) entry; same authors and (up to letter case) title as the SLT entry DBLP:conf/slt/SomekiCACCHPSSW24.
% Protective braces keep ESPnet-EZ, Python and ESPnet correctly cased under sentence-casing styles.
@article{DBLP:journals/corr/abs-2409-09506,
author = {Masao Someki and
Kwanghee Choi and
Siddhant Arora and
William Chen and
Samuele Cornell and
Jionghao Han and
Yifan Peng and
Jiatong Shi and
Vaibhav Srivastav and
Shinji Watanabe},
title = {{ESPnet-EZ}: {Python}-only {ESPnet} for Easy Fine-tuning and Integration},
journal = {CoRR},
volume = {abs/2409.09506},
year = {2024},
url = {https://doi.org/10.48550/arXiv.2409.09506},
doi = {10.48550/ARXIV.2409.09506},
eprinttype = {arXiv},
eprint = {2409.09506},
timestamp = {Thu, 25 Dec 2025 00:00:00 +0100},
biburl = {https://dblp.org/rec/journals/corr/abs-2409-09506.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) entry, arXiv identifier 2409.11274.
@article{DBLP:journals/corr/abs-2409-11274,
author = {Yao{-}Fei Cheng and
Hayato Futami and
Yosuke Kashiwagi and
Emiru Tsunoo and
Wen Shen Teo and
Siddhant Arora and
Shinji Watanabe},
title = {Task Arithmetic for Language Expansion in Speech Translation},
journal = {CoRR},
volume = {abs/2409.11274},
year = {2024},
url = {https://doi.org/10.48550/arXiv.2409.11274},
doi = {10.48550/ARXIV.2409.11274},
eprinttype = {arXiv},
eprint = {2409.11274},
timestamp = {Thu, 25 Dec 2025 00:00:00 +0100},
biburl = {https://dblp.org/rec/journals/corr/abs-2409-11274.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) entry, arXiv identifier 2409.15732.
@article{DBLP:journals/corr/abs-2409-15732,
author = {Yosuke Kashiwagi and
Hayato Futami and
Emiru Tsunoo and
Siddhant Arora and
Shinji Watanabe},
title = {Hypothesis Clustering and Merging: Novel MultiTalker Speech Recognition
with Speaker Tokens},
journal = {CoRR},
volume = {abs/2409.15732},
year = {2024},
url = {https://doi.org/10.48550/arXiv.2409.15732},
doi = {10.48550/ARXIV.2409.15732},
eprinttype = {arXiv},
eprint = {2409.15732},
timestamp = {Thu, 25 Dec 2025 00:00:00 +0100},
biburl = {https://dblp.org/rec/journals/corr/abs-2409-15732.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) entry, arXiv identifier 2411.05361.
% Protective braces keep the benchmark name Dynamic-SUPERB in its mixed case under sentence-casing styles.
@article{DBLP:journals/corr/abs-2411-05361,
author = {Chien{-}yu Huang and
Wei{-}Chih Chen and
Shu{-}Wen Yang and
Andy T. Liu and
Chen{-}An Li and
Yu{-}Xiang Lin and
Wei{-}Cheng Tseng and
Anuj Diwan and
Yi{-}Jen Shih and
Jiatong Shi and
William Chen and
Xuanjun Chen and
Chi{-}Yuan Hsiao and
Puyuan Peng and
Shih{-}Heng Wang and
Chun{-}Yi Kuan and
Ke{-}Han Lu and
Kai{-}Wei Chang and
Chih{-}Kai Yang and
Fabian Ritter Gutierrez and
Ming To Chuang and
Kuan{-}Po Huang and
Siddhant Arora and
You{-}Kuan Lin and
Eunjung Yeo and
Kalvin Chang and
Chung{-}Ming Chien and
Kwanghee Choi and
Cheng{-}Hsiu Hsieh and
Yi{-}Cheng Lin and
Chee{-}En Yu and
I{-}Hsiang Chiu and
Heitor R. Guimar{\~{a}}es and
Jionghao Han and
Tzu{-}Quan Lin and
Tzu{-}Yuan Lin and
Homu Chang and
Ting{-}Wu Chang and
Chun Wei Chen and
Shou{-}Jen Chen and
Yu{-}Hua Chen and
Hsi{-}Chun Cheng and
Kunal Dhawan and
Jia{-}Lin Fang and
Shi{-}Xin Fang and
Kuan{-}Yu Fang Chiang and
Chi An Fu and
Hsien{-}Fu Hsiao and
Ching Yu Hsu and
Shao{-}Syuan Huang and
Lee Chen Wei and
Hsi{-}Che Lin and
Hsuan{-}Hao Lin and
Hsuan{-}Ting Lin and
Jian{-}Ren Lin and
Ting{-}Chun Liu and
Li{-}Chun Lu and
Tsung{-}Min Pai and
Ankita Pasad and
Shih{-}Yun Shan Kuan and
Suwon Shon and
Yuxun Tang and
Yun{-}Shao Tsai and
Jui{-}Chiang Wei and
Tzu{-}Chieh Wei and
Chengxi Wu and
Dien{-}Ruei Wu and
Chao{-}Han Huck Yang and
Chieh{-}Chi Yang and
Jia Qi Yip and
Shao{-}Xiang Yuan and
Vahid Noroozi and
Zhehuai Chen and
Haibin Wu and
Karen Livescu and
David Harwath and
Shinji Watanabe and
Hung{-}yi Lee},
title = {{Dynamic-SUPERB} Phase-2: {A} Collaboratively Expanding Benchmark for
Measuring the Capabilities of Spoken Language Models with 180 Tasks},
journal = {CoRR},
volume = {abs/2411.05361},
year = {2024},
url = {https://doi.org/10.48550/arXiv.2411.05361},
doi = {10.48550/ARXIV.2411.05361},
eprinttype = {arXiv},
eprint = {2411.05361},
timestamp = {Sun, 04 Jan 2026 00:00:00 +0100},
biburl = {https://dblp.org/rec/journals/corr/abs-2411-05361.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) entry, arXiv identifier 2412.17667.
@article{DBLP:journals/corr/abs-2412-17667,
author = {Jiatong Shi and
Hye{-}jin Shim and
Jinchuan Tian and
Siddhant Arora and
Haibin Wu and
Darius Petermann and
Jia Qi Yip and
You Zhang and
Yuxun Tang and
Wangyou Zhang and
Dareen Alharthi and
Yichen Huang and
Koichi Saito and
Jionghao Han and
Yiwen Zhao and
Chris Donahue and
Shinji Watanabe},
title = {{VERSA:} {A} Versatile Evaluation Toolkit for Speech, Audio, and Music},
journal = {CoRR},
volume = {abs/2412.17667},
year = {2024},
url = {https://doi.org/10.48550/arXiv.2412.17667},
doi = {10.48550/ARXIV.2412.17667},
eprinttype = {arXiv},
eprint = {2412.17667},
timestamp = {Thu, 25 Dec 2025 00:00:00 +0100},
biburl = {https://dblp.org/rec/journals/corr/abs-2412-17667.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ACL 2023 long paper (Shon et al.).
@inproceedings{DBLP:conf/acl/ShonALPWSWLL023,
author = {Suwon Shon and
Siddhant Arora and
Chyi{-}Jiunn Lin and
Ankita Pasad and
Felix Wu and
Roshan S. Sharma and
Wei{-}Lun Wu and
Hung{-}yi Lee and
Karen Livescu and
Shinji Watanabe},
editor = {Anna Rogers and
Jordan L. Boyd{-}Graber and
Naoaki Okazaki},
title = {{SLUE} Phase-2: {A} Benchmark Suite of Diverse Spoken Language Understanding
Tasks},
booktitle = {Proceedings of the 61st Annual Meeting of the Association for Computational
Linguistics (Volume 1: Long Papers), {ACL} 2023, Toronto, Canada,
July 9-14, 2023},
pages = {8906--8937},
publisher = {Association for Computational Linguistics},
year = {2023},
url = {https://doi.org/10.18653/v1/2023.acl-long.496},
doi = {10.18653/V1/2023.ACL-LONG.496},
timestamp = {Sun, 19 Jan 2025 00:00:00 +0100},
biburl = {https://dblp.org/rec/conf/acl/ShonALPWSWLL023.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
% IEEE ASRU 2023 paper (Peng et al.).
% Protective braces keep the model name Whisper capitalised under sentence-casing styles.
@inproceedings{DBLP:conf/asru/PengTYBCLSACSZSSJMW23,
author = {Yifan Peng and
Jinchuan Tian and
Brian Yan and
Dan Berrebbi and
Xuankai Chang and
Xinjian Li and
Jiatong Shi and
Siddhant Arora and
William Chen and
Roshan S. Sharma and
Wangyou Zhang and
Yui Sudo and
Muhammad Shakeel and
Jee{-}Weon Jung and
Soumi Maiti and
Shinji Watanabe},
title = {Reproducing {Whisper}-Style Training Using An Open-Source Toolkit And
Publicly Available Data},
booktitle = {{IEEE} Automatic Speech Recognition and Understanding Workshop, {ASRU}
2023, Taipei, Taiwan, December 16-20, 2023},
pages = {1--8},
publisher = {{IEEE}},
year = {2023},
url = {https://doi.org/10.1109/ASRU57964.2023.10389676},
doi = {10.1109/ASRU57964.2023.10389676},
timestamp = {Fri, 27 Jun 2025 01:00:00 +0200},
biburl = {https://dblp.org/rec/conf/asru/PengTYBCLSACSZSSJMW23.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
% IEEE ASRU 2023 paper (Sharma et al.).
% NOTE(review): toolkit name restored to ESPnet-SUMM (the export had Espnet-Summ) and brace-protected; confirm against the paper.
@inproceedings{DBLP:conf/asru/SharmaCKSAWODSR23,
author = {Roshan S. Sharma and
William Chen and
Takatomo Kano and
Ruchira Sharma and
Siddhant Arora and
Shinji Watanabe and
Atsunori Ogawa and
Marc Delcroix and
Rita Singh and
Bhiksha Raj},
title = {{ESPnet-SUMM}: Introducing a Novel Large Dataset, Toolkit, and a Cross-Corpora
Evaluation of Speech Summarization Systems},
booktitle = {{IEEE} Automatic Speech Recognition and Understanding Workshop, {ASRU}
2023, Taipei, Taiwan, December 16-20, 2023},
pages = {1--8},
publisher = {{IEEE}},
year = {2023},
url = {https://doi.org/10.1109/ASRU57964.2023.10389641},
doi = {10.1109/ASRU57964.2023.10389641},
timestamp = {Mon, 03 Mar 2025 00:00:00 +0100},
biburl = {https://dblp.org/rec/conf/asru/SharmaCKSAWODSR23.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
% CODS-COMAD 2023 paper (Gaur et al.).
@inproceedings{DBLP:conf/comad/GaurSAGB23,
author = {Garima Gaur and
Rajat Singh and
Siddhant Arora and
Vinayak Gupta and
Srikanta Bedathur},
title = {Teaching Old {DB} Neu(ral) Tricks: Learning Embeddings on Multi-tabular
Databases},
booktitle = {Proceedings of the 6th Joint International Conference on Data Science
{\&} Management of Data (10th {ACM} {IKDD} {CODS} and 28th COMAD),
Mumbai, India, January 4-7, 2023},
pages = {87--94},
publisher = {{ACM}},
year = {2023},
url = {https://doi.org/10.1145/3570991.3571041},
doi = {10.1145/3570991.3571041},
timestamp = {Sun, 19 Jan 2025 00:00:00 +0100},
biburl = {https://dblp.org/rec/conf/comad/GaurSAGB23.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ICASSP 2023 paper; same title and authors as the CoRR entry DBLP:journals/corr/abs-2305-00926.
% A comma separates the conference name from its acronym, matching the SLT and ASRU entries in this file.
@inproceedings{DBLP:conf/icassp/AroraFTYW23,
author = {Siddhant Arora and
Hayato Futami and
Emiru Tsunoo and
Brian Yan and
Shinji Watanabe},
title = {Joint Modelling of Spoken Language Understanding Tasks with Integrated
Dialog History},
booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing,
{ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
pages = {1--5},
publisher = {{IEEE}},
year = {2023},
url = {https://doi.org/10.1109/ICASSP49357.2023.10095055},
doi = {10.1109/ICASSP49357.2023.10095055},
timestamp = {Mon, 03 Mar 2025 00:00:00 +0100},
biburl = {https://dblp.org/rec/conf/icassp/AroraFTYW23.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ICASSP 2023 paper; same authors and (up to letter case) title as the CoRR entry DBLP:journals/corr/abs-2305-01620.
% STOP is written as a brace-protected acronym, as in that CoRR entry; the booktitle gains the comma before the acronym.
@inproceedings{DBLP:conf/icassp/AroraFWHPKTYW23,
author = {Siddhant Arora and
Hayato Futami and
Shih{-}Lun Wu and
Jessica Huynh and
Yifan Peng and
Yosuke Kashiwagi and
Emiru Tsunoo and
Brian Yan and
Shinji Watanabe},
title = {A Study on the Integration of Pipeline and {E2E} {SLU} Systems for
Spoken Semantic Parsing Toward {STOP} Quality Challenge},
booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing,
{ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
pages = {1--2},
publisher = {{IEEE}},
year = {2023},
url = {https://doi.org/10.1109/ICASSP49357.2023.10096175},
doi = {10.1109/ICASSP49357.2023.10096175},
timestamp = {Fri, 27 Jun 2025 01:00:00 +0200},
biburl = {https://dblp.org/rec/conf/icassp/AroraFWHPKTYW23.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ICASSP 2023 paper; same authors and (up to letter case) title as the CoRR entry DBLP:journals/corr/abs-2305-01194.
% MLM and STOP are brace-protected acronyms (STOP as in that CoRR entry); the booktitle gains the comma before the acronym.
@inproceedings{DBLP:conf/icassp/FutamiHAWKPYTW23,
author = {Hayato Futami and
Jessica Huynh and
Siddhant Arora and
Shih{-}Lun Wu and
Yosuke Kashiwagi and
Yifan Peng and
Brian Yan and
Emiru Tsunoo and
Shinji Watanabe},
title = {The Pipeline System of {ASR} and {NLU} with {MLM}-based Data Augmentation
Toward {STOP} Low-Resource Challenge},
booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing,
{ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
pages = {1--2},
publisher = {{IEEE}},
year = {2023},
url = {https://doi.org/10.1109/ICASSP49357.2023.10096049},
doi = {10.1109/ICASSP49357.2023.10096049},
timestamp = {Fri, 27 Jun 2025 01:00:00 +0200},
biburl = {https://dblp.org/rec/conf/icassp/FutamiHAWKPYTW23.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ICASSP 2023 paper (Futami et al.).
% A comma separates the conference name from its acronym, matching the SLT and ASRU entries in this file.
@inproceedings{DBLP:conf/icassp/FutamiTSKOAW23,
author = {Hayato Futami and
Emiru Tsunoo and
Kentaro Shibata and
Yosuke Kashiwagi and
Takao Okuda and
Siddhant Arora and
Shinji Watanabe},
title = {Streaming Joint Speech Recognition and Disfluency Detection},
booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing,
{ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
pages = {1--5},
publisher = {{IEEE}},
year = {2023},
url = {https://doi.org/10.1109/ICASSP49357.2023.10094620},
doi = {10.1109/ICASSP49357.2023.10094620},
timestamp = {Mon, 03 Mar 2025 00:00:00 +0100},
biburl = {https://dblp.org/rec/conf/icassp/FutamiTSKOAW23.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ICASSP 2023 paper (Kashiwagi et al.).
% E-Branchformer and the acronym STOP are brace-protected against sentence-casing; the booktitle gains the comma before the acronym.
% NOTE(review): STOP is assumed to be the same challenge acronym braced in DBLP:journals/corr/abs-2305-01620 - confirm.
@inproceedings{DBLP:conf/icassp/KashiwagiAFHWPYTW23,
author = {Yosuke Kashiwagi and
Siddhant Arora and
Hayato Futami and
Jessica Huynh and
Shih{-}Lun Wu and
Yifan Peng and
Brian Yan and
Emiru Tsunoo and
Shinji Watanabe},
title = {{E-Branchformer}-Based {E2E} {SLU} Toward {STOP} On-Device Challenge},
booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing,
{ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
pages = {1--2},
publisher = {{IEEE}},
year = {2023},
url = {https://doi.org/10.1109/ICASSP49357.2023.10097145},
doi = {10.1109/ICASSP49357.2023.10097145},
timestamp = {Fri, 27 Jun 2025 01:00:00 +0200},
biburl = {https://dblp.org/rec/conf/icassp/KashiwagiAFHWPYTW23.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Interspeech 2023 paper (Kashiwagi et al.).
@inproceedings{DBLP:conf/interspeech/KashiwagiAFHWPY23,
author = {Yosuke Kashiwagi and
Siddhant Arora and
Hayato Futami and
Jessica Huynh and
Shih{-}Lun Wu and
Yifan Peng and
Brian Yan and
Emiru Tsunoo and
Shinji Watanabe},
editor = {Naomi Harte and
Julie Carson{-}Berndsen and
Gareth Jones},
title = {Tensor decomposition for minimization of {E2E} {SLU} model toward
on-device processing},
booktitle = {24th Annual Conference of the International Speech Communication Association,
Interspeech 2023, Dublin, Ireland, August 20-24, 2023},
pages = {710--714},
publisher = {{ISCA}},
year = {2023},
url = {https://doi.org/10.21437/Interspeech.2023-1299},
doi = {10.21437/INTERSPEECH.2023-1299},
timestamp = {Fri, 27 Jun 2025 01:00:00 +0200},
biburl = {https://dblp.org/rec/conf/interspeech/KashiwagiAFHWPY23.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Interspeech 2023 paper; same title and authors as the CoRR entry DBLP:journals/corr/abs-2307-11005.
@inproceedings{DBLP:conf/interspeech/AroraFKTY023,
author = {Siddhant Arora and
Hayato Futami and
Yosuke Kashiwagi and
Emiru Tsunoo and
Brian Yan and
Shinji Watanabe},
editor = {Naomi Harte and
Julie Carson{-}Berndsen and
Gareth Jones},
title = {Integrating Pretrained {ASR} and {LM} to Perform Sequence Generation
for Spoken Language Understanding},
booktitle = {24th Annual Conference of the International Speech Communication Association,
Interspeech 2023, Dublin, Ireland, August 20-24, 2023},
pages = {720--724},
publisher = {{ISCA}},
year = {2023},
url = {https://doi.org/10.21437/Interspeech.2023-1962},
doi = {10.21437/INTERSPEECH.2023-1962},
timestamp = {Mon, 03 Mar 2025 00:00:00 +0100},
biburl = {https://dblp.org/rec/conf/interspeech/AroraFKTY023.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Interspeech 2023 paper; same title and authors as the CoRR entry DBLP:journals/corr/abs-2307-12767.
@inproceedings{DBLP:conf/interspeech/TsunooFKA023,
author = {Emiru Tsunoo and
Hayato Futami and
Yosuke Kashiwagi and
Siddhant Arora and
Shinji Watanabe},
editor = {Naomi Harte and
Julie Carson{-}Berndsen and
Gareth Jones},
title = {Integration of Frame- and Label-synchronous Beam Search for Streaming
Encoder-decoder Speech Recognition},
booktitle = {24th Annual Conference of the International Speech Communication Association,
Interspeech 2023, Dublin, Ireland, August 20-24, 2023},
pages = {1369--1373},
publisher = {{ISCA}},
year = {2023},
url = {https://doi.org/10.21437/Interspeech.2023-1517},
doi = {10.21437/INTERSPEECH.2023-1517},
timestamp = {Mon, 03 Mar 2025 00:00:00 +0100},
biburl = {https://dblp.org/rec/conf/interspeech/TsunooFKA023.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Interspeech 2023 paper; shares its title with the CoRR entry DBLP:journals/corr/abs-2307-08217 (author order differs there).
% The colon sits outside the protective braces so BibTeX sees it and keeps the capital on the first subtitle word.
@inproceedings{DBLP:conf/interspeech/0001AZ0SR23,
author = {Roshan Sharma and
Siddhant Arora and
Kenneth Zheng and
Shinji Watanabe and
Rita Singh and
Bhiksha Raj},
editor = {Naomi Harte and
Julie Carson{-}Berndsen and
Gareth Jones},
title = {{BASS}: Block-wise Adaptation for Speech Summarization},
booktitle = {24th Annual Conference of the International Speech Communication Association,
Interspeech 2023, Dublin, Ireland, August 20-24, 2023},
pages = {1454--1458},
publisher = {{ISCA}},
year = {2023},
url = {https://doi.org/10.21437/Interspeech.2023-916},
doi = {10.21437/INTERSPEECH.2023-916},
timestamp = {Mon, 03 Mar 2025 00:00:00 +0100},
biburl = {https://dblp.org/rec/conf/interspeech/0001AZ0SR23.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Interspeech 2023 paper; same title and authors as the CoRR entry DBLP:journals/corr/abs-2305-11073.
% Protective braces keep the architecture names E-Branchformer and Conformer capitalised under sentence-casing styles.
@inproceedings{DBLP:conf/interspeech/PengKWYACTSS023,
author = {Yifan Peng and
Kwangyoun Kim and
Felix Wu and
Brian Yan and
Siddhant Arora and
William Chen and
Jiyang Tang and
Suwon Shon and
Prashant Sridhar and
Shinji Watanabe},
editor = {Naomi Harte and
Julie Carson{-}Berndsen and
Gareth Jones},
title = {A Comparative Study on {E-Branchformer} vs {Conformer} in Speech Recognition,
Translation, and Understanding Tasks},
booktitle = {24th Annual Conference of the International Speech Communication Association,
Interspeech 2023, Dublin, Ireland, August 20-24, 2023},
pages = {2208--2212},
publisher = {{ISCA}},
year = {2023},
url = {https://doi.org/10.21437/Interspeech.2023-1194},
doi = {10.21437/INTERSPEECH.2023-1194},
timestamp = {Fri, 27 Jun 2025 01:00:00 +0200},
biburl = {https://dblp.org/rec/conf/interspeech/PengKWYACTSS023.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
% IWSLT 2023 system paper (Yan et al.).
% Protective braces keep the acronym CMU upper-case; sentence-casing styles would otherwise lower every letter after the first.
@inproceedings{DBLP:conf/iwslt/YanSMCLPA023,
author = {Brian Yan and
Jiatong Shi and
Soumi Maiti and
William Chen and
Xinjian Li and
Yifan Peng and
Siddhant Arora and
Shinji Watanabe},
editor = {Elizabeth Salesky and
Marcello Federico and
Marine Carpuat},
title = {{CMU}'s {IWSLT} 2023 Simultaneous Speech Translation System},
booktitle = {Proceedings of the 20th International Conference on Spoken Language
Translation, IWSLT@ACL 2023, Toronto, Canada (in-person and online),
13-14 July, 2023},
pages = {235--240},
publisher = {Association for Computational Linguistics},
year = {2023},
url = {https://doi.org/10.18653/v1/2023.iwslt-1.20},
doi = {10.18653/V1/2023.IWSLT-1.20},
timestamp = {Fri, 27 Jun 2025 01:00:00 +0200},
biburl = {https://dblp.org/rec/conf/iwslt/YanSMCLPA023.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) entry; same title and authors as the ICASSP entry DBLP:conf/icassp/AroraFTYW23.
@article{DBLP:journals/corr/abs-2305-00926,
author = {Siddhant Arora and
Hayato Futami and
Emiru Tsunoo and
Brian Yan and
Shinji Watanabe},
title = {Joint Modelling of Spoken Language Understanding Tasks with Integrated
Dialog History},
journal = {CoRR},
volume = {abs/2305.00926},
year = {2023},
url = {https://doi.org/10.48550/arXiv.2305.00926},
doi = {10.48550/ARXIV.2305.00926},
eprinttype = {arXiv},
eprint = {2305.00926},
timestamp = {Mon, 03 Mar 2025 00:00:00 +0100},
biburl = {https://dblp.org/rec/journals/corr/abs-2305-00926.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) entry; same authors and (up to letter case) title as the ICASSP entry DBLP:conf/icassp/FutamiHAWKPYTW23.
% Protective braces keep the acronym MLM upper-case under sentence-casing styles.
@article{DBLP:journals/corr/abs-2305-01194,
author = {Hayato Futami and
Jessica Huynh and
Siddhant Arora and
Shih{-}Lun Wu and
Yosuke Kashiwagi and
Yifan Peng and
Brian Yan and
Emiru Tsunoo and
Shinji Watanabe},
title = {The Pipeline System of {ASR} and {NLU} with {MLM}-based Data Augmentation
toward {STOP} Low-resource Challenge},
journal = {CoRR},
volume = {abs/2305.01194},
year = {2023},
url = {https://doi.org/10.48550/arXiv.2305.01194},
doi = {10.48550/ARXIV.2305.01194},
eprinttype = {arXiv},
eprint = {2305.01194},
timestamp = {Fri, 27 Jun 2025 01:00:00 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-2305-01194.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) entry; same authors and (up to letter case) title as the ICASSP entry DBLP:conf/icassp/AroraFWHPKTYW23.
@article{DBLP:journals/corr/abs-2305-01620,
author = {Siddhant Arora and
Hayato Futami and
Shih{-}Lun Wu and
Jessica Huynh and
Yifan Peng and
Yosuke Kashiwagi and
Emiru Tsunoo and
Brian Yan and
Shinji Watanabe},
title = {A Study on the Integration of Pipeline and {E2E} {SLU} systems for
Spoken Semantic Parsing toward {STOP} Quality Challenge},
journal = {CoRR},
volume = {abs/2305.01620},
year = {2023},
url = {https://doi.org/10.48550/arXiv.2305.01620},
doi = {10.48550/ARXIV.2305.01620},
eprinttype = {arXiv},
eprint = {2305.01620},
timestamp = {Fri, 27 Jun 2025 01:00:00 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-2305-01620.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) entry; same title and authors as the Interspeech entry DBLP:conf/interspeech/PengKWYACTSS023.
% Protective braces keep the architecture names E-Branchformer and Conformer capitalised under sentence-casing styles.
@article{DBLP:journals/corr/abs-2305-11073,
author = {Yifan Peng and
Kwangyoun Kim and
Felix Wu and
Brian Yan and
Siddhant Arora and
William Chen and
Jiyang Tang and
Suwon Shon and
Prashant Sridhar and
Shinji Watanabe},
title = {A Comparative Study on {E-Branchformer} vs {Conformer} in Speech Recognition,
Translation, and Understanding Tasks},
journal = {CoRR},
volume = {abs/2305.11073},
year = {2023},
url = {https://doi.org/10.48550/arXiv.2305.11073},
doi = {10.48550/ARXIV.2305.11073},
eprinttype = {arXiv},
eprint = {2305.11073},
timestamp = {Fri, 27 Jun 2025 01:00:00 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-2305-11073.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) entry; shares its title with the Interspeech entry DBLP:conf/interspeech/0001AZ0SR23 (author order differs there).
% The colon sits outside the protective braces so BibTeX sees it and keeps the capital on the first subtitle word.
@article{DBLP:journals/corr/abs-2307-08217,
author = {Roshan S. Sharma and
Kenneth Zheng and
Siddhant Arora and
Shinji Watanabe and
Rita Singh and
Bhiksha Raj},
title = {{BASS}: Block-wise Adaptation for Speech Summarization},
journal = {CoRR},
volume = {abs/2307.08217},
year = {2023},
url = {https://doi.org/10.48550/arXiv.2307.08217},
doi = {10.48550/ARXIV.2307.08217},
eprinttype = {arXiv},
eprint = {2307.08217},
timestamp = {Wed, 26 Jul 2023 01:00:00 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-2307-08217.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2307.11005 (CoRR, 2023).
@article{DBLP:journals/corr/abs-2307-11005,
  author       = {Siddhant Arora and
                  Hayato Futami and
                  Yosuke Kashiwagi and
                  Emiru Tsunoo and
                  Brian Yan and
                  Shinji Watanabe},
  title        = {Integrating Pretrained {ASR} and {LM} to Perform Sequence Generation
                  for Spoken Language Understanding},
  journal      = {CoRR},
  volume       = {abs/2307.11005},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2307.11005},
  doi          = {10.48550/ARXIV.2307.11005},
  eprinttype   = {arXiv},
  eprint       = {2307.11005},
  timestamp    = {Wed, 26 Jul 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2307-11005.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2307.12767 (CoRR, 2023).
@article{DBLP:journals/corr/abs-2307-12767,
  author       = {Emiru Tsunoo and
                  Hayato Futami and
                  Yosuke Kashiwagi and
                  Siddhant Arora and
                  Shinji Watanabe},
  title        = {Integration of Frame- and Label-synchronous Beam Search for Streaming
                  Encoder-decoder Speech Recognition},
  journal      = {CoRR},
  volume       = {abs/2307.12767},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2307.12767},
  doi          = {10.48550/ARXIV.2307.12767},
  eprinttype   = {arXiv},
  eprint       = {2307.12767},
  timestamp    = {Tue, 01 Aug 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2307-12767.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2309.08876 (CoRR, 2023).
@article{DBLP:journals/corr/abs-2309-08876,
  author       = {Emiru Tsunoo and
                  Hayato Futami and
                  Yosuke Kashiwagi and
                  Siddhant Arora and
                  Shinji Watanabe},
  title        = {Decoder-only Architecture for Speech Recognition with {CTC} Prompts
                  and Text Data Augmentation},
  journal      = {CoRR},
  volume       = {abs/2309.08876},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2309.08876},
  doi          = {10.48550/ARXIV.2309.08876},
  eprinttype   = {arXiv},
  eprint       = {2309.08876},
  timestamp    = {Mon, 03 Mar 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2309-08876.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2309.09510 (CoRR, 2023).
@article{DBLP:journals/corr/abs-2309-09510,
  author       = {Chien{-}yu Huang and
                  Ke{-}Han Lu and
                  Shih{-}Heng Wang and
                  Chi{-}Yuan Hsiao and
                  Chun{-}Yi Kuan and
                  Haibin Wu and
                  Siddhant Arora and
                  Kai{-}Wei Chang and
                  Jiatong Shi and
                  Yifan Peng and
                  Roshan S. Sharma and
                  Shinji Watanabe and
                  Bhiksha Ramakrishnan and
                  Shady Shehata and
                  Hung{-}yi Lee},
  title        = {{Dynamic-SUPERB:} Towards {A} Dynamic, Collaborative, and Comprehensive
                  Instruction-Tuning Benchmark for Speech},
  journal      = {CoRR},
  volume       = {abs/2309.09510},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2309.09510},
  doi          = {10.48550/ARXIV.2309.09510},
  eprinttype   = {arXiv},
  eprint       = {2309.09510},
  timestamp    = {Thu, 25 Dec 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2309-09510.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2309.10926 (CoRR, 2023).
@article{DBLP:journals/corr/abs-2309-10926,
  author       = {Siddhant Arora and
                  George Saon and
                  Shinji Watanabe and
                  Brian Kingsbury},
  title        = {Semi-Autoregressive Streaming {ASR} With Label Context},
  journal      = {CoRR},
  volume       = {abs/2309.10926},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2309.10926},
  doi          = {10.48550/ARXIV.2309.10926},
  eprinttype   = {arXiv},
  eprint       = {2309.10926},
  timestamp    = {Mon, 03 Mar 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2309-10926.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2309.13876 (CoRR, 2023).
@article{DBLP:journals/corr/abs-2309-13876,
  author       = {Yifan Peng and
                  Jinchuan Tian and
                  Brian Yan and
                  Dan Berrebbi and
                  Xuankai Chang and
                  Xinjian Li and
                  Jiatong Shi and
                  Siddhant Arora and
                  William Chen and
                  Roshan S. Sharma and
                  Wangyou Zhang and
                  Yui Sudo and
                  Muhammad Shakeel and
                  Jee{-}weon Jung and
                  Soumi Maiti and
                  Shinji Watanabe},
  title        = {Reproducing {Whisper}-Style Training Using an Open-Source Toolkit and
                  Publicly Available Data},
  journal      = {CoRR},
  volume       = {abs/2309.13876},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2309.13876},
  doi          = {10.48550/ARXIV.2309.13876},
  eprinttype   = {arXiv},
  eprint       = {2309.13876},
  timestamp    = {Fri, 27 Jun 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2309-13876.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2310.02973 (CoRR, 2023).
@article{DBLP:journals/corr/abs-2310-02973,
  author       = {Siddhant Arora and
                  Hayato Futami and
                  Jee{-}weon Jung and
                  Yifan Peng and
                  Roshan S. Sharma and
                  Yosuke Kashiwagi and
                  Emiru Tsunoo and
                  Shinji Watanabe},
  title        = {{UniverSLU:} Universal Spoken Language Understanding for Diverse Classification
                  and Sequence Generation Tasks with a Single Network},
  journal      = {CoRR},
  volume       = {abs/2310.02973},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2310.02973},
  doi          = {10.48550/ARXIV.2310.02973},
  eprinttype   = {arXiv},
  eprint       = {2310.02973},
  timestamp    = {Mon, 30 Jun 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2310-02973.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2312.09582 (CoRR, 2023).
@article{DBLP:journals/corr/abs-2312-09582,
  author       = {Hayato Futami and
                  Emiru Tsunoo and
                  Yosuke Kashiwagi and
                  Hiroaki Ogawa and
                  Siddhant Arora and
                  Shinji Watanabe},
  title        = {Phoneme-aware Encoding for Prefix-tree-based Contextual {ASR}},
  journal      = {CoRR},
  volume       = {abs/2312.09582},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2312.09582},
  doi          = {10.48550/ARXIV.2312.09582},
  eprinttype   = {arXiv},
  eprint       = {2312.09582},
  timestamp    = {Mon, 03 Mar 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2312-09582.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% AAAI 2022 paper; same title and author list as the preprint DBLP:journals/corr/abs-2112-09669.
@inproceedings{DBLP:conf/aaai/AroraPSCLN22,
  author       = {Siddhant Arora and
                  Danish Pruthi and
                  Norman M. Sadeh and
                  William W. Cohen and
                  Zachary C. Lipton and
                  Graham Neubig},
  title        = {Explain, Edit, and Understand: Rethinking User Study Design for Evaluating
                  Model Explanations},
  booktitle    = {Thirty-Sixth {AAAI} Conference on Artificial Intelligence, {AAAI}
                  2022, Thirty-Fourth Conference on Innovative Applications of Artificial
                  Intelligence, {IAAI} 2022, The Twelfth Symposium on Educational Advances
                  in Artificial Intelligence, {EAAI} 2022, Virtual Event, February 22
                  - March 1, 2022},
  pages        = {5277--5285},
  publisher    = {{AAAI} Press},
  year         = {2022},
  url          = {https://doi.org/10.1609/aaai.v36i5.20464},
  doi          = {10.1609/AAAI.V36I5.20464},
  timestamp    = {Wed, 18 Mar 2026 17:07:12 +0100},
  biburl       = {https://dblp.org/rec/conf/aaai/AroraPSCLN22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Findings of EMNLP 2022 paper; same title and author list as the preprint DBLP:journals/corr/abs-2210-15734.
@inproceedings{DBLP:conf/emnlp/AroraDYMB022,
  author       = {Siddhant Arora and
                  Siddharth Dalmia and
                  Brian Yan and
                  Florian Metze and
                  Alan W. Black and
                  Shinji Watanabe},
  editor       = {Yoav Goldberg and
                  Zornitsa Kozareva and
                  Yue Zhang},
  title        = {Token-level Sequence Labeling for Spoken Language Understanding using
                  Compositional End-to-End Models},
  booktitle    = {Findings of the Association for Computational Linguistics: {EMNLP}
                  2022, Abu Dhabi, United Arab Emirates, December 7-11, 2022},
  series       = {Findings of {ACL}},
  pages        = {5419--5429},
  publisher    = {Association for Computational Linguistics},
  year         = {2022},
  url          = {https://doi.org/10.18653/v1/2022.findings-emnlp.396},
  doi          = {10.18653/V1/2022.FINDINGS-EMNLP.396},
  timestamp    = {Tue, 27 Jan 2026 20:14:34 +0100},
  biburl       = {https://dblp.org/rec/conf/emnlp/AroraDYMB022.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Findings of EMNLP 2022 paper; same title and author list as the preprint DBLP:journals/corr/abs-2210-16663.
@inproceedings{DBLP:conf/emnlp/HiguchiYAOK022,
  author       = {Yosuke Higuchi and
                  Brian Yan and
                  Siddhant Arora and
                  Tetsuji Ogawa and
                  Tetsunori Kobayashi and
                  Shinji Watanabe},
  editor       = {Yoav Goldberg and
                  Zornitsa Kozareva and
                  Yue Zhang},
  title        = {{BERT} Meets {CTC:} New Formulation of End-to-End Speech Recognition
                  with Pre-trained Masked Language Model},
  booktitle    = {Findings of the Association for Computational Linguistics: {EMNLP}
                  2022, Abu Dhabi, United Arab Emirates, December 7-11, 2022},
  series       = {Findings of {ACL}},
  pages        = {5486--5503},
  publisher    = {Association for Computational Linguistics},
  year         = {2022},
  url          = {https://doi.org/10.18653/v1/2022.findings-emnlp.402},
  doi          = {10.18653/V1/2022.FINDINGS-EMNLP.402},
  timestamp    = {Thu, 10 Aug 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/emnlp/HiguchiYAOK022.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICASSP 2022 paper; same author list as the preprint DBLP:journals/corr/abs-2111-14706.
@inproceedings{DBLP:conf/icassp/AroraDDCUPZKGYV22,
  author       = {Siddhant Arora and
                  Siddharth Dalmia and
                  Pavel Denisov and
                  Xuankai Chang and
                  Yushi Ueda and
                  Yifan Peng and
                  Yuekai Zhang and
                  Sujay Kumar and
                  Karthik Ganesan and
                  Brian Yan and
                  Ngoc Thang Vu and
                  Alan W. Black and
                  Shinji Watanabe},
  title        = {{ESPnet-SLU:} Advancing Spoken Language Understanding Through {ESPnet}},
  booktitle    = {{IEEE} International Conference on Acoustics, Speech and Signal Processing,
                  {ICASSP} 2022, Virtual and Singapore, 23-27 May 2022},
  pages        = {7167--7171},
  publisher    = {{IEEE}},
  year         = {2022},
  url          = {https://doi.org/10.1109/ICASSP43922.2022.9747674},
  doi          = {10.1109/ICASSP43922.2022.9747674},
  timestamp    = {Fri, 27 Jun 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icassp/AroraDDCUPZKGYV22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Interspeech 2022 paper; same title and author list as the preprint DBLP:journals/corr/abs-2204-08920.
@inproceedings{DBLP:conf/interspeech/Deng0SA22,
  author       = {Keqi Deng and
                  Shinji Watanabe and
                  Jiatong Shi and
                  Siddhant Arora},
  editor       = {Hanseok Ko and
                  John H. L. Hansen},
  title        = {Blockwise Streaming Transformer for Spoken Language Understanding
                  and Simultaneous Speech Translation},
  booktitle    = {23rd Annual Conference of the International Speech Communication Association,
                  Interspeech 2022, Incheon, Korea, September 18-22, 2022},
  pages        = {1746--1750},
  publisher    = {{ISCA}},
  year         = {2022},
  url          = {https://doi.org/10.21437/Interspeech.2022-933},
  doi          = {10.21437/INTERSPEECH.2022-933},
  timestamp    = {Tue, 11 Jun 2024 16:45:43 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/Deng0SA22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Interspeech 2022 paper; same title and author list as the preprint DBLP:journals/corr/abs-2207-06670.
@inproceedings{DBLP:conf/interspeech/AroraDCYB022,
  author       = {Siddhant Arora and
                  Siddharth Dalmia and
                  Xuankai Chang and
                  Brian Yan and
                  Alan W. Black and
                  Shinji Watanabe},
  editor       = {Hanseok Ko and
                  John H. L. Hansen},
  title        = {Two-Pass Low Latency End-to-End Spoken Language Understanding},
  booktitle    = {23rd Annual Conference of the International Speech Communication Association,
                  Interspeech 2022, Incheon, Korea, September 18-22, 2022},
  pages        = {3478--3482},
  publisher    = {{ISCA}},
  year         = {2022},
  url          = {https://doi.org/10.21437/Interspeech.2022-10890},
  doi          = {10.21437/INTERSPEECH.2022-10890},
  timestamp    = {Wed, 21 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/AroraDCYB022.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% LREC 2022 paper; no doi field, the url points at the ACL Anthology record.
@inproceedings{DBLP:conf/lrec/AroraHUKDRSMCDN22,
  author       = {Siddhant Arora and
                  Henry Hosseini and
                  Christine Utz and
                  Vinayshekhar Bannihatti Kumar and
                  Tristan Dhellemmes and
                  Abhilasha Ravichander and
                  Peter Story and
                  Jasmine Mangat and
                  Rex Chen and
                  Martin Degeling and
                  Thomas B. Norton and
                  Thomas Hupperich and
                  Shomir Wilson and
                  Norman M. Sadeh},
  editor       = {Nicoletta Calzolari and
                  Fr{\'{e}}d{\'{e}}ric B{\'{e}}chet and
                  Philippe Blache and
                  Khalid Choukri and
                  Christopher Cieri and
                  Thierry Declerck and
                  Sara Goggi and
                  Hitoshi Isahara and
                  Bente Maegaard and
                  Joseph Mariani and
                  H{\'{e}}l{\`{e}}ne Mazo and
                  Jan Odijk and
                  Stelios Piperidis},
  title        = {A Tale of Two Regulatory Regimes: Creation and Analysis of a Bilingual
                  Privacy Policy Corpus},
  booktitle    = {Proceedings of the Thirteenth Language Resources and Evaluation Conference,
                  {LREC} 2022, Marseille, France, 20-25 June 2022},
  pages        = {5460--5472},
  publisher    = {European Language Resources Association},
  year         = {2022},
  url          = {https://aclanthology.org/2022.lrec-1.585},
  timestamp    = {Mon, 10 Oct 2022 16:57:52 +0200},
  biburl       = {https://dblp.org/rec/conf/lrec/AroraHUKDRSMCDN22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% SLT 2022 paper; same author list as the preprint DBLP:journals/corr/abs-2211-05869.
% NOTE(review): year is 2022 while the booktitle dates the workshop to January 2023 - confirm which year to cite.
@inproceedings{DBLP:conf/slt/PengAHUKGDCW22,
  author       = {Yifan Peng and
                  Siddhant Arora and
                  Yosuke Higuchi and
                  Yushi Ueda and
                  Sujay Kumar and
                  Karthik Ganesan and
                  Siddharth Dalmia and
                  Xuankai Chang and
                  Shinji Watanabe},
  title        = {A Study on the Integration of Pre-Trained {SSL}, {ASR}, {LM} and {SLU}
                  Models for Spoken Language Understanding},
  booktitle    = {{IEEE} Spoken Language Technology Workshop, {SLT} 2022, Doha, Qatar,
                  January 9-12, 2023},
  pages        = {406--413},
  publisher    = {{IEEE}},
  year         = {2022},
  url          = {https://doi.org/10.1109/SLT54892.2023.10022399},
  doi          = {10.1109/SLT54892.2023.10022399},
  timestamp    = {Fri, 27 Jun 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/slt/PengAHUKGDCW22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2204.08920; same title and author list as DBLP:conf/interspeech/Deng0SA22.
@article{DBLP:journals/corr/abs-2204-08920,
  author       = {Keqi Deng and
                  Shinji Watanabe and
                  Jiatong Shi and
                  Siddhant Arora},
  title        = {Blockwise Streaming Transformer for Spoken Language Understanding
                  and Simultaneous Speech Translation},
  journal      = {CoRR},
  volume       = {abs/2204.08920},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2204.08920},
  doi          = {10.48550/ARXIV.2204.08920},
  eprinttype   = {arXiv},
  eprint       = {2204.08920},
  timestamp    = {Mon, 03 Mar 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2204-08920.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2206.14169 (CoRR, 2022).
@article{DBLP:journals/corr/abs-2206-14169,
  author       = {Sonu Gupta and
                  Ellen Poplavska and
                  Nora O'Toole and
                  Siddhant Arora and
                  Thomas B. Norton and
                  Norman M. Sadeh and
                  Shomir Wilson},
  title        = {Creation and Analysis of an International Corpus of Privacy Laws},
  journal      = {CoRR},
  volume       = {abs/2206.14169},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2206.14169},
  doi          = {10.48550/ARXIV.2206.14169},
  eprinttype   = {arXiv},
  eprint       = {2206.14169},
  timestamp    = {Mon, 04 Jul 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2206-14169.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2207.06670; same title and author list as DBLP:conf/interspeech/AroraDCYB022.
@article{DBLP:journals/corr/abs-2207-06670,
  author       = {Siddhant Arora and
                  Siddharth Dalmia and
                  Xuankai Chang and
                  Brian Yan and
                  Alan W. Black and
                  Shinji Watanabe},
  title        = {Two-Pass Low Latency End-to-End Spoken Language Understanding},
  journal      = {CoRR},
  volume       = {abs/2207.06670},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2207.06670},
  doi          = {10.48550/ARXIV.2207.06670},
  eprinttype   = {arXiv},
  eprint       = {2207.06670},
  timestamp    = {Tue, 21 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2207-06670.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2210.15734; same title and author list as DBLP:conf/emnlp/AroraDYMB022.
@article{DBLP:journals/corr/abs-2210-15734,
  author       = {Siddhant Arora and
                  Siddharth Dalmia and
                  Brian Yan and
                  Florian Metze and
                  Alan W. Black and
                  Shinji Watanabe},
  title        = {Token-level Sequence Labeling for Spoken Language Understanding using
                  Compositional End-to-End Models},
  journal      = {CoRR},
  volume       = {abs/2210.15734},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2210.15734},
  doi          = {10.48550/ARXIV.2210.15734},
  eprinttype   = {arXiv},
  eprint       = {2210.15734},
  timestamp    = {Tue, 21 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2210-15734.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2210.16663; same title and author list as DBLP:conf/emnlp/HiguchiYAOK022.
@article{DBLP:journals/corr/abs-2210-16663,
  author       = {Yosuke Higuchi and
                  Brian Yan and
                  Siddhant Arora and
                  Tetsuji Ogawa and
                  Tetsunori Kobayashi and
                  Shinji Watanabe},
  title        = {{BERT} Meets {CTC:} New Formulation of End-to-End Speech Recognition
                  with Pre-trained Masked Language Model},
  journal      = {CoRR},
  volume       = {abs/2210.16663},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2210.16663},
  doi          = {10.48550/ARXIV.2210.16663},
  eprinttype   = {arXiv},
  eprint       = {2210.16663},
  timestamp    = {Tue, 21 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2210-16663.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2211.05869; same author list as DBLP:conf/slt/PengAHUKGDCW22.
@article{DBLP:journals/corr/abs-2211-05869,
  author       = {Yifan Peng and
                  Siddhant Arora and
                  Yosuke Higuchi and
                  Yushi Ueda and
                  Sujay Kumar and
                  Karthik Ganesan and
                  Siddharth Dalmia and
                  Xuankai Chang and
                  Shinji Watanabe},
  title        = {A Study on the Integration of Pre-trained {SSL}, {ASR}, {LM} and {SLU}
                  Models for Spoken Language Understanding},
  journal      = {CoRR},
  volume       = {abs/2211.05869},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2211.05869},
  doi          = {10.48550/ARXIV.2211.05869},
  eprinttype   = {arXiv},
  eprint       = {2211.05869},
  timestamp    = {Fri, 27 Jun 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2211-05869.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2211.08726 (CoRR, 2022).
@article{DBLP:journals/corr/abs-2211-08726,
  author       = {Hayato Futami and
                  Emiru Tsunoo and
                  Kentaro Shibata and
                  Yosuke Kashiwagi and
                  Takao Okuda and
                  Siddhant Arora and
                  Shinji Watanabe},
  title        = {Streaming Joint Speech Recognition and Disfluency Detection},
  journal      = {CoRR},
  volume       = {abs/2211.08726},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2211.08726},
  doi          = {10.48550/ARXIV.2211.08726},
  eprinttype   = {arXiv},
  eprint       = {2211.08726},
  timestamp    = {Tue, 21 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2211-08726.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2212.10525 (CoRR, 2022).
@article{DBLP:journals/corr/abs-2212-10525,
  author       = {Suwon Shon and
                  Siddhant Arora and
                  Chyi{-}Jiunn Lin and
                  Ankita Pasad and
                  Felix Wu and
                  Roshan S. Sharma and
                  Wei{-}Lun Wu and
                  Hung{-}yi Lee and
                  Karen Livescu and
                  Shinji Watanabe},
  title        = {{SLUE} Phase-2: {A} Benchmark Suite of Diverse Spoken Language Understanding
                  Tasks},
  journal      = {CoRR},
  volume       = {abs/2212.10525},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2212.10525},
  doi          = {10.48550/ARXIV.2212.10525},
  eprinttype   = {arXiv},
  eprint       = {2212.10525},
  timestamp    = {Thu, 13 Jul 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2212-10525.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Interspeech 2021 paper; same title and author list as the preprint DBLP:journals/corr/abs-2106-15065.
@inproceedings{DBLP:conf/interspeech/AroraO0DM0B21,
  author       = {Siddhant Arora and
                  Alissa Ostapenko and
                  Vijay Viswanathan and
                  Siddharth Dalmia and
                  Florian Metze and
                  Shinji Watanabe and
                  Alan W. Black},
  editor       = {Hynek Hermansky and
                  Honza Cernock{\'{y}} and
                  Luk{\'{a}}s Burget and
                  Lori Lamel and
                  Odette Scharenborg and
                  Petr Motl{\'{\i}}cek},
  title        = {Rethinking End-to-End Evaluation of Decomposable Tasks: {A} Case Study
                  on Spoken Language Understanding},
  booktitle    = {22nd Annual Conference of the International Speech Communication Association,
                  Interspeech 2021, Brno, Czechia, August 30 - September 3, 2021},
  pages        = {1264--1268},
  publisher    = {{ISCA}},
  year         = {2021},
  url          = {https://doi.org/10.21437/Interspeech.2021-1537},
  doi          = {10.21437/INTERSPEECH.2021-1537},
  timestamp    = {Tue, 11 Jun 2024 16:45:43 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/AroraO0DM0B21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2104.14914 (CoRR, 2021); no doi field, the url is the arXiv abstract page.
@article{DBLP:journals/corr/abs-2104-14914,
  author       = {Siddhant Arora and
                  Vinayak Gupta and
                  Garima Gaur and
                  Srikanta Bedathur},
  title        = {{BERT} Meets Relational {DB:} Contextual Representations of Relational
                  Databases},
  journal      = {CoRR},
  volume       = {abs/2104.14914},
  year         = {2021},
  url          = {https://arxiv.org/abs/2104.14914},
  eprinttype   = {arXiv},
  eprint       = {2104.14914},
  timestamp    = {Tue, 04 May 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2104-14914.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2106.15065; same title and author list as DBLP:conf/interspeech/AroraO0DM0B21.
@article{DBLP:journals/corr/abs-2106-15065,
  author       = {Siddhant Arora and
                  Alissa Ostapenko and
                  Vijay Viswanathan and
                  Siddharth Dalmia and
                  Florian Metze and
                  Shinji Watanabe and
                  Alan W. Black},
  title        = {Rethinking End-to-End Evaluation of Decomposable Tasks: {A} Case Study
                  on Spoken Language Understanding},
  journal      = {CoRR},
  volume       = {abs/2106.15065},
  year         = {2021},
  url          = {https://arxiv.org/abs/2106.15065},
  eprinttype   = {arXiv},
  eprint       = {2106.15065},
  timestamp    = {Wed, 17 Nov 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2106-15065.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2111.14706; same author list as DBLP:conf/icassp/AroraDDCUPZKGYV22.
@article{DBLP:journals/corr/abs-2111-14706,
  author       = {Siddhant Arora and
                  Siddharth Dalmia and
                  Pavel Denisov and
                  Xuankai Chang and
                  Yushi Ueda and
                  Yifan Peng and
                  Yuekai Zhang and
                  Sujay Kumar and
                  Karthik Ganesan and
                  Brian Yan and
                  Ngoc Thang Vu and
                  Alan W. Black and
                  Shinji Watanabe},
  title        = {{ESPnet-SLU:} Advancing Spoken Language Understanding through {ESPnet}},
  journal      = {CoRR},
  volume       = {abs/2111.14706},
  year         = {2021},
  url          = {https://arxiv.org/abs/2111.14706},
  eprinttype   = {arXiv},
  eprint       = {2111.14706},
  timestamp    = {Fri, 27 Jun 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2111-14706.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2112.09669; same title and author list as DBLP:conf/aaai/AroraPSCLN22.
@article{DBLP:journals/corr/abs-2112-09669,
  author       = {Siddhant Arora and
                  Danish Pruthi and
                  Norman M. Sadeh and
                  William W. Cohen and
                  Zachary C. Lipton and
                  Graham Neubig},
  title        = {Explain, Edit, and Understand: Rethinking User Study Design for Evaluating
                  Model Explanations},
  journal      = {CoRR},
  volume       = {abs/2112.09669},
  year         = {2021},
  url          = {https://arxiv.org/abs/2112.09669},
  eprinttype   = {arXiv},
  eprint       = {2112.09669},
  timestamp    = {Mon, 03 Jan 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2112-09669.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% AKBC 2020 paper; same title and author list as the preprint DBLP:journals/corr/abs-2006-04509.
@inproceedings{DBLP:conf/akbc/AroraBRS20,
  author       = {Siddhant Arora and
                  Srikanta Bedathur and
                  Maya Ramanath and
                  Deepak Sharma},
  editor       = {Dipanjan Das and
                  Hannaneh Hajishirzi and
                  Andrew McCallum and
                  Sameer Singh},
  title        = {{IterefinE:} Iterative {KG} Refinement Embeddings using Symbolic Knowledge},
  booktitle    = {Conference on Automated Knowledge Base Construction, {AKBC} 2020,
                  Virtual, June 22-24, 2020},
  year         = {2020},
  url          = {https://doi.org/10.24432/C5NP46},
  doi          = {10.24432/C5NP46},
  timestamp    = {Tue, 12 Jan 2021 16:24:06 +0100},
  biburl       = {https://dblp.org/rec/conf/akbc/AroraBRS20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% WSDM 2020 paper.
@inproceedings{DBLP:conf/wsdm/YatesAZYJL20,
  author       = {Andrew Yates and
                  Siddhant Arora and
                  Xinyu Zhang and
                  Wei Yang and
                  Kevin Martin Jose and
                  Jimmy Lin},
  editor       = {James Caverlee and
                  Xia (Ben) Hu and
                  Mounia Lalmas and
                  Wei Wang},
  title        = {Capreolus: {A} Toolkit for End-to-End Neural Ad Hoc Retrieval},
  booktitle    = {{WSDM} '20: The Thirteenth {ACM} International Conference on Web Search
                  and Data Mining, Houston, TX, USA, February 3-7, 2020},
  pages        = {861--864},
  publisher    = {{ACM}},
  year         = {2020},
  url          = {https://doi.org/10.1145/3336191.3371868},
  doi          = {10.1145/3336191.3371868},
  timestamp    = {Fri, 15 Dec 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/wsdm/YatesAZYJL20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2005.06437 (CoRR, 2020); no doi field, the url is the arXiv abstract page.
@article{DBLP:journals/corr/abs-2005-06437,
  author       = {Siddhant Arora and
                  Srikanta Bedathur},
  title        = {On Embeddings in Relational Databases},
  journal      = {CoRR},
  volume       = {abs/2005.06437},
  year         = {2020},
  url          = {https://arxiv.org/abs/2005.06437},
  eprinttype   = {arXiv},
  eprint       = {2005.06437},
  timestamp    = {Thu, 14 May 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2005-06437.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2006.04509; same title and author list as DBLP:conf/akbc/AroraBRS20.
@article{DBLP:journals/corr/abs-2006-04509,
  author       = {Siddhant Arora and
                  Srikanta Bedathur and
                  Maya Ramanath and
                  Deepak Sharma},
  title        = {{IterefinE:} Iterative {KG} Refinement Embeddings using Symbolic Knowledge},
  journal      = {CoRR},
  volume       = {abs/2006.04509},
  year         = {2020},
  url          = {https://arxiv.org/abs/2006.04509},
  eprinttype   = {arXiv},
  eprint       = {2006.04509},
  timestamp    = {Fri, 12 Jun 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2006-04509.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2007.12374 (CoRR, 2020); no doi field, the url is the arXiv abstract page.
@article{DBLP:journals/corr/abs-2007-12374,
  author       = {Siddhant Arora},
  title        = {A Survey on Graph Neural Networks for Knowledge Graph Completion},
  journal      = {CoRR},
  volume       = {abs/2007.12374},
  year         = {2020},
  url          = {https://arxiv.org/abs/2007.12374},
  eprinttype   = {arXiv},
  eprint       = {2007.12374},
  timestamp    = {Wed, 29 Jul 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2007-12374.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Workshop paper (CEUR-WS); same title and author list as the preprint DBLP:journals/corr/abs-1904-05737.
@inproceedings{DBLP:conf/ecir/AroraY19,
  author       = {Siddhant Arora and
                  Andrew Yates},
  editor       = {J{\"{o}}ran Beel and
                  Lars Kotthoff},
  title        = {Investigating Retrieval Method Selection with Axiomatic Features},
  booktitle    = {Proceedings of the 1st Interdisciplinary Workshop on Algorithm Selection
                  and Meta-Learning in Information Retrieval co-located with the 41st
                  European Conference on Information Retrieval {(ECIR} 2019), Cologne,
                  Germany, April 14, 2019},
  series       = {{CEUR} Workshop Proceedings},
  pages        = {18--31},
  publisher    = {CEUR-WS.org},
  year         = {2019},
  url          = {https://ceur-ws.org/Vol-2360/paper4Axiomatic.pdf},
  timestamp    = {Fri, 10 Mar 2023 16:22:16 +0100},
  biburl       = {https://dblp.org/rec/conf/ecir/AroraY19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Workshop paper (CEUR-WS); the entry has no pages or doi field.
@inproceedings{DBLP:conf/iui/KhoslaANSN19,
  author       = {Sopan Khosla and
                  Siddhant Arora and
                  Abhilash Nandy and
                  Ankita Saxena and
                  Anandhavelu Natarajan},
  editor       = {Christoph Trattner and
                  Denis Parra and
                  Nathalie Riche},
  title        = {Understanding Community Rivalry on Social Media: {A} Case Study of
                  Two Footballing Giants},
  booktitle    = {Joint Proceedings of the {ACM} {IUI} 2019 Workshops co-located with
                  the 24th {ACM} Conference on Intelligent User Interfaces {(ACM} {IUI}
                  2019), Los Angeles, USA, March 20, 2019},
  series       = {{CEUR} Workshop Proceedings},
  publisher    = {CEUR-WS.org},
  year         = {2019},
  url          = {https://ceur-ws.org/Vol-2327/IUI19WS-HUMANIZE-3.pdf},
  timestamp    = {Fri, 10 Mar 2023 16:23:10 +0100},
  biburl       = {https://dblp.org/rec/conf/iui/KhoslaANSN19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1904.05737; same title and author list as DBLP:conf/ecir/AroraY19.
@article{DBLP:journals/corr/abs-1904-05737,
  author       = {Siddhant Arora and
                  Andrew Yates},
  title        = {Investigating Retrieval Method Selection with Axiomatic Features},
  journal      = {CoRR},
  volume       = {abs/1904.05737},
  year         = {2019},
  url          = {https://arxiv.org/abs/1904.05737},
  eprinttype   = {arXiv},
  eprint       = {1904.05737},
  timestamp    = {Thu, 25 Apr 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1904-05737.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% IntelliSys 2018 paper (Springer); the underscore in the DOI is escaped as \_ in both url and doi.
@inproceedings{DBLP:conf/intellisys/SharmaAAAA18,
  author       = {Richa Sharma and
                  Tejas Arya and
                  Siddhant Arora and
                  Arti Arya and
                  Pooja Agarwal},
  editor       = {Kohei Arai and
                  Supriya Kapoor and
                  Rahul Bhatia},
  title        = {A Naive Deep Nets Based Approach for Authenticating Viral Textual
                  Content on Social Media},
  booktitle    = {Intelligent Systems and Applications - Proceedings of the 2018 Intelligent
                  Systems Conference, IntelliSys 2018, London, UK, September 6-7, 2018,
                  Volume 2},
  series       = {Advances in Intelligent Systems and Computing},
  pages        = {679--689},
  publisher    = {Springer},
  year         = {2018},
  url          = {https://doi.org/10.1007/978-3-030-01057-7\_52},
  doi          = {10.1007/978-3-030-01057-7\_52},
  timestamp    = {Tue, 18 Nov 2025 11:24:21 +0100},
  biburl       = {https://dblp.org/rec/conf/intellisys/SharmaAAAA18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

manage site settings
To protect your privacy, all features that rely on external API calls from your browser are turned off by default. You need to opt-in for them to become active. All settings here will be stored as cookies with your web browser. For more information see our F.A.Q.


Google
Google Scholar
Semantic Scholar
Internet Archive Scholar
CiteSeerX
ORCID













