{"id":"https://openalex.org/W4408352169","doi":"https://doi.org/10.1109/icassp49660.2025.10889475","title":"Leveraging Self-Supervised Learning for Speaker Diarization","display_name":"Leveraging Self-Supervised Learning for Speaker Diarization","publication_year":2025,"publication_date":"2025-03-12","ids":{"openalex":"https://openalex.org/W4408352169","doi":"https://doi.org/10.1109/icassp49660.2025.10889475"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49660.2025.10889475","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10889475","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5090367911","display_name":"Jiangyu Han","orcid":"https://orcid.org/0000-0001-5390-8520"},"institutions":[{"id":"https://openalex.org/I60587646","display_name":"Brno University of Technology","ror":"https://ror.org/03613d656","country_code":"CZ","type":"education","lineage":["https://openalex.org/I60587646"]}],"countries":["CZ"],"is_corresponding":true,"raw_author_name":"Jiangyu Han","raw_affiliation_strings":["Brno University of Technology, Speech@FIT,Czechia"],"affiliations":[{"raw_affiliation_string":"Brno University of Technology, Speech@FIT,Czechia","institution_ids":["https://openalex.org/I60587646"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084170554","display_name":"Federico Landini","orcid":"https://orcid.org/0000-0003-0379-9834"},"institutions":[{"id":"https://openalex.org/I60587646","display_name":"Brno University of Technology","ror":"https://ror.org/03613d656","country_code":"CZ","type":"education","lineage":["https://openalex.org/I60587646"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Federico Landini","raw_affiliation_strings":["Brno University of Technology, Speech@FIT,Czechia"],"affiliations":[{"raw_affiliation_string":"Brno University of Technology, Speech@FIT,Czechia","institution_ids":["https://openalex.org/I60587646"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043239262","display_name":"Johan Rohdin","orcid":"https://orcid.org/0000-0003-0978-2064"},"institutions":[{"id":"https://openalex.org/I60587646","display_name":"Brno University of Technology","ror":"https://ror.org/03613d656","country_code":"CZ","type":"education","lineage":["https://openalex.org/I60587646"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Johan Rohdin","raw_affiliation_strings":["Brno University of Technology, Speech@FIT,Czechia"],"affiliations":[{"raw_affiliation_string":"Brno University of Technology, Speech@FIT,Czechia","institution_ids":["https://openalex.org/I60587646"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043410084","display_name":"Anna Silnova","orcid":null},"institutions":[{"id":"https://openalex.org/I60587646","display_name":"Brno University of Technology","ror":"https://ror.org/03613d656","country_code":"CZ","type":"education","lineage":["https://openalex.org/I60587646"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Anna Silnova","raw_affiliation_strings":["Brno University of Technology, Speech@FIT,Czechia"],"affiliations":[{"raw_affiliation_string":"Brno University of Technology, Speech@FIT,Czechia","institution_ids":["https://openalex.org/I60587646"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035162271","display_name":"Mireia D\u00edez","orcid":"https://orcid.org/0000-0001-7894-8377"},"institutions":[{"id":"https://openalex.org/I60587646","display_name":"Brno University of Technology","ror":"https://ror.org/03613d656","country_code":"CZ","type":"education","lineage":["https://openalex.org/I60587646"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Mireia Diez","raw_affiliation_strings":["Brno University of Technology, Speech@FIT,Czechia"],"affiliations":[{"raw_affiliation_string":"Brno University of Technology, Speech@FIT,Czechia","institution_ids":["https://openalex.org/I60587646"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5042273299","display_name":"Luk\u00e1\u0161 Burget","orcid":"https://orcid.org/0000-0002-4951-5908"},"institutions":[{"id":"https://openalex.org/I60587646","display_name":"Brno University of Technology","ror":"https://ror.org/03613d656","country_code":"CZ","type":"education","lineage":["https://openalex.org/I60587646"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Luk\u00e1\u0161 Burget","raw_affiliation_strings":["Brno University of Technology, Speech@FIT,Czechia"],"affiliations":[{"raw_affiliation_string":"Brno University of Technology, Speech@FIT,Czechia","institution_ids":["https://openalex.org/I60587646"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5090367911"],"corresponding_institution_ids":["https://openalex.org/I60587646"],"apc_list":null,"apc_paid":null,"fwci":12.0603,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.98076162,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9776999950408936,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9776999950408936,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.8224622011184692},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7928723692893982},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5932493805885315},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.45668378472328186},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.40366214513778687}],"concepts":[{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.8224622011184692},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7928723692893982},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5932493805885315},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.45668378472328186},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.40366214513778687}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49660.2025.10889475","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10889475","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320311687","display_name":"Ministry of Education","ror":"https://ror.org/03m01yf64"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":37,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W2125336414","https://openalex.org/W2808631503","https://openalex.org/W2963470929","https://openalex.org/W2964052309","https://openalex.org/W2997419692","https://openalex.org/W3008357631","https://openalex.org/W3093964892","https://openalex.org/W3095212884","https://openalex.org/W3097777922","https://openalex.org/W3163903701","https://openalex.org/W3178462146","https://openalex.org/W3196117288","https://openalex.org/W3196857193","https://openalex.org/W3197580070","https://openalex.org/W3207834622","https://openalex.org/W3209059054","https://openalex.org/W3209984917","https://openalex.org/W4224939064","https://openalex.org/W4281492411","https://openalex.org/W4297841362","https://openalex.org/W4319862271","https://openalex.org/W4372267289","https://openalex.org/W4385822353","https://openalex.org/W4385822356","https://openalex.org/W4385823306","https://openalex.org/W4391892525","https://openalex.org/W4392904030","https://openalex.org/W4400275805","https://openalex.org/W4400615784","https://openalex.org/W4400617062","https://openalex.org/W4402111631","https://openalex.org/W4402111989","https://openalex.org/W4402115971","https://openalex.org/W6757817989","https://openalex.org/W6780218876","https://openalex.org/W7033105191"],"related_works":["https://openalex.org/W2206035908","https://openalex.org/W1491159402","https://openalex.org/W4297807400","https://openalex.org/W2249138175","https://openalex.org/W4389984014","https://openalex.org/W2144208207","https://openalex.org/W1509309911","https://openalex.org/W1599425004","https://openalex.org/W2118860825","https://openalex.org/W2096510939"],"abstract_inverted_index":{"End-to-end":[0],"neural":[1,60,75],"diarization":[2,41,61,76,152],"has":[3],"evolved":[4],"considerably":[5],"over":[6],"the":[7,54,65,72,95,112,170,181],"past":[8],"few":[9],"years,":[10],"but":[11,36],"data":[12,57,117,130,137],"scarcity":[13,58,131],"is":[14,42,187],"still":[15],"a":[16],"major":[17],"obstacle":[18],"for":[19,59],"further":[20],"improvements.":[21],"Self-supervised":[22],"learning":[23],"methods":[24],"such":[25],"as":[26,68],"WavLM":[27,51,78,123,159],"have":[28],"shown":[29],"promising":[30],"performance":[31,114,179],"on":[32,39,82,103,169],"several":[33],"downstream":[34],"tasks,":[35],"their":[37],"application":[38],"speaker":[40],"somehow":[43],"limited.":[44],"In":[45,108],"this":[46],"work,":[47],"we":[48,120,142,164],"explore":[49],"using":[50,158],"to":[52,149],"alleviate":[53],"problem":[55],"of":[56],"training.":[62],"We":[63],"use":[64],"same":[66],"pipeline":[67],"Pyannote":[69,96,182],"and":[70,79,86,98,105],"improve":[71],"local":[73],"end-to-end":[74,151],"with":[77],"Conformer.":[80],"Experiments":[81],"far-field":[83],"AMI,":[84],"AISHELL-4,":[85],"AliMeeting":[87],"datasets":[88],"show":[89,121],"that":[90,122,144],"our":[91,161,167],"method":[92],"substantially":[93],"outperforms":[94],"baseline":[97],"achieves":[99,177],"new":[100],"state-of-the-art":[101],"results":[102],"AMI":[104],"AISHELL4,":[106],"respectively.":[107],"addition,":[109],"by":[110],"analyzing":[111],"system":[113],"under":[115],"different":[116],"quantity":[118],"scenarios,":[119],"representations":[124],"are":[125],"much":[126],"more":[127],"robust":[128],"against":[129],"than":[132,180],"filterbank":[133],"features,":[134],"enabling":[135],"less":[136],"hungry":[138],"training":[139],"strategies.":[140],"Furthermore,":[141],"found":[143],"simulated":[145],"data,":[146],"usually":[147],"used":[148],"train":[150],"models,":[153],"does":[154],"not":[155],"help":[156],"when":[157],"in":[160],"experiments.":[162],"Additionally,":[163],"also":[165],"evaluate":[166],"model":[168],"recent":[171],"CHiME8":[172],"NOTSOFAR-1":[173],"task":[174],"where":[175],"it":[176],"better":[178],"baseline.":[183],"Our":[184],"source":[185],"code":[186],"publicly":[188],"available":[189],"at":[190],"https://github.com/BUTSpeechFIT/DiariZen.":[191]},"counts_by_year":[{"year":2025,"cited_by_count":5}],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2025-10-10T00:00:00"}
