{"id":"https://openalex.org/W2152994038","doi":"https://doi.org/10.1109/hoti.2011.14","title":"Designing Non-blocking Broadcast with Collective Offload on InfiniBand Clusters: A Case Study with HPL","display_name":"Designing Non-blocking Broadcast with Collective Offload on InfiniBand Clusters: A Case Study with HPL","publication_year":2011,"publication_date":"2011-08-01","ids":{"openalex":"https://openalex.org/W2152994038","doi":"https://doi.org/10.1109/hoti.2011.14","mag":"2152994038"},"language":"en","primary_location":{"id":"doi:10.1109/hoti.2011.14","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hoti.2011.14","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2011 IEEE 19th Annual Symposium on High Performance Interconnects","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5010923189","display_name":"Krishna Kandalla","orcid":null},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"K. Kandalla","raw_affiliation_strings":["Department of Computer Science and Engineering, Ohio State University, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Ohio State University, USA","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034293705","display_name":"Hari Subramoni","orcid":"https://orcid.org/0000-0002-1200-2754"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"H. Subramoni","raw_affiliation_strings":["Department of Computer Science and Engineering, Ohio State University, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Ohio State University, USA","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078989266","display_name":"J. Vienne","orcid":"https://orcid.org/0000-0002-2848-6095"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"J. Vienne","raw_affiliation_strings":["Department of Computer Science and Engineering, Ohio State University, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Ohio State University, USA","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011002813","display_name":"S. Pai Raikar","orcid":null},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"S. Pai Raikar","raw_affiliation_strings":["Department of Computer Science and Engineering, Ohio State University, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Ohio State University, USA","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040711735","display_name":"Karen Tomko","orcid":"https://orcid.org/0000-0002-6542-853X"},"institutions":[{"id":"https://openalex.org/I34077533","display_name":"Ohio Supercomputer Center","ror":"https://ror.org/01apna436","country_code":"US","type":"facility","lineage":["https://openalex.org/I34077533"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"K. Tomko","raw_affiliation_strings":["Ohio Supercomputer Center, Columbus, OH, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Ohio Supercomputer Center, Columbus, OH, USA","institution_ids":["https://openalex.org/I34077533"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000953035","display_name":"Sayantan Sur","orcid":null},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"S. Sur","raw_affiliation_strings":["Department of Computer Science and Engineering, Ohio State University, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Ohio State University, USA","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5024879682","display_name":"Dhabaleswar K. Panda","orcid":"https://orcid.org/0000-0002-0356-1781"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"D.K. Panda","raw_affiliation_strings":["Department of Computer Science and Engineering, Ohio State University, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Ohio State University, USA","institution_ids":["https://openalex.org/I52357470"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.5599,"has_fulltext":false,"cited_by_count":16,"citation_normalized_percentile":{"value":0.90419866,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"4331 2006","issue":null,"first_page":"27","last_page":"34"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.9947999715805054,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/infiniband","display_name":"InfiniBand","score":0.98231041431427},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8862940073013306},{"id":"https://openalex.org/keywords/blocking","display_name":"Blocking (statistics)","score":0.7632368803024292},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6348913311958313},{"id":"https://openalex.org/keywords/message-passing-interface","display_name":"Message Passing Interface","score":0.5846616625785828},{"id":"https://openalex.org/keywords/remote-direct-memory-access","display_name":"Remote direct memory access","score":0.5794694423675537},{"id":"https://openalex.org/keywords/interface","display_name":"Interface (matter)","score":0.5686832070350647},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.5230296850204468},{"id":"https://openalex.org/keywords/network-interface","display_name":"Network interface","score":0.4945678412914276},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.4799414277076721},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.43509477376937866},{"id":"https://openalex.org/keywords/implementation","display_name":"Implementation","score":0.4340660572052002},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.4227059483528137},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.40563979744911194},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.36388593912124634},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.35875970125198364},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.32771748304367065},{"id":"https://openalex.org/keywords/message-passing","display_name":"Message passing","score":0.26911661028862},{"id":"https://openalex.org/keywords/wireless","display_name":"Wireless","score":0.16158542037010193},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.13588178157806396},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.12145668268203735}],"concepts":[{"id":"https://openalex.org/C2781030343","wikidata":"https://www.wikidata.org/wiki/Q922437","display_name":"InfiniBand","level":2,"score":0.98231041431427},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8862940073013306},{"id":"https://openalex.org/C144745244","wikidata":"https://www.wikidata.org/wiki/Q4927286","display_name":"Blocking (statistics)","level":2,"score":0.7632368803024292},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6348913311958313},{"id":"https://openalex.org/C166782233","wikidata":"https://www.wikidata.org/wiki/Q127879","display_name":"Message Passing Interface","level":3,"score":0.5846616625785828},{"id":"https://openalex.org/C130795937","wikidata":"https://www.wikidata.org/wiki/Q2561570","display_name":"Remote direct memory access","level":2,"score":0.5794694423675537},{"id":"https://openalex.org/C113843644","wikidata":"https://www.wikidata.org/wiki/Q901882","display_name":"Interface (matter)","level":4,"score":0.5686832070350647},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.5230296850204468},{"id":"https://openalex.org/C103987645","wikidata":"https://www.wikidata.org/wiki/Q985806","display_name":"Network interface","level":3,"score":0.4945678412914276},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.4799414277076721},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.43509477376937866},{"id":"https://openalex.org/C26713055","wikidata":"https://www.wikidata.org/wiki/Q245962","display_name":"Implementation","level":2,"score":0.4340660572052002},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.4227059483528137},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.40563979744911194},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.36388593912124634},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.35875970125198364},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.32771748304367065},{"id":"https://openalex.org/C854659","wikidata":"https://www.wikidata.org/wiki/Q1859284","display_name":"Message passing","level":2,"score":0.26911661028862},{"id":"https://openalex.org/C555944384","wikidata":"https://www.wikidata.org/wiki/Q249","display_name":"Wireless","level":2,"score":0.16158542037010193},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.13588178157806396},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.12145668268203735},{"id":"https://openalex.org/C129307140","wikidata":"https://www.wikidata.org/wiki/Q6795880","display_name":"Maximum bubble pressure method","level":3,"score":0.0},{"id":"https://openalex.org/C157915830","wikidata":"https://www.wikidata.org/wiki/Q2928001","display_name":"Bubble","level":2,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hoti.2011.14","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hoti.2011.14","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2011 IEEE 19th Annual Symposium on High Performance Interconnects","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W134816542","https://openalex.org/W1530542361","https://openalex.org/W2062011830","https://openalex.org/W2062764889","https://openalex.org/W2068975988","https://openalex.org/W2091780466","https://openalex.org/W2099020156","https://openalex.org/W2120827897","https://openalex.org/W2140973798","https://openalex.org/W2143539682","https://openalex.org/W2166097017","https://openalex.org/W2174937805","https://openalex.org/W3140467006","https://openalex.org/W6685646442"],"related_works":["https://openalex.org/W1978254186","https://openalex.org/W4245217724","https://openalex.org/W4390917331","https://openalex.org/W1984788566","https://openalex.org/W4253931064","https://openalex.org/W3166154920","https://openalex.org/W2154082760","https://openalex.org/W2134172439","https://openalex.org/W2751263050","https://openalex.org/W2540148975"],"abstract_inverted_index":{"The":[0,72],"upcoming":[1],"MPI-3.0":[2],"standard":[3],"is":[4,58,160],"expected":[5],"to":[6,32,59,147,156,206,214],"include":[7],"non-blocking":[8,55,102,139,170],"collective":[9,29,56,140],"operations.":[10,30],"Non-blocking":[11],"collectives":[12,149],"offer":[13,79],"a":[14,40,80,101],"new":[15],"MPI":[16,36,129,134,153,183],"interface,":[17],"using":[18,107],"which":[19,203],"an":[20,53,161,164,207],"application":[21,165],"can":[22,193,226],"decouple":[23],"the":[24,35,48,63,98,108,125,128,175,228],"initiation":[25],"and":[26,43,188],"completion":[27],"of":[28,47,62,100,127,152,163,177,230],"However,":[31],"be":[33],"effective,":[34],"library":[37],"should":[38],"provide":[39,138],"high":[41],"performance":[42],"scalable":[44],"implementation.":[45],"One":[46],"major":[49],"challenges":[50],"in":[51,69,209],"designing":[52],"effective":[54],"operation":[57,64,104],"ensure":[60],"progress":[61,88],"while":[65],"processors":[66],"are":[67],"busy":[68],"application-level":[70],"computation.":[71],"recently":[73],"introduced":[74],"Mellanox":[75],"ConnectX-2":[76],"InfiniBand":[77],"adapters":[78],"task":[81],"offload":[82,110,181],"interface":[83],"(CORE-Direct)":[84],"that":[85,116,191,222],"enables":[86],"communication":[87],"without":[89,123],"requiring":[90],"CPU":[91],"cycles.":[92],"In":[93],"this":[94],"paper,":[95],"we":[96,189],"present":[97],"design":[99],"broadcast":[103],"(MPI":[105],"Ibcast)":[106],"CORE-Direct":[109],"interface.":[111],"Our":[112],"experimental":[113],"evaluations":[114],"show":[115],"our":[117,178,223],"implementation":[118,185],"delivers":[119],"near":[120],"perfect":[121],"overlap,":[122],"penalizing":[124],"latency":[126],"Ibcast":[130,184],"operation.":[131],"Since":[132],"existing":[133],"implementations":[135],"do":[136],"not":[137],"communication,":[141],"scientific":[142],"applications":[143],"have":[144,173],"been":[145],"modified":[146],"implement":[148],"on":[150,233],"top":[151],"point-to-point":[154],"operations":[155],"achieve":[157,194],"overlap.":[158],"HPL":[159,187,192],"example":[162],"use":[166],"case":[167],"scenario":[168],"for":[169],"collectives.":[171],"We":[172,219],"explored":[174],"benefits":[176],"proposed":[179,224],"network":[180],"based":[182],"with":[186,198,216],"observe":[190,221],"its":[195,210],"peak":[196],"throughput":[197],"significantly":[199],"smaller":[200],"problem":[201],"sizes,":[202],"also":[204,220],"leads":[205],"improvement":[208],"run-time":[211],"by":[212],"up":[213],"78%,":[215],"512":[217],"processors.":[218],"designs":[225],"minimize":[227],"impact":[229],"system":[231],"noise":[232],"applications.":[234]},"counts_by_year":[{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":2},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":4},{"year":2014,"cited_by_count":5},{"year":2013,"cited_by_count":1},{"year":2012,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
