{"id":"https://openalex.org/W3126246198","doi":"https://doi.org/10.1109/exampi52011.2020.00007","title":"Scalable MPI Collectives using SHARP: Large Scale Performance Evaluation on the TACC Frontera System","display_name":"Scalable MPI Collectives using SHARP: Large Scale Performance Evaluation on the TACC Frontera System","publication_year":2020,"publication_date":"2020-11-01","ids":{"openalex":"https://openalex.org/W3126246198","doi":"https://doi.org/10.1109/exampi52011.2020.00007","mag":"3126246198"},"language":"en","primary_location":{"id":"doi:10.1109/exampi52011.2020.00007","is_oa":false,"landing_page_url":"https://doi.org/10.1109/exampi52011.2020.00007","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 Workshop on Exascale MPI (ExaMPI)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103171233","display_name":"Bharath Ramesh","orcid":"https://orcid.org/0000-0002-6430-8587"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Bharath Ramesh","raw_affiliation_strings":["The Ohio State University,Department of Computer Science and Engineering,Columbus,USA"],"affiliations":[{"raw_affiliation_string":"The Ohio State University,Department of Computer Science and Engineering,Columbus,USA","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055205226","display_name":"Kaushik Kandadi Suresh","orcid":"https://orcid.org/0000-0002-3705-2387"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kaushik Kandadi Suresh","raw_affiliation_strings":["The Ohio State University,Department of Computer Science and Engineering,Columbus,USA"],"affiliations":[{"raw_affiliation_string":"The Ohio State University,Department of Computer Science and Engineering,Columbus,USA","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083883762","display_name":"Nick Sarkauskas","orcid":null},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nick Sarkauskas","raw_affiliation_strings":["The Ohio State University,Department of Computer Science and Engineering,Columbus,USA"],"affiliations":[{"raw_affiliation_string":"The Ohio State University,Department of Computer Science and Engineering,Columbus,USA","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013350491","display_name":"Mohammadreza Bayatpour","orcid":null},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mohammadreza Bayatpour","raw_affiliation_strings":["The Ohio State University,Department of Computer Science and Engineering,Columbus,USA"],"affiliations":[{"raw_affiliation_string":"The Ohio State University,Department of Computer Science and Engineering,Columbus,USA","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008298485","display_name":"Jahanzeb Maqbool Hashmi","orcid":null},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jahanzeb Maqbool Hashmi","raw_affiliation_strings":["The Ohio State University,Department of Computer Science and Engineering,Columbus,USA"],"affiliations":[{"raw_affiliation_string":"The Ohio State University,Department of Computer Science and Engineering,Columbus,USA","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034293705","display_name":"Hari Subramoni","orcid":"https://orcid.org/0000-0002-1200-2754"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hari Subramoni","raw_affiliation_strings":["The Ohio State University,Department of Computer Science and Engineering,Columbus,USA"],"affiliations":[{"raw_affiliation_string":"The Ohio State University,Department of Computer Science and Engineering,Columbus,USA","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5024879682","display_name":"Dhabaleswar K. Panda","orcid":"https://orcid.org/0000-0002-0356-1781"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dhabaleswar K. Panda","raw_affiliation_strings":["The Ohio State University,Department of Computer Science and Engineering,Columbus,USA"],"affiliations":[{"raw_affiliation_string":"The Ohio State University,Department of Computer Science and Engineering,Columbus,USA","institution_ids":["https://openalex.org/I52357470"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5103171233"],"corresponding_institution_ids":["https://openalex.org/I52357470"],"apc_list":null,"apc_paid":null,"fwci":1.3862,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.81852497,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"11","last_page":"20"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/infiniband","display_name":"InfiniBand","score":0.9008623957633972},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8247675895690918},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.7615296840667725},{"id":"https://openalex.org/keywords/message-passing-interface","display_name":"Message Passing Interface","score":0.6547638773918152},{"id":"https://openalex.org/keywords/message-passing","display_name":"Message passing","score":0.6293208599090576},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.5684667825698853},{"id":"https://openalex.org/keywords/remote-direct-memory-access","display_name":"Remote direct memory access","score":0.5666860342025757},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5278109312057495},{"id":"https://openalex.org/keywords/massively-parallel","display_name":"Massively parallel","score":0.49677354097366333},{"id":"https://openalex.org/keywords/low-latency","display_name":"Low latency (capital markets)","score":0.4569161534309387},{"id":"https://openalex.org/keywords/reduction","display_name":"Reduction (mathematics)","score":0.4527552127838135},{"id":"https://openalex.org/keywords/supercomputer","display_name":"Supercomputer","score":0.4393099248409271},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.434699147939682},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.21228274703025818}],"concepts":[{"id":"https://openalex.org/C2781030343","wikidata":"https://www.wikidata.org/wiki/Q922437","display_name":"InfiniBand","level":2,"score":0.9008623957633972},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8247675895690918},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.7615296840667725},{"id":"https://openalex.org/C166782233","wikidata":"https://www.wikidata.org/wiki/Q127879","display_name":"Message Passing Interface","level":3,"score":0.6547638773918152},{"id":"https://openalex.org/C854659","wikidata":"https://www.wikidata.org/wiki/Q1859284","display_name":"Message passing","level":2,"score":0.6293208599090576},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.5684667825698853},{"id":"https://openalex.org/C130795937","wikidata":"https://www.wikidata.org/wiki/Q2561570","display_name":"Remote direct memory access","level":2,"score":0.5666860342025757},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5278109312057495},{"id":"https://openalex.org/C190475519","wikidata":"https://www.wikidata.org/wiki/Q544384","display_name":"Massively parallel","level":2,"score":0.49677354097366333},{"id":"https://openalex.org/C46637626","wikidata":"https://www.wikidata.org/wiki/Q6693015","display_name":"Low latency (capital markets)","level":2,"score":0.4569161534309387},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.4527552127838135},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.4393099248409271},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.434699147939682},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.21228274703025818},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/exampi52011.2020.00007","is_oa":false,"landing_page_url":"https://doi.org/10.1109/exampi52011.2020.00007","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 Workshop on Exascale MPI (ExaMPI)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W82183796","https://openalex.org/W104088248","https://openalex.org/W1568736435","https://openalex.org/W1572016165","https://openalex.org/W1581994707","https://openalex.org/W2042120213","https://openalex.org/W2043143850","https://openalex.org/W2056476515","https://openalex.org/W2059300917","https://openalex.org/W2091780466","https://openalex.org/W2102061396","https://openalex.org/W2107406335","https://openalex.org/W2115020004","https://openalex.org/W2118646353","https://openalex.org/W2131613942","https://openalex.org/W2136041296","https://openalex.org/W2166097017","https://openalex.org/W2167967054","https://openalex.org/W2563521659","https://openalex.org/W2767393956","https://openalex.org/W3037182822","https://openalex.org/W3043992826","https://openalex.org/W4242634435","https://openalex.org/W4252163020","https://openalex.org/W6633977482","https://openalex.org/W6675424803"],"related_works":["https://openalex.org/W1978254186","https://openalex.org/W4245217724","https://openalex.org/W4390917331","https://openalex.org/W1984788566","https://openalex.org/W4253931064","https://openalex.org/W3166154920","https://openalex.org/W2154082760","https://openalex.org/W2104094072","https://openalex.org/W2751263050","https://openalex.org/W1970720081"],"abstract_inverted_index":{"The":[0],"Message-Passing":[1],"Interface":[2],"(MPI)":[3],"is":[4],"the":[5,49,56,77,93,96,100,114],"de-facto":[6],"standard":[7],"for":[8,23,67,85,124,127,131],"designing":[9],"and":[10,39,51,63,70,81,88,129],"executing":[11],"applications":[12],"on":[13,92,99],"massively":[14],"parallel":[15],"hardware.":[16],"MPI":[17,68,71],"collectives":[18],"provide":[19],"a":[20,141],"convenient":[21],"abstraction":[22],"multiple":[24],"processes/threads":[25],"to":[26,44,48,119],"communicate":[27],"with":[28],"one":[29],"another.":[30],"Mellanox's":[31],"HDR":[32],"InfiniBand":[33],"switches":[34],"pro-vide":[35],"Scalable":[36],"Hierarchical":[37],"Aggregation":[38],"Reduction":[40],"Protocol":[41],"(SHARP)":[42],"capabilities":[43],"offload":[45],"collective":[46,97],"communication":[47],"network":[50],"reduce":[52],"CPU":[53],"involvement":[54],"in":[55,73,122],"process.":[57],"In":[58],"this":[59],"paper,":[60],"we":[61],"design":[62],"implement":[64],"SHARP-based":[65,83,115],"solutions":[66,84],"Reduce":[69],"Barrier":[72,132],"MVAPICH2-X.":[74],"We":[75],"evaluate":[76],"impact":[78],"of":[79,95,113,137],"proposed":[80],"existing":[82],"MPI_Allreduce,":[86],"MPI_Reduce,":[87],"MPI_Barrier":[89],"operations":[90],"have":[91],"performance":[94],"operation":[98],"8":[101],"<sup":[102],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[103],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">th</sup>":[104],"ranked":[105],"TACC":[106],"Frontera":[107],"HPC":[108],"system.":[109],"Our":[110],"experimental":[111],"evaluation":[112],"designs":[116],"show":[117],"up":[118],"5.4X":[120],"reduction":[121],"latency":[123],"Reduce,":[125],"5.1X":[126],"Allreduce,":[128],"7.1X":[130],"at":[133],"full":[134],"system":[135],"scale":[136],"7,861":[138],"nodes":[139],"over":[140],"host-based":[142],"solution.":[143]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
