{"id":"https://openalex.org/W4300980463","doi":"https://doi.org/10.48550/arxiv.2209.15466","title":"Sparsity-Constrained Optimal Transport","display_name":"Sparsity-Constrained Optimal Transport","publication_year":2022,"publication_date":"2022-09-30","ids":{"openalex":"https://openalex.org/W4300980463","doi":"https://doi.org/10.48550/arxiv.2209.15466"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2209.15466","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2209.15466","pdf_url":"https://arxiv.org/pdf/2209.15466","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2209.15466","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101848358","display_name":"Tianlin Liu","orcid":"https://orcid.org/0000-0002-3485-2629"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Liu, Tianlin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001091279","display_name":"Joan Puigcerver","orcid":"https://orcid.org/0000-0002-1926-2233"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Puigcerver, Joan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5049123454","display_name":"Mathieu Blondel","orcid":"https://orcid.org/0000-0002-2366-2993"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Blondel, Mathieu","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5101848358"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9954000115394592,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9954000115394592,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10129","display_name":"Glioma Diagnosis and Treatment","score":0.9866999983787537,"subfield":{"id":"https://openalex.org/subfields/2716","display_name":"Genetics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T10500","display_name":"Sparse and Compressive Sensing Techniques","score":0.9861000180244446,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.6377791166305542},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6302791833877563},{"id":"https://openalex.org/keywords/regularization","display_name":"Regularization (linguistics)","score":0.6274747252464294},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5942792892456055},{"id":"https://openalex.org/keywords/cardinality","display_name":"Cardinality (data modeling)","score":0.5492515563964844},{"id":"https://openalex.org/keywords/quadratic-equation","display_name":"Quadratic equation","score":0.5339006185531616},{"id":"https://openalex.org/keywords/semimartingale","display_name":"Semimartingale","score":0.5195698738098145},{"id":"https://openalex.org/keywords/quadratic-growth","display_name":"Quadratic growth","score":0.497754842042923},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.4425146281719208},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3934587836265564},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2782183289527893},{"id":"https://openalex.org/keywords/applied-mathematics","display_name":"Applied mathematics","score":0.2084837555885315},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.18789342045783997},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.10118252038955688}],"concepts":[{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.6377791166305542},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6302791833877563},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.6274747252464294},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5942792892456055},{"id":"https://openalex.org/C87117476","wikidata":"https://www.wikidata.org/wiki/Q362383","display_name":"Cardinality (data modeling)","level":2,"score":0.5492515563964844},{"id":"https://openalex.org/C129844170","wikidata":"https://www.wikidata.org/wiki/Q41299","display_name":"Quadratic equation","level":2,"score":0.5339006185531616},{"id":"https://openalex.org/C39943821","wikidata":"https://www.wikidata.org/wiki/Q506346","display_name":"Semimartingale","level":2,"score":0.5195698738098145},{"id":"https://openalex.org/C195956108","wikidata":"https://www.wikidata.org/wiki/Q7268362","display_name":"Quadratic growth","level":2,"score":0.497754842042923},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.4425146281719208},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3934587836265564},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2782183289527893},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.2084837555885315},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.18789342045783997},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.10118252038955688},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2209.15466","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2209.15466","pdf_url":"https://arxiv.org/pdf/2209.15466","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2209.15466","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2209.15466","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2209.15466","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2209.15466","pdf_url":"https://arxiv.org/pdf/2209.15466","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W3110410503","https://openalex.org/W2023096387","https://openalex.org/W2215800250","https://openalex.org/W2077146756","https://openalex.org/W4293088966","https://openalex.org/W2917463375","https://openalex.org/W2939562063","https://openalex.org/W4238939226","https://openalex.org/W4288366317","https://openalex.org/W2623997598"],"abstract_inverted_index":{"Regularized":[0],"optimal":[1,237],"transport":[2],"(OT)":[3],"is":[4,115,159,213],"now":[5],"increasingly":[6],"used":[7,129],"as":[8,12,135,142,194,222],"a":[9,13,100,195,228],"loss":[10],"or":[11],"matching":[14],"layer":[15],"in":[16,97,202],"neural":[17,143],"networks.":[18,144],"Entropy-regularized":[19],"OT":[20,104,126,200,209],"can":[21,70,127,182,191],"be":[22,71,128,183,192],"computed":[23],"using":[24],"the":[25,86,92,110,166,174,203,219,236],"Sinkhorn":[26],"algorithm":[27],"but":[28],"it":[29],"leads":[30,61],"to":[31,62,120,130,227],"fully-dense":[32],"transportation":[33,93,111],"plans,":[34],"meaning":[35],"that":[36,69,148,173],"all":[37,43],"sources":[38],"are":[39,153,179],"(fractionally)":[40],"matched":[41,154],"with":[42,73,105,138,155,185],"targets.":[44],"To":[45],"address":[46],"this":[47,98],"issue,":[48],"several":[49],"works":[50],"have":[51],"investigated":[52],"quadratic":[53,78],"regularization":[54,57,79],"instead.":[55],"This":[56],"preserves":[58],"sparsity":[59,234],"and":[60,64,181,207,233],"unconstrained":[63],"smooth":[65],"(semi)":[66,176],"dual":[67,177],"objectives,":[68],"solved":[72,184],"off-the-shelf":[74],"gradient":[75,187],"methods.":[76,188],"Unfortunately,":[77],"does":[80],"not":[81],"give":[82],"direct":[83],"control":[84],"over":[85],"cardinality":[87,107,169],"(number":[88],"of":[89,91,123,168,218,235],"nonzeros)":[90],"plan.":[94,112,238],"We":[95],"propose":[96],"paper":[99],"new":[101],"approach":[102],"for":[103,161],"explicit":[106],"constraints":[108,146],"on":[109],"Our":[113,189],"work":[114],"motivated":[116],"by":[117],"an":[118,156],"application":[119],"sparse":[121],"mixture":[122],"experts,":[124],"where":[125],"match":[131],"input":[132],"tokens":[133,152],"such":[134,141],"image":[136],"patches":[137],"expert":[139],"models":[140],"Cardinality":[145],"ensure":[147],"at":[149],"most":[150],"$k$":[151,212,223],"expert,":[157],"which":[158],"crucial":[160],"computational":[162],"performance":[163],"reasons.":[164],"Despite":[165],"nonconvexity":[167],"constraints,":[170],"we":[171],"show":[172],"corresponding":[175],"problems":[178],"tractable":[180],"first-order":[186],"method":[190],"thought":[193],"middle":[196],"ground":[197],"between":[198,230],"unregularized":[199],"(recovered":[201,210],"limit":[204],"case":[205],"$k=1$)":[206],"quadratically-regularized":[208],"when":[211],"large":[214],"enough).":[215],"The":[216],"smoothness":[217],"objectives":[220],"increases":[221],"increases,":[224],"giving":[225],"rise":[226],"trade-off":[229],"convergence":[231],"speed":[232]},"counts_by_year":[{"year":2024,"cited_by_count":3}],"updated_date":"2026-02-09T09:26:11.010843","created_date":"2025-10-10T00:00:00"}
