{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,26]],"date-time":"2026-06-26T04:07:07Z","timestamp":1782446827580,"version":"3.54.5"},"reference-count":76,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"3","license":[{"start":{"date-parts":[[2023,8,1]],"date-time":"2023-08-01T00:00:00Z","timestamp":1690848000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/2.zoppoz.workers.dev:443\/https\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2023,8,1]],"date-time":"2023-08-01T00:00:00Z","timestamp":1690848000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/2.zoppoz.workers.dev:443\/https\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,8,1]],"date-time":"2023-08-01T00:00:00Z","timestamp":1690848000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/2.zoppoz.workers.dev:443\/https\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Comput. Intell. Mag."],"published-print":{"date-parts":[[2023,8]]},"DOI":"10.1109\/mci.2023.3277769","type":"journal-article","created":{"date-parts":[[2023,7,19]],"date-time":"2023-07-19T20:01:06Z","timestamp":1689796866000},"page":"29-40","source":"Crossref","is-referenced-by-count":14,"title":["Jack and Masters of all Trades: One-Pass Learning Sets of Model Sets From Large Pre-Trained Models"],"prefix":"10.1109","volume":"18","author":[{"ORCID":"https:\/\/2.zoppoz.workers.dev:443\/https\/orcid.org\/0000-0002-5623-0062","authenticated-orcid":false,"given":"Han Xiang","family":"Choong","sequence":"first","affiliation":[{"name":"Nanyang Technological University, Singapore"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/2.zoppoz.workers.dev:443\/https\/orcid.org\/0000-0002-4480-169X","authenticated-orcid":false,"given":"Yew-Soon","family":"Ong","sequence":"additional","affiliation":[{"name":"Agency for Science, Technology and Research (A*STAR) and Nanyang Technological University, Singapore"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/2.zoppoz.workers.dev:443\/https\/orcid.org\/0000-0002-6080-855X","authenticated-orcid":false,"given":"Abhishek","family":"Gupta","sequence":"additional","affiliation":[{"name":"Agency for Science, Technology and Research (A*STAR) and Nanyang Technological University, Singapore"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/2.zoppoz.workers.dev:443\/https\/orcid.org\/0000-0003-1143-3138","authenticated-orcid":false,"given":"Caishun","family":"Chen","sequence":"additional","affiliation":[{"name":"Agency for Science, Technology and Research (A*STAR), Singapore"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/2.zoppoz.workers.dev:443\/https\/orcid.org\/0000-0003-2058-6574","authenticated-orcid":false,"given":"Ray","family":"Lim","sequence":"additional","affiliation":[{"name":"Nanyang Technological University, Singapore"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref13","first-page":"3854","article-title":"Learning to branch for multi-task learning","volume":"119","author":"guo","year":"0","journal-title":"Proc 37th Int Conf Mach Learn"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1007\/s12293-021-00346-5"},{"key":"ref12","first-page":"5824","article-title":"Gradient surgery for multi-task learning","volume":"33","author":"yu","year":"0","journal-title":"Proc Int Conf Adv Neural Inf Process Syst"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/TEVC.2021.3086308"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/SmartCloud.2016.18"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1007\/s12559-022-10012-8"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/3469029"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2020.2981733"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2021.3052509"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1007\/s12293-021-00347-4"},{"key":"ref11","article-title":"Understanding and improving information transfer in multi-task learning","author":"wu","year":"0","journal-title":"Proc 8th Int Conf Learn Representations"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1007\/s12559-016-9395-7"},{"key":"ref10","first-page":"2847","article-title":"On the expressive power of deep neural networks","volume":"70","author":"raghu","year":"0","journal-title":"Proc 34th Int Conf Mach Learn Res"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58115-2_44"},{"key":"ref17","article-title":"Deep compression: Compressing deep neural network with pruning, trained quantization and Huffman coding","author":"han","year":"0","journal-title":"Proc 4th Int Conf Learn Representations"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1155\/2021\/2609700"},{"key":"ref19","author":"tegmark","year":"2017","journal-title":"Life 3 0 Being in the Age of Human Intelligence"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/3358205"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/TETCI.2017.2769104"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/MCI.2022.3155332"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/TEVC.2019.2909744"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1007\/s12293-022-00355-y"},{"key":"ref48","first-page":"440","article-title":"A survey of multiobjective evolutionary algorithms based on decomposition","volume":"21","author":"trivedi","year":"2017","journal-title":"IEEE Trans Evol Comput"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/4235.996017"},{"key":"ref42","article-title":"Once-for-all: Train one network and specialize it for efficient deployment","author":"cai","year":"0","journal-title":"Int Conf Learn Representations"},{"key":"ref41","volume":"491","author":"ehrgott","year":"2005","journal-title":"Multicriteria Optimization"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/MC.2017.3641648"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2018.2844341"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1007\/s12559-018-9620-7"},{"key":"ref8","article-title":"On the opportunities and risks of foundation models","author":"bommasani","year":"2021"},{"key":"ref7","article-title":"Data management opportunities for foundation models","author":"orr","year":"0","journal-title":"Proc 12th Annu Conf Innov Data Syst Res"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2019.04.009"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1093\/ectj\/utaa005"},{"key":"ref3","doi-asserted-by":"crossref","first-page":"541","DOI":"10.1162\/neco.1989.1.4.541","article-title":"Backpropagation applied to handwritten zip code recognition","volume":"1","author":"jackel","year":"1989","journal-title":"Neural Comput"},{"key":"ref6","first-page":"1877","article-title":"Language models are few-shot learners","volume":"33","author":"brown","year":"0","journal-title":"Proc Int Conf Adv Neural Inf Process Syst"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01179"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2019.2928174"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00474"},{"key":"ref34","article-title":"Edge intelligence: Architectures, challenges, and applications","author":"xu","year":"2020"},{"key":"ref37","article-title":"Scale efficiently: Insights from pre-training and fine-tuning transformers","author":"tay","year":"0","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2858826"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1109\/MCI.2021.3084393"},{"key":"ref30","first-page":"8792","article-title":"Generalized cross entropy loss for training deep neural networks with noisy labels","author":"zhang","year":"0","journal-title":"Proc 32nd Int Conf Neural Inf Process Syst"},{"key":"ref74","article-title":"ADAM: A method for stochastic optimization","author":"kingma","year":"0","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/BigData.2017.8258272"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1145\/3453474"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/3381831"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1142\/S2705078520300042"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219970"},{"key":"ref38","article-title":"Scaling laws for neural language models","author":"kaplan","year":"2020"},{"key":"ref71","article-title":"Learning multiple layers of features from tiny images","author":"krizhevsky","year":"2009"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2012.2211477"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2016.2554622"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.3390\/su14063269"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/TEVC.2015.2458037"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1098\/rspa.2017.0457"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-63836-8_23"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1007\/s12559-016-9395-7"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1016\/j.chemosphere.2018.12.128"},{"key":"ref20","article-title":"Deep learning: A critical appraisal","author":"marcus","year":"2018"},{"key":"ref64","article-title":"Decoupled weight decay regularization","author":"loshchilov","year":"0","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-demos.6"},{"key":"ref22","first-page":"5958","article-title":"Train big, then compress: Rethinking model size for efficient training and inference of transformers","volume":"119","author":"li","year":"0","journal-title":"Proc 37th Int Conf Mach Learn Res"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.3115\/1073083.1073135"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i09.7123"},{"key":"ref65","first-page":"4596","article-title":"Adafactor: Adaptive learning rates with sublinear memory cost","volume":"80","author":"shazeer","year":"0","journal-title":"Proc 35th Int Conf Mach Learn"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2022.108962"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3100554"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00049"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/TEVC.2021.3139437"},{"key":"ref62","first-page":"1","article-title":"Beyond english-centric multilingual machine translation","volume":"22","author":"fan","year":"2021","journal-title":"J Mach Learn Res"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W19-5301"}],"container-title":["IEEE Computational Intelligence Magazine"],"original-title":[],"link":[{"URL":"https:\/\/2.zoppoz.workers.dev:443\/http\/xplorestaging.ieee.org\/ielx7\/10207\/10188447\/10188456.pdf?arnumber=10188456","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,7]],"date-time":"2023-08-07T17:57:26Z","timestamp":1691431046000},"score":1,"resource":{"primary":{"URL":"https:\/\/2.zoppoz.workers.dev:443\/https\/ieeexplore.ieee.org\/document\/10188456\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,8]]},"references-count":76,"journal-issue":{"issue":"3"},"URL":"https:\/\/2.zoppoz.workers.dev:443\/https\/doi.org\/10.1109\/mci.2023.3277769","relation":{},"ISSN":["1556-603X","1556-6048"],"issn-type":[{"value":"1556-603X","type":"print"},{"value":"1556-6048","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,8]]}}}