{"id":"https://openalex.org/W4417276558","doi":"https://doi.org/10.48550/arxiv.2510.19400","title":"Seeing Across Views: Benchmarking Spatial Reasoning of Vision-Language Models in Robotic Scenes","display_name":"Seeing Across Views: Benchmarking Spatial Reasoning of Vision-Language Models in Robotic Scenes","publication_year":2025,"publication_date":"2025-10-22","ids":{"openalex":"https://openalex.org/W4417276558","doi":"https://doi.org/10.48550/arxiv.2510.19400"},"language":null,"primary_location":{"id":"pmh:oai:arXiv.org:2510.19400","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2510.19400","pdf_url":"https://arxiv.org/pdf/2510.19400","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2510.19400","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5065622088","display_name":"Zhiyuan Feng","orcid":"https://orcid.org/0000-0003-0366-7639"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Feng, Zhiyuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111134948","display_name":"Zhaolu Kang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kang, Zhaolu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049119787","display_name":"Qijie Wang","orcid":"https://orcid.org/0000-0002-9910-1455"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Qijie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063237877","display_name":"Zhiying Du","orcid":"https://orcid.org/0009-0000-9155-1534"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Du, Zhiying","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101509693","display_name":"Jingjie Yan","orcid":"https://orcid.org/0000-0003-1213-0921"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yan, Jiongrui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109976089","display_name":"Shubin Shi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shi, Shubin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023581735","display_name":"Chengbo Yuan","orcid":"https://orcid.org/0000-0002-9004-0908"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuan, Chengbo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075528828","display_name":"Huizhi Liang","orcid":"https://orcid.org/0000-0003-4408-4528"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liang, Huizhi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103130115","display_name":"Yu Deng","orcid":"https://orcid.org/0000-0001-7855-5100"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Deng, Yu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107503613","display_name":"Qixiu Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Qixiu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113231897","display_name":"Rushuai Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Rushuai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088308455","display_name":"A.X. An","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"An, Arctanx","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067167924","display_name":"Lanqin Zheng","orcid":"https://orcid.org/0000-0001-9378-5027"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zheng, Leqi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100398440","display_name":"Weijie Wang","orcid":"https://orcid.org/0009-0004-4320-9119"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Weijie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009137213","display_name":"Shawn Chen","orcid":"https://orcid.org/0000-0002-6678-3293"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Shawn","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032527095","display_name":"Shenghua Xu","orcid":"https://orcid.org/0000-0001-5433-0409"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Sicheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100678419","display_name":"Yaobo Liang","orcid":"https://orcid.org/0000-0002-6595-5145"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liang, Yaobo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076804411","display_name":"Jiaolong Yang","orcid":"https://orcid.org/0000-0002-7314-6567"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Jiaolong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5101666011","display_name":"Baining Guo","orcid":"https://orcid.org/0000-0001-8349-8868"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guo, Baining","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":19,"corresponding_author_ids":["https://openalex.org/A5065622088"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8986999988555908,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8986999988555908,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10709","display_name":"Social Robot Interaction and HRI","score":0.03370000049471855,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11431","display_name":"Action Observation and Synchronization","score":0.006800000090152025,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.723800003528595},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.590399980545044},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.5740000009536743},{"id":"https://openalex.org/keywords/robotics","display_name":"Robotics","score":0.5503000020980835},{"id":"https://openalex.org/keywords/spatial-intelligence","display_name":"Spatial intelligence","score":0.5426999926567078},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.47540000081062317},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.4223000109195709},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.41670000553131104}],"concepts":[{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.723800003528595},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7099999785423279},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6459000110626221},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.590399980545044},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.5740000009536743},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.5503000020980835},{"id":"https://openalex.org/C155911833","wikidata":"https://www.wikidata.org/wiki/Q3817354","display_name":"Spatial intelligence","level":2,"score":0.5426999926567078},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.4828999936580658},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.47540000081062317},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.4223000109195709},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.41670000553131104},{"id":"https://openalex.org/C100609095","wikidata":"https://www.wikidata.org/wiki/Q1335050","display_name":"Embodied cognition","level":2,"score":0.41620001196861267},{"id":"https://openalex.org/C192327766","wikidata":"https://www.wikidata.org/wiki/Q1038799","display_name":"Cognitive robotics","level":3,"score":0.4065000116825104},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.39969998598098755},{"id":"https://openalex.org/C100776233","wikidata":"https://www.wikidata.org/wiki/Q2532492","display_name":"Bridge (graph theory)","level":2,"score":0.3774000108242035},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.35580000281333923},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.35569998621940613},{"id":"https://openalex.org/C2780385302","wikidata":"https://www.wikidata.org/wiki/Q367158","display_name":"Protocol (science)","level":3,"score":0.3531999886035919},{"id":"https://openalex.org/C159620131","wikidata":"https://www.wikidata.org/wiki/Q1938983","display_name":"Spatial analysis","level":2,"score":0.3393000066280365},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.3384999930858612},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2971000075340271},{"id":"https://openalex.org/C83725634","wikidata":"https://www.wikidata.org/wiki/Q7268699","display_name":"Qualitative reasoning","level":2,"score":0.2639000117778778},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.26330000162124634},{"id":"https://openalex.org/C115901376","wikidata":"https://www.wikidata.org/wiki/Q184199","display_name":"Automation","level":2,"score":0.25540000200271606}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2510.19400","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2510.19400","pdf_url":"https://arxiv.org/pdf/2510.19400","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2510.19400","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2510.19400","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2510.19400","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2510.19400","pdf_url":"https://arxiv.org/pdf/2510.19400","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4417276558.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Vision-language":[0],"models":[1,156],"(VLMs)":[2],"are":[3,53,186],"essential":[4],"to":[5,10,42,64,97,208,226],"Embodied":[6],"AI,":[7],"enabling":[8],"robots":[9],"perceive,":[11],"reason,":[12],"and":[13,67,127,141,182,193,233],"act":[14],"in":[15,56,106,168,189,210,229],"complex":[16],"environments.":[17],"They":[18],"also":[19,240],"serve":[20],"as":[21,59,222],"the":[22,25,48,99,163,211],"foundation":[23],"for":[24,78,245],"recent":[26],"Vision-Language-Action":[27],"(VLA)":[28],"models.":[29],"Yet":[30],"most":[31],"evaluations":[32],"of":[33,104,111,135],"VLMs":[34,71,105,166,232],"focus":[35],"on":[36,197],"single-view":[37,200],"settings,":[38],"leaving":[39],"their":[40],"ability":[41],"integrate":[43],"multi-view":[44,76,100,169,190,246],"information":[45],"underexplored.":[46],"At":[47],"same":[49],"time,":[50],"multi-camera":[51],"setups":[52],"increasingly":[54],"standard":[55],"robotic":[57,79,107,128,170,183,191,212],"platforms,":[58],"they":[60],"provide":[61],"complementary":[62],"perspectives":[63],"mitigate":[65],"occlusion":[66],"depth":[68],"ambiguity.":[69],"Whether":[70],"can":[72],"effectively":[73],"leverage":[74],"such":[75],"inputs":[77],"reasoning":[80,102],"therefore":[81],"remains":[82],"an":[83,223],"open":[84,224],"question.":[85],"To":[86],"bridge":[87],"this":[88],"gap,":[89],"we":[90],"introduce":[91],"MV-RoboBench,":[92],"a":[93,132,241],"benchmark":[94],"specifically":[95],"designed":[96],"evaluate":[98,131],"spatial":[101,125,180,201,213],"capabilities":[103],"manipulation.":[108],"MV-RoboBench":[109,221],"consists":[110],"1.7k":[112],"manually":[113],"curated":[114],"QA":[115],"items":[116],"across":[117],"eight":[118],"subtasks,":[119],"divided":[120],"into":[121],"two":[122,176],"primary":[123],"categories:":[124],"understanding":[126,202],"execution.":[129],"We":[130,219],"diverse":[133],"set":[134],"existing":[136,198],"VLMs,":[137],"including":[138],"both":[139],"open-source":[140],"closed-source":[142],"models,":[143],"along":[144],"with":[145],"enhanced":[146],"versions":[147],"incorporating":[148],"CoT-inspired":[149],"techniques.":[150],"The":[151],"results":[152],"show":[153],"that":[154],"state-of-the-art":[155],"remain":[157],"far":[158],"below":[159],"human":[160],"performance,":[161],"underscoring":[162],"substantial":[164],"challenges":[165],"face":[167],"perception.":[171],"Additionally,":[172],"our":[173,217],"analysis":[174],"uncovers":[175],"key":[177],"findings:":[178],"(i)":[179],"intelligence":[181],"task":[184],"execution":[185],"positively":[187],"correlated":[188],"scenarios;":[192],"(ii)":[194],"strong":[195],"performance":[196],"general-purpose":[199],"benchmarks":[203],"does":[204],"not":[205,236],"reliably":[206],"translate":[207],"success":[209],"tasks":[214],"assessed":[215],"by":[216],"benchmark.":[218],"release":[220],"resource":[225],"foster":[227],"progress":[228],"spatially":[230],"grounded":[231],"VLAs,":[234],"providing":[235],"only":[237],"data":[238],"but":[239],"standardized":[242],"evaluation":[243],"protocol":[244],"embodied":[247],"reasoning.":[248]},"counts_by_year":[],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-24T00:00:00"}
