- Timestamp:
- 05/30/11 15:00:20 (14 years ago)
- Location:
- ps/trunk/source/lib
- Files:
-
- 4 edited
-
file/io/io.h (modified) (2 diffs)
-
sysdep/arch/x86_x64/topology.cpp (modified) (13 diffs)
-
sysdep/arch/x86_x64/topology.h (modified) (4 diffs)
-
sysdep/os/win/wnuma.cpp (modified) (6 diffs)
Legend:
- Unmodified
- Added
- Removed
-
ps/trunk/source/lib/file/io/io.h
r9550 r9580 283 283 { 284 284 File file; 285 WARN_RETURN_STATUS_IF_ERR(file.Open(pathname, O_WRONLY)); 285 int oflag = O_WRONLY; 286 if(p.queueDepth != 1) 287 oflag |= O_DIRECT; 288 WARN_RETURN_STATUS_IF_ERR(file.Open(pathname, oflag)); 286 289 io::Operation op(file, (void*)data, size); 287 290 … … 319 322 { 320 323 File file; 321 RETURN_STATUS_IF_ERR(file.Open(pathname, O_RDONLY)); 324 int oflag = O_RDONLY; 325 if(p.queueDepth != 1) 326 oflag |= O_DIRECT; 327 RETURN_STATUS_IF_ERR(file.Open(pathname, oflag)); 322 328 io::Operation op(file, buf, size); 323 329 return io::Run(op, p, completedHook, issueHook); -
ps/trunk/source/lib/sysdep/arch/x86_x64/topology.cpp
r9423 r9580 1 /* Copyright (c) 201 0Wildfire Games1 /* Copyright (c) 2011 Wildfire Games 2 2 * 3 3 * Permission is hereby granted, free of charge, to any person obtaining … … 119 119 // APIC IDs 120 120 121 // APIC IDs consist of variable-length fields identifying the logical unit, 122 // core, package and shared cache. if they are available, we can determine 123 // the exact topology; otherwise we have to guess. 124 125 // APIC IDs should always be unique; if not (false is returned), then 126 // something went wrong and the IDs shouldn't be used. 127 // side effect: sorts IDs and `removes' duplicates. 128 static bool AreApicIdsUnique(u8* apicIds, size_t numIds) 129 { 130 std::sort(apicIds, apicIds+numIds); 131 u8* const end = std::unique(apicIds, apicIds+numIds); 132 const size_t numUnique = end-apicIds; 133 // all unique => IDs are valid. 134 if(numUnique == numIds) 135 return true; 136 137 // all zero => the system lacks an xAPIC. 138 if(numUnique == 1 && apicIds[0] == 0) 139 return false; 140 141 // duplicated IDs => something went wrong. for example, VMs might not 142 // expose all physical processors, and OS X still doesn't support 143 // thread affinity masks. 144 return false; 145 } 146 147 static u8 apicIdStorage[os_cpu_MaxProcessors]; 148 static const u8* apicIds; // = apicIdStorage, or 0 if IDs invalid 149 150 static Status InitApicIds() 121 typedef u8 ApicId; 122 123 // APIC IDs consist of variable-length bit fields indicating the logical, 124 // core, package and cache IDs. Vol3a says they aren't guaranteed to be 125 // contiguous, but that also applies to the individual fields. 126 // for example, quad-core E5630 CPUs report 4-bit core IDs 0, 1, 6, 7. 
127 128 // (IDs are indeterminate unless INFO::OK is returned) 129 static Status GetApicIds(ApicId* apicIds, ApicId* sortedApicIds, size_t numIds) 151 130 { 152 131 struct StoreEachProcessorsApicId 153 132 { 154 static void Callback(size_t processor, uintptr_t UNUSED(cbData)) 155 { 156 apicIdStorage[processor] = x86_x64_ApicId(); 133 static void Callback(size_t processor, uintptr_t cbData) 134 { 135 ApicId* apicIds = (ApicId*)cbData; 136 apicIds[processor] = x86_x64_ApicId(); 157 137 } 158 138 }; 159 // (fails if the OS limits our process affinity) 160 if(os_cpu_CallByEachCPU(StoreEachProcessorsApicId::Callback, (uintptr_t)&apicIds) == INFO::OK) 161 { 162 if(AreApicIdsUnique(apicIdStorage, os_cpu_NumProcessors())) 163 apicIds = apicIdStorage; // success, ApicIds will return this pointer 164 } 139 // (can fail due to restrictions on our process affinity or lack of 140 // support for affinity masks in OS X.) 141 RETURN_STATUS_IF_ERR(os_cpu_CallByEachCPU(StoreEachProcessorsApicId::Callback, (uintptr_t)apicIds)); 142 143 std::copy(apicIds, apicIds+numIds, sortedApicIds); 144 std::sort(sortedApicIds, sortedApicIds+numIds); 145 ApicId* const end = std::unique(sortedApicIds, sortedApicIds+numIds); 146 const size_t numUnique = end-sortedApicIds; 147 148 // all IDs are zero - system lacks an xAPIC. 149 if(numUnique == 1 && sortedApicIds[0] == 0) 150 return ERR::CPU_FEATURE_MISSING; // NOWARN 151 152 // not all unique - probably running in a VM whose emulation is 153 // imperfect or doesn't allow access to all processors. 
154 if(numUnique != numIds) 155 return ERR::FAIL; // NOWARN 165 156 166 157 return INFO::OK; 167 }168 169 const u8* ApicIds()170 {171 static ModuleInitState initState;172 ModuleInit(&initState, InitApicIds);173 return apicIds;174 }175 176 177 size_t ProcessorFromApicId(size_t apicId)178 {179 const u8* apicIds = ApicIds();180 const u8* end = apicIds + os_cpu_NumProcessors();181 const u8* pos = std::find(apicIds, end, apicId);182 if(pos == end)183 {184 DEBUG_WARN_ERR(ERR::LOGIC);185 return 0;186 }187 return pos - apicIds; // index188 158 } 189 159 … … 204 174 // CPU topology interface 205 175 206 207 176 struct CpuTopology // POD 208 177 { 178 size_t numProcessors; // total reported by OS 179 ApicId apicIds[os_cpu_MaxProcessors]; 180 ApicId sortedApicIds[os_cpu_MaxProcessors]; 181 209 182 ApicField logical; 210 183 ApicField core; … … 221 194 static Status InitCpuTopology() 222 195 { 196 cpuTopology.numProcessors = os_cpu_NumProcessors(); 197 223 198 const size_t maxLogicalPerCore = MaxLogicalPerCore(); 224 199 const size_t maxCoresPerPackage = MaxCoresPerPackage(); … … 237 212 cpuTopology.package.shift = logicalWidth + coreWidth; 238 213 239 const u8* apicIds = ApicIds(); 240 if(apicIds) 214 if(GetApicIds(cpuTopology.apicIds, cpuTopology.sortedApicIds, cpuTopology.numProcessors) == INFO::OK) 241 215 { 242 216 struct NumUniqueValuesInField 243 217 { 244 size_t operator()(const u8* apicIds, const ApicField& apicField) const218 size_t operator()(const ApicId* apicIds, const ApicField& apicField) const 245 219 { 246 std:: set<size_t> values;220 std::bitset<os_cpu_MaxProcessors> values; 247 221 for(size_t processor = 0; processor < os_cpu_NumProcessors(); processor++) 248 222 { 249 223 const size_t value = apicField(apicIds[processor]); 250 values. insert(value);224 values.set(value); 251 225 } 252 return values. 
size();226 return values.count(); 253 227 } 254 228 }; 255 229 256 cpuTopology.logicalPerCore = NumUniqueValuesInField()( apicIds, cpuTopology.logical);257 cpuTopology.coresPerPackage = NumUniqueValuesInField()( apicIds, cpuTopology.core);258 cpuTopology.numPackages = NumUniqueValuesInField()( apicIds, cpuTopology.package);259 } 260 else // the processor lacks an xAPIC, or theIDs are invalid230 cpuTopology.logicalPerCore = NumUniqueValuesInField()(cpuTopology.apicIds, cpuTopology.logical); 231 cpuTopology.coresPerPackage = NumUniqueValuesInField()(cpuTopology.apicIds, cpuTopology.core); 232 cpuTopology.numPackages = NumUniqueValuesInField()(cpuTopology.apicIds, cpuTopology.package); 233 } 234 else // processor lacks an xAPIC, or IDs are invalid 261 235 { 262 236 struct MinPackages … … 277 251 // maximum feasible number of cores and minimal number of packages: 278 252 const size_t minPackages = MinPackages()(maxCoresPerPackage, maxLogicalPerCore); 279 const size_t numProcessors = os_cpu_NumProcessors(); 280 for(size_t numPackages = minPackages; numPackages <= numProcessors; numPackages++) 281 { 282 if(numProcessors % numPackages != 0) 253 for(size_t numPackages = minPackages; numPackages <= cpuTopology.numProcessors; numPackages++) 254 { 255 if(cpuTopology.numProcessors % numPackages != 0) 283 256 continue; 284 const size_t logicalPerPackage = numProcessors / numPackages;257 const size_t logicalPerPackage = cpuTopology.numProcessors / numPackages; 285 258 const size_t minCoresPerPackage = DivideRoundUp(logicalPerPackage, maxLogicalPerCore); 286 259 for(size_t coresPerPackage = maxCoresPerPackage; coresPerPackage >= minCoresPerPackage; coresPerPackage--) … … 291 264 if(logicalPerCore <= maxLogicalPerCore) 292 265 { 293 ENSURE( numProcessors == numPackages*coresPerPackage*logicalPerCore);266 ENSURE(cpuTopology.numProcessors == numPackages*coresPerPackage*logicalPerCore); 294 267 cpuTopology.logicalPerCore = logicalPerCore; 295 268 cpuTopology.coresPerPackage = 
coresPerPackage; 296 269 cpuTopology.numPackages = numPackages; 270 271 // generate fake but legitimate APIC IDs 272 for(size_t processor = 0; processor < cpuTopology.numProcessors; processor++) 273 cpuTopology.apicIds[processor] = cpuTopology.sortedApicIds[processor] = processor; 297 274 return INFO::OK; 298 275 } … … 306 283 } 307 284 285 308 286 size_t cpu_topology_NumPackages() 309 287 { … … 324 302 } 325 303 304 305 static size_t IndexFromApicId(const ApicId* apicIds, size_t apicId) 306 { 307 ModuleInit(&cpuInitState, InitCpuTopology); 308 309 const ApicId* end = apicIds + cpuTopology.numProcessors; 310 const ApicId* pos = std::find(apicIds, end, apicId); 311 if(pos == end) 312 { 313 DEBUG_WARN_ERR(ERR::LOGIC); 314 return 0; 315 } 316 317 const size_t index = pos - apicIds; 318 return index; 319 } 320 321 322 size_t cpu_topology_ProcessorFromApicId(size_t apicId) 323 { 324 return IndexFromApicId(cpuTopology.apicIds, apicId); 325 } 326 326 327 size_t cpu_topology_LogicalFromApicId(size_t apicId) 327 328 { 329 const size_t contiguousId = IndexFromApicId(cpuTopology.sortedApicIds, apicId); 330 return cpuTopology.logical(contiguousId); 331 } 332 333 size_t cpu_topology_CoreFromApicId(size_t apicId) 334 { 335 const size_t contiguousId = IndexFromApicId(cpuTopology.sortedApicIds, apicId); 336 return cpuTopology.core(contiguousId); 337 } 338 339 size_t cpu_topology_PackageFromApicId(size_t apicId) 340 { 341 const size_t contiguousId = IndexFromApicId(cpuTopology.sortedApicIds, apicId); 342 return cpuTopology.package(contiguousId); 343 } 344 345 346 size_t cpu_topology_ApicId(size_t idxLogical, size_t idxCore, size_t idxPackage) 347 { 328 348 ModuleInit(&cpuInitState, InitCpuTopology); 329 return cpuTopology.logical(apicId); 330 } 331 332 size_t cpu_topology_CoreFromApicId(size_t apicId) 333 { 334 ModuleInit(&cpuInitState, InitCpuTopology); 335 return cpuTopology.core(apicId); 336 } 337 338 size_t cpu_topology_PackageFromApicId(size_t apicId) 339 { 340 
ModuleInit(&cpuInitState, InitCpuTopology); 341 return cpuTopology.package(apicId); 342 } 343 344 size_t cpu_topology_ApicId(size_t idxLogical, size_t idxCore, size_t idxPackage) 345 { 346 ModuleInit(&cpuInitState, InitCpuTopology); 347 348 // NB: APIC IDs aren't guaranteed to be contiguous; 349 // quad-core E5630 CPUs report 4-bit core IDs 0, 1, 6, 7. 350 // we therefore compute an index into the sorted ApicIds array. 351 352 size_t idx = 0; 349 350 size_t contiguousId = 0; 353 351 ENSURE(idxPackage < cpuTopology.numPackages); 354 idx+= idxPackage;355 356 idx*= cpuTopology.coresPerPackage;352 contiguousId += idxPackage; 353 354 contiguousId *= cpuTopology.coresPerPackage; 357 355 ENSURE(idxCore < cpuTopology.coresPerPackage); 358 idx+= idxCore;359 360 idx*= cpuTopology.logicalPerCore;356 contiguousId += idxCore; 357 358 contiguousId *= cpuTopology.logicalPerCore; 361 359 ENSURE(idxLogical < cpuTopology.logicalPerCore); 362 idx += idxLogical; 363 364 ENSURE(idx < os_cpu_NumProcessors()); 365 const size_t apicId = ApicIds()[idx]; 366 return apicId; 360 contiguousId += idxLogical; 361 362 ENSURE(contiguousId < cpuTopology.numProcessors); 363 return cpuTopology.sortedApicIds[contiguousId]; 367 364 } 368 365 … … 419 416 } 420 417 421 bool Matches(u8 id) const422 { 423 return m_cacheId == id;418 bool Matches(u8 cacheId) const 419 { 420 return m_cacheId == cacheId; 424 421 } 425 422 … … 453 450 }; 454 451 455 static void DetermineCachesProcessorMask(const u8* apicIds, uintptr_t* cachesProcessorMask, size_t& numCaches)452 static void DetermineCachesProcessorMask(const ApicId* apicIds, uintptr_t* cachesProcessorMask, size_t& numCaches) 456 453 { 457 454 CacheRelations cacheRelations; … … 462 459 for(size_t processor = 0; processor < os_cpu_NumProcessors(); processor++) 463 460 { 464 const u8apicId = apicIds[processor];461 const ApicId apicId = apicIds[processor]; 465 462 const u8 cacheId = u8(apicId & cacheIdMask); 466 463 cacheRelations.Add(cacheId, processor); … … 514 
511 static Status InitCacheTopology() 515 512 { 516 const u8* apicIds = ApicIds();517 DetermineCachesProcessorMask( apicIds, cacheTopology.cachesProcessorMask, cacheTopology.numCaches);513 ModuleInit(&cpuInitState, InitCpuTopology); 514 DetermineCachesProcessorMask(cpuTopology.apicIds, cacheTopology.cachesProcessorMask, cacheTopology.numCaches); 518 515 DetermineProcessorsCache(cacheTopology.cachesProcessorMask, cacheTopology.numCaches, cacheTopology.processorsCache, os_cpu_NumProcessors()); 519 516 return INFO::OK; -
ps/trunk/source/lib/sysdep/arch/x86_x64/topology.h
r8922 r9580 1 /* Copyright (c) 201 0Wildfire Games1 /* Copyright (c) 2011 Wildfire Games 2 2 * 3 3 * Permission is hereby granted, free of charge, to any person obtaining … … 29 29 #define INCLUDED_TOPOLOGY 30 30 31 /**32 * @return pointer to an array (up to os_cpu_MaxProcessors entries;33 * os_cpu_NumProcessors() of them are valid) of the processors'34 * unique, strictly monotonically increasing APIC IDs --35 * or zero if no xAPIC is present or process affinity is restricted.36 **/37 LIB_API const u8* ApicIds();38 39 LIB_API size_t ProcessorFromApicId(size_t apicId);40 41 42 31 //----------------------------------------------------------------------------- 43 32 // cpu 44 33 45 // the CPU topology, i.e. how many packages, cores and SMT units are46 // actually present and enabled, is useful for detecting SMP systems,47 // predicting performance and dimensioning thread pools.34 // the CPU topology, i.e. how many packages, cores and logical processors are 35 // actually present and enabled, is useful for parameterizing parallel 36 // algorithms, especially on NUMA systems. 48 37 // 49 38 // note: OS abstractions usually only mention "processors", which could be … … 62 51 63 52 /** 64 * @return number of *enabled* hyperthreading units per core.65 * (2 on P4 EE)53 * @return number of *enabled* logical processors (aka Hyperthreads) 54 * per core. (2 on P4 EE) 66 55 **/ 67 56 LIB_API size_t cpu_topology_LogicalPerCore(); 68 57 69 58 LIB_API size_t cpu_topology_ProcessorFromApicId(size_t apicId); 70 59 LIB_API size_t cpu_topology_PackageFromApicId(size_t apicId); 71 60 LIB_API size_t cpu_topology_CoreFromApicId(size_t apicId); … … 91 80 92 81 /** 93 * @return L2 cache number (zero-based) to which \<processor\>belongs.82 * @return L2 cache number (zero-based) to which the given processor belongs. 
94 83 **/ 95 84 LIB_API size_t cache_topology_CacheFromProcessor(size_t processor); 96 85 97 86 /** 98 * @return bit-mask of all processors sharing \<cache\>.87 * @return bit-mask of all processors sharing the given cache. 99 88 **/ 100 89 LIB_API uintptr_t cache_topology_ProcessorMaskFromCache(size_t cache); -
ps/trunk/source/lib/sysdep/os/win/wnuma.cpp
r9545 r9580 86 86 87 87 88 // cached results of FindNodeWithProcessor for each processor89 static size_t processorsNode[os_cpu_MaxProcessors];90 91 static void FillProcessorsNode()92 {93 for(size_t processor = 0; processor < os_cpu_NumProcessors(); processor++)94 {95 Node* node = FindNodeWithProcessor(processor);96 if(node)97 processorsNode[processor] = node-nodes;98 else99 DEBUG_WARN_ERR(ERR::LOGIC);100 }101 }102 103 104 88 //----------------------------------------------------------------------------- 105 89 // Windows topology … … 229 213 } 230 214 231 static void PopulateProcessorMaskFromApicId(u32 apicId, uintptr_t& processorMask)232 {233 const u8* apicIds = ApicIds();234 for(size_t processor = 0; processor < os_cpu_NumProcessors(); processor++)235 {236 if(apicIds[processor] == apicId)237 {238 processorMask |= Bit<uintptr_t>(processor);239 return;240 }241 }242 243 DEBUG_WARN_ERR(ERR::LOGIC); // APIC ID not found244 }245 246 215 struct ProximityDomain 247 216 { … … 263 232 if(affinityAPIC) 264 233 { 234 const size_t processor = cpu_topology_ProcessorFromApicId(affinityAPIC->apicId); 265 235 const u32 proximityDomainNumber = affinityAPIC->ProximityDomainNumber(); 266 236 ProximityDomain& proximityDomain = proximityDomains[proximityDomainNumber]; 267 PopulateProcessorMaskFromApicId(affinityAPIC->apicId, proximityDomain.processorMask);237 proximityDomain.processorMask |= Bit<uintptr_t>(processor); 268 238 } 269 239 } … … 282 252 if(!node) 283 253 node = AddNode(); 254 // (we don't know Windows' nodeNumber; it has hopefully already been set) 284 255 node->proximityDomainNumber = proximityDomainNumber; 285 256 node->processorMask = proximityDomain.processorMask; … … 317 288 } 318 289 319 FillProcessorsNode();320 290 return INFO::OK; 321 291 } … … 331 301 (void)ModuleInit(&initState, InitTopology); 332 302 ENSURE(processor < os_cpu_NumProcessors()); 333 return processorsNode[processor]; 303 Node* node = FindNodeWithProcessor(processor); 304 ENSURE(node); 305 return 
nodes-node; 334 306 } 335 307
Note: See TracChangeset for help on using the changeset viewer.
