This Trac instance is not used for development anymore!

We migrated our development workflow to git and Gitea.
To test the future redirection, replace "trac" with "ariadne" in the page URL.

Changeset 9580 for ps


Ignore:
Timestamp:
05/30/11 15:00:20 (14 years ago)
Author:
Jan Wassenberg
Message:

sync with work. simplify wnuma, refactor+fix topology (bugs: APIC IDs array didn't correspond to OS processor number; couldn't handle noncontiguous APIC ID field values)

Location:
ps/trunk/source/lib
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • ps/trunk/source/lib/file/io/io.h

    r9550 r9580  
    283283{
    284284    File file;
    285     WARN_RETURN_STATUS_IF_ERR(file.Open(pathname, O_WRONLY));
     285    int oflag = O_WRONLY;
     286    if(p.queueDepth != 1)
     287        oflag |= O_DIRECT;
     288    WARN_RETURN_STATUS_IF_ERR(file.Open(pathname, oflag));
    286289    io::Operation op(file, (void*)data, size);
    287290
     
    319322{
    320323    File file;
    321     RETURN_STATUS_IF_ERR(file.Open(pathname, O_RDONLY));
     324    int oflag = O_RDONLY;
     325    if(p.queueDepth != 1)
     326        oflag |= O_DIRECT;
     327    RETURN_STATUS_IF_ERR(file.Open(pathname, oflag));
    322328    io::Operation op(file, buf, size);
    323329    return io::Run(op, p, completedHook, issueHook);
  • ps/trunk/source/lib/sysdep/arch/x86_x64/topology.cpp

    r9423 r9580  
    1 /* Copyright (c) 2010 Wildfire Games
     1/* Copyright (c) 2011 Wildfire Games
    22 *
    33 * Permission is hereby granted, free of charge, to any person obtaining
     
    119119// APIC IDs
    120120
    121 // APIC IDs consist of variable-length fields identifying the logical unit,
    122 // core, package and shared cache. if they are available, we can determine
    123 // the exact topology; otherwise we have to guess.
    124 
    125 // APIC IDs should always be unique; if not (false is returned), then
    126 // something went wrong and the IDs shouldn't be used.
    127 // side effect: sorts IDs and `removes' duplicates.
    128 static bool AreApicIdsUnique(u8* apicIds, size_t numIds)
    129 {
    130     std::sort(apicIds, apicIds+numIds);
    131     u8* const end = std::unique(apicIds, apicIds+numIds);
    132     const size_t numUnique = end-apicIds;
    133     // all unique => IDs are valid.
    134     if(numUnique == numIds)
    135         return true;
    136 
    137     // all zero => the system lacks an xAPIC.
    138     if(numUnique == 1 && apicIds[0] == 0)
    139         return false;
    140 
    141     // duplicated IDs => something went wrong. for example, VMs might not
    142     // expose all physical processors, and OS X still doesn't support
    143     // thread affinity masks.
    144     return false;
    145 }
    146 
    147 static u8 apicIdStorage[os_cpu_MaxProcessors];
    148 static const u8* apicIds;   // = apicIdStorage, or 0 if IDs invalid
    149 
    150 static Status InitApicIds()
     121typedef u8 ApicId;
     122
     123// APIC IDs consist of variable-length bit fields indicating the logical,
     124// core, package and cache IDs. Vol3a says they aren't guaranteed to be
     125// contiguous, but that also applies to the individual fields.
     126// for example, quad-core E5630 CPUs report 4-bit core IDs 0, 1, 6, 7.
     127
     128// (IDs are indeterminate unless INFO::OK is returned)
     129static Status GetApicIds(ApicId* apicIds, ApicId* sortedApicIds, size_t numIds)
    151130{
    152131    struct StoreEachProcessorsApicId
    153132    {
    154         static void Callback(size_t processor, uintptr_t UNUSED(cbData))
    155         {
    156             apicIdStorage[processor] = x86_x64_ApicId();
     133        static void Callback(size_t processor, uintptr_t cbData)
     134        {
     135            ApicId* apicIds = (ApicId*)cbData;
     136            apicIds[processor] = x86_x64_ApicId();
    157137        }
    158138    };
    159     // (fails if the OS limits our process affinity)
    160     if(os_cpu_CallByEachCPU(StoreEachProcessorsApicId::Callback, (uintptr_t)&apicIds) == INFO::OK)
    161     {
    162         if(AreApicIdsUnique(apicIdStorage, os_cpu_NumProcessors()))
    163             apicIds = apicIdStorage;    // success, ApicIds will return this pointer
    164     }
     139    // (can fail due to restrictions on our process affinity or lack of
     140    // support for affinity masks in OS X.)
     141    RETURN_STATUS_IF_ERR(os_cpu_CallByEachCPU(StoreEachProcessorsApicId::Callback, (uintptr_t)apicIds));
     142
     143    std::copy(apicIds, apicIds+numIds, sortedApicIds);
     144    std::sort(sortedApicIds, sortedApicIds+numIds);
     145    ApicId* const end = std::unique(sortedApicIds, sortedApicIds+numIds);
     146    const size_t numUnique = end-sortedApicIds;
     147
     148    // all IDs are zero - system lacks an xAPIC.
     149    if(numUnique == 1 && sortedApicIds[0] == 0)
     150        return ERR::CPU_FEATURE_MISSING;    // NOWARN
     151
     152    // not all unique - probably running in a VM whose emulation is
     153    // imperfect or doesn't allow access to all processors.
     154    if(numUnique != numIds)
     155        return ERR::FAIL;   // NOWARN
    165156
    166157    return INFO::OK;
    167 }
    168 
    169 const u8* ApicIds()
    170 {
    171     static ModuleInitState initState;
    172     ModuleInit(&initState, InitApicIds);
    173     return apicIds;
    174 }
    175 
    176 
    177 size_t ProcessorFromApicId(size_t apicId)
    178 {
    179     const u8* apicIds = ApicIds();
    180     const u8* end = apicIds + os_cpu_NumProcessors();
    181     const u8* pos = std::find(apicIds, end, apicId);
    182     if(pos == end)
    183     {
    184         DEBUG_WARN_ERR(ERR::LOGIC);
    185         return 0;
    186     }
    187     return pos - apicIds;   // index
    188158}
    189159
     
    204174// CPU topology interface
    205175
    206 
    207176struct CpuTopology  // POD
    208177{
     178    size_t numProcessors;   // total reported by OS
     179    ApicId apicIds[os_cpu_MaxProcessors];
     180    ApicId sortedApicIds[os_cpu_MaxProcessors];
     181
    209182    ApicField logical;
    210183    ApicField core;
     
    221194static Status InitCpuTopology()
    222195{
     196    cpuTopology.numProcessors = os_cpu_NumProcessors();
     197
    223198    const size_t maxLogicalPerCore = MaxLogicalPerCore();
    224199    const size_t maxCoresPerPackage = MaxCoresPerPackage();
     
    237212    cpuTopology.package.shift = logicalWidth + coreWidth;
    238213
    239     const u8* apicIds = ApicIds();
    240     if(apicIds)
     214    if(GetApicIds(cpuTopology.apicIds, cpuTopology.sortedApicIds, cpuTopology.numProcessors) == INFO::OK)
    241215    {
    242216        struct NumUniqueValuesInField
    243217        {
    244             size_t operator()(const u8* apicIds, const ApicField& apicField) const
     218            size_t operator()(const ApicId* apicIds, const ApicField& apicField) const
    245219            {
    246                 std::set<size_t> values;
     220                std::bitset<os_cpu_MaxProcessors> values;
    247221                for(size_t processor = 0; processor < os_cpu_NumProcessors(); processor++)
    248222                {
    249223                    const size_t value = apicField(apicIds[processor]);
    250                     values.insert(value);
     224                    values.set(value);
    251225                }
    252                 return values.size();
     226                return values.count();
    253227            }
    254228        };
    255229
    256         cpuTopology.logicalPerCore  = NumUniqueValuesInField()(apicIds, cpuTopology.logical);
    257         cpuTopology.coresPerPackage = NumUniqueValuesInField()(apicIds, cpuTopology.core);
    258         cpuTopology.numPackages     = NumUniqueValuesInField()(apicIds, cpuTopology.package);
    259     }
    260     else // the processor lacks an xAPIC, or the IDs are invalid
     230        cpuTopology.logicalPerCore  = NumUniqueValuesInField()(cpuTopology.apicIds, cpuTopology.logical);
     231        cpuTopology.coresPerPackage = NumUniqueValuesInField()(cpuTopology.apicIds, cpuTopology.core);
     232        cpuTopology.numPackages     = NumUniqueValuesInField()(cpuTopology.apicIds, cpuTopology.package);
     233    }
     234    else // processor lacks an xAPIC, or IDs are invalid
    261235    {
    262236        struct MinPackages
     
    277251        // maximum feasible number of cores and minimal number of packages:
    278252        const size_t minPackages = MinPackages()(maxCoresPerPackage, maxLogicalPerCore);
    279         const size_t numProcessors = os_cpu_NumProcessors();
    280         for(size_t numPackages = minPackages; numPackages <= numProcessors; numPackages++)
    281         {
    282             if(numProcessors % numPackages != 0)
     253        for(size_t numPackages = minPackages; numPackages <= cpuTopology.numProcessors; numPackages++)
     254        {
     255            if(cpuTopology.numProcessors % numPackages != 0)
    283256                continue;
    284             const size_t logicalPerPackage = numProcessors / numPackages;
     257            const size_t logicalPerPackage = cpuTopology.numProcessors / numPackages;
    285258            const size_t minCoresPerPackage = DivideRoundUp(logicalPerPackage, maxLogicalPerCore);
    286259            for(size_t coresPerPackage = maxCoresPerPackage; coresPerPackage >= minCoresPerPackage; coresPerPackage--)
     
    291264                if(logicalPerCore <= maxLogicalPerCore)
    292265                {
    293                     ENSURE(numProcessors == numPackages*coresPerPackage*logicalPerCore);
     266                    ENSURE(cpuTopology.numProcessors == numPackages*coresPerPackage*logicalPerCore);
    294267                    cpuTopology.logicalPerCore = logicalPerCore;
    295268                    cpuTopology.coresPerPackage = coresPerPackage;
    296269                    cpuTopology.numPackages = numPackages;
     270
     271                    // generate fake but legitimate APIC IDs
     272                    for(size_t processor = 0; processor < cpuTopology.numProcessors; processor++)
     273                        cpuTopology.apicIds[processor] = cpuTopology.sortedApicIds[processor] = processor;
    297274                    return INFO::OK;
    298275                }
     
    306283}
    307284
     285
    308286size_t cpu_topology_NumPackages()
    309287{
     
    324302}
    325303
     304
     305static size_t IndexFromApicId(const ApicId* apicIds, size_t apicId)
     306{
     307    ModuleInit(&cpuInitState, InitCpuTopology);
     308
     309    const ApicId* end = apicIds + cpuTopology.numProcessors;
     310    const ApicId* pos = std::find(apicIds, end, apicId);
     311    if(pos == end)
     312    {
     313        DEBUG_WARN_ERR(ERR::LOGIC);
     314        return 0;
     315    }
     316
     317    const size_t index = pos - apicIds;
     318    return index;
     319}
     320
     321
     322size_t cpu_topology_ProcessorFromApicId(size_t apicId)
     323{
     324    return IndexFromApicId(cpuTopology.apicIds, apicId);
     325}
     326
    326327size_t cpu_topology_LogicalFromApicId(size_t apicId)
    327328{
     329    const size_t contiguousId = IndexFromApicId(cpuTopology.sortedApicIds, apicId);
     330    return cpuTopology.logical(contiguousId);
     331}
     332
     333size_t cpu_topology_CoreFromApicId(size_t apicId)
     334{
     335    const size_t contiguousId = IndexFromApicId(cpuTopology.sortedApicIds, apicId);
     336    return cpuTopology.core(contiguousId);
     337}
     338
     339size_t cpu_topology_PackageFromApicId(size_t apicId)
     340{
     341    const size_t contiguousId = IndexFromApicId(cpuTopology.sortedApicIds, apicId);
     342    return cpuTopology.package(contiguousId);
     343}
     344
     345
     346size_t cpu_topology_ApicId(size_t idxLogical, size_t idxCore, size_t idxPackage)
     347{
    328348    ModuleInit(&cpuInitState, InitCpuTopology);
    329     return cpuTopology.logical(apicId);
    330 }
    331 
    332 size_t cpu_topology_CoreFromApicId(size_t apicId)
    333 {
    334     ModuleInit(&cpuInitState, InitCpuTopology);
    335     return cpuTopology.core(apicId);
    336 }
    337 
    338 size_t cpu_topology_PackageFromApicId(size_t apicId)
    339 {
    340     ModuleInit(&cpuInitState, InitCpuTopology);
    341     return cpuTopology.package(apicId);
    342 }
    343 
    344 size_t cpu_topology_ApicId(size_t idxLogical, size_t idxCore, size_t idxPackage)
    345 {
    346     ModuleInit(&cpuInitState, InitCpuTopology);
    347 
    348     // NB: APIC IDs aren't guaranteed to be contiguous;
    349     // quad-core E5630 CPUs report 4-bit core IDs 0, 1, 6, 7.
    350     // we therefore compute an index into the sorted ApicIds array.
    351 
    352     size_t idx = 0;
     349
     350    size_t contiguousId = 0;
    353351    ENSURE(idxPackage < cpuTopology.numPackages);
    354     idx += idxPackage;
    355 
    356     idx *= cpuTopology.coresPerPackage;
     352    contiguousId += idxPackage;
     353
     354    contiguousId *= cpuTopology.coresPerPackage;
    357355    ENSURE(idxCore < cpuTopology.coresPerPackage);
    358     idx += idxCore;
    359 
    360     idx *= cpuTopology.logicalPerCore;
     356    contiguousId += idxCore;
     357
     358    contiguousId *= cpuTopology.logicalPerCore;
    361359    ENSURE(idxLogical < cpuTopology.logicalPerCore);
    362     idx += idxLogical;
    363 
    364     ENSURE(idx < os_cpu_NumProcessors());
    365     const size_t apicId = ApicIds()[idx];
    366     return apicId;
     360    contiguousId += idxLogical;
     361
     362    ENSURE(contiguousId < cpuTopology.numProcessors);
     363    return cpuTopology.sortedApicIds[contiguousId];
    367364}
    368365
     
    419416        }
    420417
    421         bool Matches(u8 id) const
    422         {
    423             return m_cacheId == id;
     418        bool Matches(u8 cacheId) const
     419        {
     420            return m_cacheId == cacheId;
    424421        }
    425422
     
    453450};
    454451
    455 static void DetermineCachesProcessorMask(const u8* apicIds, uintptr_t* cachesProcessorMask, size_t& numCaches)
     452static void DetermineCachesProcessorMask(const ApicId* apicIds, uintptr_t* cachesProcessorMask, size_t& numCaches)
    456453{
    457454    CacheRelations cacheRelations;
     
    462459        for(size_t processor = 0; processor < os_cpu_NumProcessors(); processor++)
    463460        {
    464             const u8 apicId = apicIds[processor];
     461            const ApicId apicId = apicIds[processor];
    465462            const u8 cacheId = u8(apicId & cacheIdMask);
    466463            cacheRelations.Add(cacheId, processor);
     
    514511static Status InitCacheTopology()
    515512{
    516     const u8* apicIds = ApicIds();
    517     DetermineCachesProcessorMask(apicIds, cacheTopology.cachesProcessorMask, cacheTopology.numCaches);
     513    ModuleInit(&cpuInitState, InitCpuTopology);
     514    DetermineCachesProcessorMask(cpuTopology.apicIds, cacheTopology.cachesProcessorMask, cacheTopology.numCaches);
    518515    DetermineProcessorsCache(cacheTopology.cachesProcessorMask, cacheTopology.numCaches, cacheTopology.processorsCache, os_cpu_NumProcessors());
    519516    return INFO::OK;
  • ps/trunk/source/lib/sysdep/arch/x86_x64/topology.h

    r8922 r9580  
    1 /* Copyright (c) 2010 Wildfire Games
     1/* Copyright (c) 2011 Wildfire Games
    22 *
    33 * Permission is hereby granted, free of charge, to any person obtaining
     
    2929#define INCLUDED_TOPOLOGY
    3030
    31 /**
    32  * @return pointer to an array (up to os_cpu_MaxProcessors entries;
    33  * os_cpu_NumProcessors() of them are valid) of the processors'
    34  * unique, strictly monotonically increasing APIC IDs --
    35  * or zero if no xAPIC is present or process affinity is restricted.
    36  **/
    37 LIB_API const u8* ApicIds();
    38 
    39 LIB_API size_t ProcessorFromApicId(size_t apicId);
    40 
    41 
    4231//-----------------------------------------------------------------------------
    4332// cpu
    4433
    45 // the CPU topology, i.e. how many packages, cores and SMT units are
    46 // actually present and enabled, is useful for detecting SMP systems,
    47 // predicting performance and dimensioning thread pools.
     34// the CPU topology, i.e. how many packages, cores and logical processors are
     35// actually present and enabled, is useful for parameterizing parallel
     36// algorithms, especially on NUMA systems.
    4837//
    4938// note: OS abstractions usually only mention "processors", which could be
     
    6251
    6352/**
    64  * @return number of *enabled* hyperthreading units per core.
    65  * (2 on P4 EE)
     53 * @return number of *enabled* logical processors (aka Hyperthreads)
     54 * per core. (2 on P4 EE)
    6655 **/
    6756LIB_API size_t cpu_topology_LogicalPerCore();
    6857
    69 
     58LIB_API size_t cpu_topology_ProcessorFromApicId(size_t apicId);
    7059LIB_API size_t cpu_topology_PackageFromApicId(size_t apicId);
    7160LIB_API size_t cpu_topology_CoreFromApicId(size_t apicId);
     
    9180
    9281/**
    93  * @return L2 cache number (zero-based) to which \<processor\> belongs.
     82 * @return L2 cache number (zero-based) to which the given processor belongs.
    9483 **/
    9584LIB_API size_t cache_topology_CacheFromProcessor(size_t processor);
    9685
    9786/**
    98  * @return bit-mask of all processors sharing \<cache\>.
     87 * @return bit-mask of all processors sharing the given cache.
    9988 **/
    10089LIB_API uintptr_t cache_topology_ProcessorMaskFromCache(size_t cache);
  • ps/trunk/source/lib/sysdep/os/win/wnuma.cpp

    r9545 r9580  
    8686
    8787
    88 // cached results of FindNodeWithProcessor for each processor
    89 static size_t processorsNode[os_cpu_MaxProcessors];
    90 
    91 static void FillProcessorsNode()
    92 {
    93     for(size_t processor = 0; processor < os_cpu_NumProcessors(); processor++)
    94     {
    95         Node* node = FindNodeWithProcessor(processor);
    96         if(node)
    97             processorsNode[processor] = node-nodes;
    98         else
    99             DEBUG_WARN_ERR(ERR::LOGIC);
    100     }
    101 }
    102 
    103 
    10488//-----------------------------------------------------------------------------
    10589// Windows topology
     
    229213}
    230214
    231 static void PopulateProcessorMaskFromApicId(u32 apicId, uintptr_t& processorMask)
    232 {
    233     const u8* apicIds = ApicIds();
    234     for(size_t processor = 0; processor < os_cpu_NumProcessors(); processor++)
    235     {
    236         if(apicIds[processor] == apicId)
    237         {
    238             processorMask |= Bit<uintptr_t>(processor);
    239             return;
    240         }
    241     }
    242 
    243     DEBUG_WARN_ERR(ERR::LOGIC); // APIC ID not found
    244 }
    245 
    246215struct ProximityDomain
    247216{
     
    263232        if(affinityAPIC)
    264233        {
     234            const size_t processor = cpu_topology_ProcessorFromApicId(affinityAPIC->apicId);
    265235            const u32 proximityDomainNumber = affinityAPIC->ProximityDomainNumber();
    266236            ProximityDomain& proximityDomain = proximityDomains[proximityDomainNumber];
    267             PopulateProcessorMaskFromApicId(affinityAPIC->apicId, proximityDomain.processorMask);
     237            proximityDomain.processorMask |= Bit<uintptr_t>(processor);
    268238        }
    269239    }
     
    282252        if(!node)
    283253            node = AddNode();
     254        // (we don't know Windows' nodeNumber; it has hopefully already been set)
    284255        node->proximityDomainNumber = proximityDomainNumber;
    285256        node->processorMask = proximityDomain.processorMask;
     
    317288    }
    318289
    319     FillProcessorsNode();
    320290    return INFO::OK;
    321291}
     
    331301    (void)ModuleInit(&initState, InitTopology);
    332302    ENSURE(processor < os_cpu_NumProcessors());
    333     return processorsNode[processor];
     303    Node* node = FindNodeWithProcessor(processor);
     304    ENSURE(node);
     305    return nodes-node;
    334306}
    335307
Note: See TracChangeset for help on using the changeset viewer.