Extracting structure information from pdbs of unloaded modules
Question
I'm trying to write a WinDbg debugger extension that works on both live remote targets and crash dumps. This extension analyzes an opaque block of memory by walking down it via structure offsets and casting different regions of it to known objects.
The structures change fields/field ordering between versions, so I can't hard code it (or include the headers) in my debugger extension itself. Instead I'd like to extract the structure information from the pdbs for which I have private symbols.
When using this on a live target where the pdb/image is in the loaded module list, this works great, and I can use functions like GetFieldOffset to get a field in a structure in a class.
// Look up the byte offset of member m_Struct inside MyClass!MyNestedClass;
// the offset is written to offsetInClass.
GetFieldOffset("MyClass!MyNestedClass", "m_Struct", &offsetInClass);
// Read sizeof(ULONG) bytes of FieldInStruct from the _MY_STRUCT located at
// classAddr + offsetInClass into myFieldValue.
GetFieldData(offsetInClass + classAddr, "MyClass!_MY_STRUCT", "FieldInStruct",
sizeof(ULONG), &myFieldValue);
My problem: When I don't have the module in the loaded module list (either in the wrong context, or analyzing a crash dump), I can't use the above functions.
At the beginning of the memory region I'm analyzing, I've stored the pdb GUID and age. Using that I can find the path to my pdb in my symbol path/symbol cache using SymFindFileInPath.
char symbolPath[MAX_SYMBOL_PATH] = "";
PSTR pdbPath = NULL;
hr = ExtSymbols->lpVtbl->GetSymbolPath(ExtSymbols,
symbolPath,
sizeof(symbolPath),
NULL);
SymSetOptions(SYMOPT_IGNORE_CVREC | SYMOPT_FAIL_CRITICAL_ERRORS |
SYMOPT_CASE_INSENSITIVE);
result = SymFindFileInPath(hSymbols,
symbolPath,
Name,
&GUID,
Age,
0,
SSRVOPT_GUIDPTR,
pdbPath,
NULL,
NULL);
So I have the path to my specific pdb instance, but I'm not sure where to go from here. Looking through the Sym* functions exposed by DbgHelp.dll, I don't see any obvious way to use this pdb file to get type information. Functions such as SymGetTypeInfo require a module base, and my module is not and cannot be loaded. All I need is the byte offset of fields within a structure. Any ideas?
Thanks!
Solution
The proof-of-concept (POC) code below shows how to extract type information from a PDB using the DIA SDK.
//fragile code handle with care
#include "typefrompdb.h"
int main(int argc, char* argv[]) {
USAGE;
swprintf(pdb, MAX_PATH,L"%S",argv[1]);
swprintf(type, MAX_PATH,L"%S",argv[2]);
result = CoInitialize(NULL);
result = CoCreateInstance( CLSID_DiaSource,NULL,
CLSCTX_INPROC_SERVER,__uuidof( IDiaDataSource ),(void **) &pSource);
result = pSource->loadDataFromPdb(pdb);
SHOUT("%s 2find %S %d\n",(result==S_OK)?"succeded":"failed",pdb,__LINE__);
result = pSource->openSession(&pSession);
result = pSession->get_globalScope(&pSymbol);
result = pSymbol->findChildren(SymTagUDT,type,nsNone,&pEnumsymbols);
result = pEnumsymbols->get_Count(&count);
result = pEnumsymbols->Next(1,&pSymudt,&noofsymret);
SHOUT("%s 2find %S %d\n",(result==S_OK)?"succeded":"failed",type,__LINE__);
result = pSymudt->get_name(udtname);
result = pSymudt->findChildren(SymTagNull,NULL,nsNone,&pEnumsymbols);
result = pEnumsymbols->get_Count(&count);
SHOUT("no of members in struct %S is 0X%X %d\n",type,count,__LINE__);
wprintf(L"\nstruct %s {\nType Leng Tags Name \n",*udtname);
for (LONG i =0 ; i< count; i++) {
result = pEnumsymbols->Next(1,&pSymchild,&noofsymret);
result = pSymchild->get_name(childname);
result = pSymchild->get_type(&pSymtags);
result = pSymtags->get_symTag(&dwtag);
result = pSymtags->get_length(&len);
result = pSymtags->get_baseType(&basetype);
wprintf(L"0x%.2X 0x%.2I64X 0x%.2X %s\n",basetype,len,dwtag,*childname);
} return 0; }
contents of header file typefrompdb.h
/* Proof-of-concept code, not meant for blind copy-pasting. A real
implementation should handle errors, release memory, BSTRs and interface
pointers, close handles, follow sensible coding standards, use dynamic
allocation, replace ANSI with Unicode, and so on.
Typical test case: typefrompdb.exe ntdll.pdb _DRIVER_OBJECT */
#include <stdio.h>
#include <Windows.h>
#include <Dia2.h> // set INCLUDE=diasdkdir\inc
#include <atlbase.h> // vs 2010 express edition used with wdk 7600
#include <atlcom.h> // set INCLUDE=C:\WinDDK\7600.16385.1\inc\atl71
#include <dbghelp.h> // set INCLUDE=windbg\sdk\inc
/* SHOUT(fmt, ...): print the message, then terminate the process with exit
   code 0 when the variable `result` in scope is anything other than S_OK
   (S_FALSE therefore counts as failure). Wrapped in do/while(0) so it behaves
   as a single statement inside if/else. */
#define SHOUT(...) do { printf(__VA_ARGS__); \
    if (result != S_OK) { exit(0); } } while (0)
/* USAGE: inside main(argc, argv), print the usage line and return 0 unless
   exactly two command-line arguments were supplied. */
#define USAGE do { if (argc != 3) { printf( \
    "usage %s %s %s\n",argv[0],"file.pdb","typename"); return 0; } } while (0)
/* File-scope state shared with main(). Everything starts out zero/NULL
   except `result`, which starts as E_FAIL. */

/* HRESULT of the most recent call; read by the SHOUT macro. */
HRESULT result = E_FAIL;

/* DIA interface pointers. */
IDiaDataSource *pSource = NULL;
IDiaSession *pSession = NULL;
IDiaSymbol *pSymbol = NULL;
IDiaSymbol *pSymchild = NULL;
IDiaEnumSymbols *pEnumsymbols = NULL;
CComPtr< IDiaSymbol > pSymudt;
CComPtr< IDiaSymbol > pSymtags;

/* Wide-character copies of the two command-line arguments. */
wchar_t pdb[500] = {0};
wchar_t type[500] = {0};

/* Name buffers; main() only ever uses element 0 of each array. */
BSTR childname[0x100] = {0};
BSTR udtname[0x100] = {0};

/* Scratch values filled in while enumerating symbols. */
LONG count = 0;
ULONG noofsymret = 0;
DWORD dwtag = 0;
DWORD basetype = 0;
ULONGLONG len = 0;
compiled and linked with
@rem Build script; the source file to compile is passed as the first argument.
@rem Set up the x86 Visual Studio 10.0 command-line build environment.
@call "C:\Program Files\Microsoft Visual Studio 10.0\VC\vcvarsall.bat" x86
@rem XXXX / YYYY are placeholders for the extra include and library
@rem directories (see the INCLUDE hints next to each #include in typefrompdb.h).
set INCLUDE=XXXX;XXXX;XXXX;%INCLUDE%
set LIB=YYYY;YYYY;YYYY;%LIB%
@rem Compile only (/c) with debug info (/Zi), warning level 4 and code analysis;
@rem analysis warning 6387 is disabled.
cl /c /Zi /nologo /W4 /wd6387 /analyze %1%
@rem Link every object file in the current directory against diaguids.lib.
link /DEBUG /nologo /RELEASE /IGNORE:4254 diaguids.lib *.obj
pause
results for test case
typefrompdb.exe
usage typefrompdb.exe file.pdb typename
typefrompdb.exe ntd dr
failed 2find ntd 11
typefrompdb.exe ntdll.pdb _driver_object
succeded 2find ntdll.pdb 11
failed 2find _driver_object 17
typefrompdb.exe ntdll.pdb _DRIVER_OBJECT
succeded 2find ntdll.pdb 11
succeded 2find _DRIVER_OBJECT 17
no of members in struct _DRIVER_OBJECT is 0XF 21
struct _DRIVER_OBJECT {
Type Leng Tags Name
0x06 0x02 0x10 Type
0x06 0x02 0x10 Size
0x00 0x04 0x0E DeviceObject
0x0E 0x04 0x10 Flags
0x00 0x04 0x0E DriverStart
0x0E 0x04 0x10 DriverSize
0x00 0x04 0x0E DriverSection
0x00 0x04 0x0E DriverExtension
0x00 0x08 0x0B DriverName
0x00 0x04 0x0E HardwareDatabase
0x00 0x04 0x0E FastIoDispatch
0x00 0x04 0x0E DriverInit
0x00 0x04 0x0E DriverStartIo
0x00 0x04 0x0E DriverUnload
0x00 0x70 0x0F MajorFunction
interpretation of results for the typical test case
grep -iE "btint|btulong|btnotype" cvconst.h
btNoType = 0,
btInt = 6,
btULong = 14,
grep -i -A 35 "enum symtagenum" cvconst.h
| awk "{ if ( NR==0x0B || NR==0x0E || NR==0x0F || NR==0x10 ) print $0 }"
SymTagAnnotation,
SymTagUDT,
SymTagEnum,
SymTagFunctionType,
edit
I was never sure this was an answer; it was more of a test or experiment in using the DIA SDK.
When loading a bare .pdb, DBH passes SymLoadModule an arbitrary base address, hard-coded as 0x1000000.
You can see that in the DBH source.
The source is in the Platform SDK samples, and a compiled binary of the same source is available in the WinDbg installation:
C:\Program Files\Microsoft SDKs\Windows\v7.1\Samples\winbase\debug\dbh
//global
DWORD64 gDefaultBaseForVirtualMods;
BOOL init()
{
int i;
*gModName = 0;
gBase = 0;;
gDefaultBaseForVirtualMods = 0x1000000;
else if (!_tcsicmp(ext, _T(".pdb")))
{
addr = gDefaultBaseForVirtualMods;
dontopen = true;
} else {
addr = gDefaultBase;
}
C:\>dbh XXXXXXX\ntdll.pdb t _DRIVER_OBJECT
name : _DRIVER_OBJECT
addr : 0
size : a8
flags : 0
type : 1
modbase : 1000000
value : 0
reg : 0
scope : SymTagNull (0)
tag : SymTagUDT (b)
index : 1
C:\>
Anyway, I would reiterate that you are not supposed to use DbgHelp functions in WinDbg extensions.
Read the notes in the documentation linked below (writing WdbgExts extensions / DbgEng extensions / EngExtCpp extensions), which state that calling DbgHelp functions from a WinDbg extension is not supported:
Note You must not attempt to call any DbgHelp or ImageHlp routines from any debugger extension. Calling these routines is not supported and may cause a variety of problems.
writing wdbgexts windbg extensions
OTHER TIPS
The Debug Interface Access SDK provides APIs for working with the PDB directly:
http://msdn.microsoft.com/en-us/library/x93ctkx8.aspx
The DIA2Dump sample is full featured and demonstrates how to extract type information.