Autonomous real-time face recognition system

Question

So, here's my proposal:

// -- >8 ---------- speech.h --------------------------
#ifndef SPEECH_ONBOARD_H
#define SPEECH_ONBOARD_H


// Forward declaration only: this keeps the Windows/SAPI headers out of every
// file that includes this one, since mixing OpenCV and Windows headers is a
// recipe for disaster.
struct ISpVoice;

namespace Speech
{
    // Thin wrapper around a SAPI ISpVoice text-to-speech object.
    //
    // The wrapper holds one reference to the COM object and releases it in
    // its destructor. Copying is disabled, because a copy would release the
    // very same interface pointer a second time.
    class Voice
    {
        ISpVoice * spVoice;

        // Not copyable: declared private and intentionally left undefined.
        Voice( const Voice & );
        Voice & operator=( const Voice & );

    public:

        Voice();
        ~Voice();


        // Speaks txt. flags is forwarded to the speech engine
        // (0 = synchronous, 1 = asynchronous).
        // Returns 1 on success, 0 otherwise (including when no voice
        // object is available).
        int speak( const char * txt, int flags=0 ) const ;

        // Supported values range from -10 to 10.
        // Returns 1 on success, 0 otherwise.
        int setRate( int s );

        // Supported values range from 0 to 100.
        // Returns 1 on success, 0 otherwise.
        int setVolume( int s );
    };
}


#endif // SPEECH_ONBOARD_H



// ---- >8 speech.cpp ------------------------------
#include <windows.h>
#include <sapi.h>

#include <vector>

#include "speech.h"


// Releases the COM interface pointer x (if non-null) and resets it to NULL.
// The do { } while (0) wrapper makes `COM_RELEASE(p);` a single statement, so
// the macro stays safe inside an unbraced if/else.
#define COM_RELEASE(x) do { if ((x)) (x)->Release(); (x) = NULL; } while (0)


namespace Speech
{
    struct _ComUser
    {
        _ComUser()  {CoInitialize(0);}
        ~_ComUser() {CoUninitialize();}
    } _we_need_a_singleton_per_module;


    inline int w2a( WCHAR *in, char *out )
    {
        out[0]=0;
        return WideCharToMultiByte(CP_ACP, 0, in, -1, out, MAX_PATH, 0, 0); 
    }

    // Converts the NUL-terminated ANSI string `in` to a wide string.
    //
    // `out` must provide room for `outLen` WCHARs (MAX_PATH by default).
    // Returns the value reported by MultiByteToWideChar; 0 means the
    // conversion failed, in which case `out` holds an empty string.
    inline int a2w( const char *in, WCHAR *out, int outLen = MAX_PATH )
    {
        if ( ! out || outLen <= 0 )
            return 0;

        out[0] = 0;
        const int written = MultiByteToWideChar( CP_ACP, 0, in, -1, out, outLen );
        if ( written == 0 )
            out[0] = 0; // a failed call may leave partial, unterminated output behind
        return written;
    }




    // Creates the in-process SAPI voice object. If creation fails, spVoice
    // stays null and every other member function returns 0.
    Voice::Voice()
        : spVoice(0)
    {
        const HRESULT hr = CoCreateInstance( CLSID_SpVoice, NULL, CLSCTX_INPROC_SERVER,
                                             IID_ISpVoice, reinterpret_cast<void **>( &spVoice ) );
        if ( FAILED(hr) )
            spVoice = 0; // make the "no voice available" state explicit
    }


    // Gives up the reference to the voice object, if one was ever obtained.
    Voice::~Voice()
    {
        if ( spVoice )
            spVoice->Release();

        spVoice = NULL;
    }

    //SPF_ASYNC = ( 1L << 0 ) ,
    //SPF_PURGEBEFORESPEAK  = ( 1L << 1 ) ,
    //SPF_IS_FILENAME   = ( 1L << 2 ) ,
    //SPF_IS_XML    = ( 1L << 3 ) ,
    //SPF_IS_NOT_XML    = ( 1L << 4 ) ,
    //SPF_PERSIST_XML   = ( 1L << 5 ) ,
    //SPF_NLP_SPEAK_PUNC    = ( 1L << 6 ) ,
    //SPF_PARSE_SAPI    = ( 1L << 7 ) ,
    //SPF_PARSE_SSML    = ( 1L << 8 ) ,
    //SPF_PARSE_AUTODETECT  = 0,
    int Voice::speak( const char * txt, int flags ) const 
    {
        if ( ! spVoice )
            return 0;

        WCHAR wtxt[800];
        a2w(txt,wtxt);

        ULONG pulstream = 0;
        HRESULT hr = spVoice->Speak( wtxt, flags, &pulstream );

        return hr==S_OK; 
    }


    // Supported values range from -10 to 10 
    int Voice::setRate( int s )
    {
        if ( ! spVoice )
            return 0;

        HRESULT hr = spVoice->SetRate( s );

        return hr==S_OK; 
    }

    // Supported values range from 0 to 100 
    int Voice::setVolume( int s )
    {
        if ( ! spVoice )
            return 0;

        HRESULT hr = spVoice->SetVolume ( s );

        return hr==S_OK; 
    }
}



// ----- >8 main.cpp --------------------------------------------

#include "opencv2/core/core.hpp"
#include "opencv2/highgui/highgui.hpp"

using namespace cv;

#include "speech.h"

// Demo: greets asynchronously while a window with some text is shown, then
// says goodbye synchronously once a key has been pressed.
int main(int argc, char** argv)
{
    const int asyncFlag = 1;              // speech flag 1 = asynchronous
    const char * const title = "Display window";

    Speech::Voice voice;
    voice.speak("hello , oh, hello!", asyncFlag);

    // 300x300, 3-channel 8-bit image with a solid fill and a text overlay.
    Mat img(300, 300, CV_8UC3, Scalar(255, 0, 0));
    putText(img, "lala la", Point(20, 120), 0, 2.5, Scalar(0, 200, 0), 5);

    namedWindow(title, 0);
    imshow(title, img);
    waitKey(0);                           // wait for a key press

    voice.speak("bye bye, see you later !"); // default flags: synchronous
    return 0;
}