Why not grab http://www.reddit.com/.rss, which is much simpler to parse than HTML? For example, to get the news titles using the Qt framework:
class Foo : public QObject { Q_OBJECT
public:
Foo();
private slots:
void got_it(QNetworkReply* reply);
private:
QNetworkAccessManager* news_grabber;
};
/// Sets up the network manager, routes its finished() signal to got_it(),
/// and issues the GET request for the RSS feed.
Foo::Foo()
    : news_grabber(new QNetworkAccessManager(this)) {
    // Hook up the completion handler before the request is sent.
    connect(news_grabber, SIGNAL(finished(QNetworkReply*)),
            this, SLOT(got_it(QNetworkReply*)));

    const QUrl feed_url("http://www.reddit.com/.rss");
    const QNetworkRequest feed_request(feed_url);
    news_grabber->get(feed_request);
}
/// Slot invoked when the feed request finishes: parses the reply body as XML
/// and collects the text of the <title> child of every <item> element.
///
/// @param reply  The finished reply delivered by the network manager's
///               finished() signal. It is scheduled for deletion here.
///
/// NOTE(review): the titles are gathered into a local vector that is
/// discarded when this function returns; store or emit them if the caller
/// needs the result.
void Foo::got_it(QNetworkReply* reply) {
    // The receiver of finished() is responsible for releasing the reply.
    // deleteLater() defers destruction until control returns to the event
    // loop, so the reply stays valid for the rest of this slot.
    reply->deleteLater();

    if (reply->error() != QNetworkReply::NoError) {
        qWarning("Foo::got_it: feed request failed: %s",
                 qPrintable(reply->errorString()));
        return;
    }

    QDomDocument document;
    if (!document.setContent(static_cast<QIODevice*>(reply))) {
        qWarning("Foo::got_it: feed response could not be parsed as XML");
        return;
    }

    const QDomNodeList items = document.elementsByTagName("item");
    const int item_count = items.length();

    std::vector<QString> items_storage;
    items_storage.reserve(static_cast<std::vector<QString>::size_type>(item_count));
    for (int i = 0; i < item_count; ++i) {
        items_storage.push_back(items.at(i).firstChildElement("title").text());
    }
}