Pergunta

Is there an API, or some other way, to search any MediaWiki site with JavaScript and print the page that was found (or print a message if nothing was found)?

I'd prefer something like this:

// Sketch of the desired helper (placeholder from the question): given the base
// URL of a wiki (`wikipage`) and a search term (`search`), print the page found.
function searchWiki(wikipage, search) {
    // The actual lookup is not implemented in this snippet; `foundPage` is
    // never assigned here, it only marks where the result would come from.
    document.write(foundPage);
}

// Intended usage:

searchWiki('https://en.wikipedia.org', 'banana');

// Expected output: 'https://en.wikipedia.org/wiki/Banana'
Foi útil?

Solução

Here is my implementation of such a function. It uses the MediaWiki API through JSONP and is quite flexible. I guess a jQuery solution is fine. I created a small fiddle.

searchWiki(site, search, [callback], [options])

/**
 * Search a MediaWiki site through its JSONP API (requires jQuery as `$`).
 *
 * Call as searchWiki(site, search, callback, opts) or
 * searchWiki(site, search, opts).
 *
 * @param {string} site - Host name of the wiki, e.g. 'en.wikipedia.org'.
 * @param {string} search - Search query.
 * @param {?function} [callback] - Invoked as callback(titles, links). Takes
 *     precedence over opts.success.
 * @param {Object} [opts] - Options:
 *     ssl (use https), maxResults (return arrays of up to n results),
 *     apiBase (defaults to '/w/'), wikiBase (defaults to '/wiki/'),
 *     success (result callback), error (called with an Error when the API
 *     response carries no search results section).
 *
 * Without opts.maxResults the callback receives a single title and link, or
 * (null, null) when nothing matched. With opts.maxResults it receives two
 * arrays (possibly empty).
 */
function searchWiki(site, search, callback, opts) {
    // The third argument may be the options object. `typeof null` is also
    // 'object', so null must be excluded or opts would become null.
    if(callback !== null && typeof callback === 'object') {
        opts = callback;
        callback = null;
    } else {
        opts = opts || {};
    }
    // Build the required URLs
    var siteUrl = (opts.ssl ? 'https' : 'http') + '://' + site;
    var apiUrl = siteUrl + (opts.apiBase || '/w/') + 'api.php';
    var queryUrl = apiUrl + '?action=query&list=search&srsearch=' + encodeURIComponent(search) + '&srlimit=' + (opts.maxResults || 1) + '&format=json';
    // Issue the JSONP request
    $.ajax(queryUrl + '&callback=?', {
        dataType: 'jsonp',
        // This prevents warnings about the unrecognized parameter "_"
        cache: true,
        success: function(data) {
            var results = data && data.query && data.query.search;
            if(!results) {
                // The API answered, but without a result list (for example an
                // {"error": {...}} payload). Report it instead of failing
                // with an opaque TypeError further down.
                var apiError = new Error((data && data.error && data.error.info) || 'Unexpected MediaWiki API response');
                if(typeof opts.error === 'function') {
                    opts.error(apiError);
                    return;
                }
                throw apiError;
            }
            // Get all returned pages
            var titles = [], links = [];
            for(var i = 0; i < results.length; i++) {
                var title = results[i].title,
                    link = siteUrl + (opts.wikiBase || '/wiki/') + encodeURIComponent(title);
                titles.push(title);
                links.push(link);
            }
            if(!opts.maxResults) {
                // Single result requested
                if(results.length === 0) {
                    titles = links = null;
                } else {
                    titles = titles[0];
                    links = links[0];
                }
            }
            // Call the callback
            (callback || opts.success || function(){})(titles, links);
        }
    });
}

Example 1: Single wikipedia search

// Look up a single page on the English Wikipedia and show it as a link
// inside #search-msg, or show "Not found" when the search has no hit.
searchWiki('en.wikipedia.org', 'banana fruit', {
    ssl: true,
    success: function(title, link) {
        // For this query, link is "https://en.wikipedia.org/wiki/Banana"
        if(title === null) {
            $('#search-msg').text('Not found');
            return;
        }
        var pageLink = $('<a>');
        pageLink.text(title);
        pageLink.attr('href', link);
        $('#search-msg').append(pageLink);
    }
});

This example shows a link to a wikipedia page with the associated title.

Example 2: Multiple results

// Request up to five matches from mediawiki.org and announce each one.
searchWiki('www.mediawiki.org', 'Release notes', {
    ssl: true,
    maxResults: 5,
    success: function(titles, links) {
        // titles and links are parallel arrays: links[n] belongs to titles[n]
        titles.forEach(function(pageTitle, position) {
            alert('MediaWiki ' + pageTitle + ' at ' + links[position]);
        });
    }
});

This example displays up to five links to MediaWiki pages which match the query "Release notes".

Options:

  • ssl: Use HTTPS instead of HTTP
  • maxResults: Return multiple (up to n) results
  • apiBase: API directory on target site (defaults to /w/)
  • wikiBase: Wiki directory on target site (defaults to /wiki/)
  • success: Function to invoke after retrieving the result list

You can either pass the callback as a function argument (before the options) or as the success option.


Update: Here is the pure JS solution (no jQuery required). And there is another fiddle, this time without jQuery.

/**
 * Search a MediaWiki site through its JSONP API using a plain <script> tag
 * (no jQuery required).
 *
 * Call as searchWiki(site, search, callback, opts) or
 * searchWiki(site, search, opts).
 *
 * @param {string} site - Host name of the wiki, e.g. 'en.wikipedia.org'.
 * @param {string} search - Search query.
 * @param {?function} [callback] - Invoked as callback(titles, links). Takes
 *     precedence over opts.success.
 * @param {Object} [opts] - Options:
 *     ssl (use https), maxResults (return arrays of up to n results),
 *     apiBase (defaults to '/w/'), wikiBase (defaults to '/wiki/'),
 *     success (result callback), error (called with an Error when the script
 *     fails to load or the response carries no search results section).
 *
 * Without opts.maxResults the callback receives a single title and link, or
 * (null, null) when nothing matched. With opts.maxResults it receives two
 * arrays (possibly empty).
 */
function searchWiki(site, search, callback, opts) {
    // The third argument may be the options object. `typeof null` is also
    // 'object', so null must be excluded or opts would become null.
    if(callback !== null && typeof callback === 'object') {
        opts = callback;
        callback = null;
    } else {
        opts = opts || {};
    }
    // Build the required URLs
    var siteUrl = (opts.ssl ? 'https' : 'http') + '://' + site;
    var apiUrl = siteUrl + (opts.apiBase || '/w/') + 'api.php';
    var queryUrl = apiUrl + '?action=query&list=search&srsearch=' + encodeURIComponent(search) + '&srlimit=' + (opts.maxResults || 1) + '&format=json';
    var fnName = '_cb_' + Math.floor(Math.random() * 4294967296);
    var scriptTag = document.createElement('script');

    // Remove the temporary global callback and the injected script element so
    // repeated searches do not accumulate globals or DOM nodes.
    function cleanup() {
        delete window[fnName];
        if(scriptTag.parentNode) {
            scriptTag.parentNode.removeChild(scriptTag);
        }
    }

    window[fnName] = function(data) {
        cleanup();
        var results = data && data.query && data.query.search;
        if(!results) {
            // The API answered, but without a result list (for example an
            // {"error": {...}} payload). Report it instead of failing with
            // an opaque TypeError further down.
            var apiError = new Error((data && data.error && data.error.info) || 'Unexpected MediaWiki API response');
            if(typeof opts.error === 'function') {
                opts.error(apiError);
                return;
            }
            throw apiError;
        }
        // Get all returned pages
        var titles = [], links = [];
        for(var i = 0; i < results.length; i++) {
            var title = results[i].title,
                link = siteUrl + (opts.wikiBase || '/wiki/') + encodeURIComponent(title);
            titles.push(title);
            links.push(link);
        }
        if(!opts.maxResults) {
            // Single result requested
            if(results.length === 0) {
                titles = links = null;
            } else {
                titles = titles[0];
                links = links[0];
            }
        }
        // Call the callback
        (callback || opts.success || function(){})(titles, links);
    };
    // If the script cannot be loaded the JSONP callback never runs, so clean
    // up here and notify the optional error handler.
    scriptTag.onerror = function() {
        cleanup();
        if(typeof opts.error === 'function') {
            opts.error(new Error('Failed to load ' + apiUrl));
        }
    };
    // Issue the JSONP request
    scriptTag.setAttribute('src', queryUrl + '&callback=' + fnName);
    document.head.appendChild(scriptTag);
}

Update 2: Finally a solution for node.js. The API is still the same, but it provides some additional options:

  • error: An error callback (this was impossible in browser-based JS)
  • userAgent: A custom user agent string as suggested in the docs
  • port: Target port (defaults to 80/443)
  • encoding: Response encoding (defaults to utf8)

I did not test this much, but the examples (see above) should still work.

var http = require('http'),
    https = require('https');

/**
 * Search a MediaWiki site through its HTTP API (Node.js).
 *
 * Call as searchWiki(site, search, callback, opts) or
 * searchWiki(site, search, opts).
 *
 * @param {string} site - Host name of the wiki, e.g. 'en.wikipedia.org'.
 * @param {string} search - Search query.
 * @param {?function} [callback] - Invoked as callback(titles, links). Takes
 *     precedence over opts.success.
 * @param {Object} [opts] - Options:
 *     ssl (use https), maxResults (return arrays of up to n results),
 *     apiBase (defaults to '/w/'), wikiBase (defaults to '/wiki/'),
 *     success (result callback), error (error callback), userAgent,
 *     port (defaults to 80/443), encoding (defaults to 'utf8').
 *
 * Without opts.maxResults the callback receives a single title and link, or
 * (null, null) when nothing matched. With opts.maxResults it receives two
 * arrays (possibly empty). Request errors, unparsable responses and responses
 * without a search result list are passed to opts.error when it is set.
 */
function searchWiki(site, search, callback, opts) {
    // The third argument may be the options object. `typeof null` is also
    // 'object', so null must be excluded or opts would become null.
    if(callback !== null && typeof callback === 'object') {
        opts = callback;
        callback = null;
    } else {
        opts = opts || {};
    }
    // Single place for error reporting; without opts.error failures are
    // dropped, matching how request and parse errors were already handled.
    function fail(err) {
        (opts.error || function(){})(err);
    }
    // Build the required paths
    var apiPath = (opts.apiBase || '/w/') + 'api.php';
    var queryPath = apiPath + '?action=query&list=search&srsearch=' + encodeURIComponent(search) + '&srlimit=' + (opts.maxResults || 1) + '&format=json';
    // Request options
    var httpOpts = {
        hostname: site,
        port: (opts.port ? opts.port : (opts.ssl ? 443 : 80)),
        method: 'GET',
        path: queryPath,
        agent: false
    };
    // Custom user agent
    if(opts.userAgent) {
        httpOpts.headers = {
            'User-Agent': opts.userAgent
        };
    }
    // Make the request
    var req = (opts.ssl ? https : http).request(httpOpts, function(res) {
        var msgBody = '';
        res.setEncoding(opts.encoding || 'utf8');

        res.on('data', function(chunk) {
            msgBody += chunk;
        });

        res.on('end', function() {
            // Parse response as JSON
            var data;
            try {
                data = JSON.parse(msgBody);
            } catch(err) {
                fail(err);
                return;
            }
            var results = data && data.query && data.query.search;
            if(!results) {
                // Valid JSON but no result list (for example an
                // {"error": {...}} payload): route it to the error callback
                // instead of throwing a TypeError inside this event handler.
                fail(new Error((data && data.error && data.error.info) || 'Unexpected MediaWiki API response'));
                return;
            }
            // Get all returned pages
            var siteUrl = (opts.ssl ? 'https' : 'http') + '://' + site;
            var titles = [], links = [];
            for(var i = 0; i < results.length; i++) {
                var title = results[i].title,
                    link = siteUrl + (opts.wikiBase || '/wiki/') + encodeURIComponent(title);
                titles.push(title);
                links.push(link);
            }
            if(!opts.maxResults) {
                // Single result requested
                if(results.length === 0) {
                    titles = links = null;
                } else {
                    titles = titles[0];
                    links = links[0];
                }
            }
            // Call the callback
            (callback || opts.success || function(){})(titles, links);
        });
    });
    req.on('error', fail);
    req.end();
}
Licenciado em: CC-BY-SA com atribuição
Não afiliado a StackOverflow
scroll top