Question

Given a web page, how do you find the largest rectangle on the webpage which is the main content area?

For example, compare the size of sidebar, header, footer, and main content area. Is it possible to find the main content area by simply searching for the largest rectangle out of all the rectangles discovered on a page?

Usually the tallest and widest rectangle is suspected to be the main content area, wondering if there's an algorithm of some sort in Javascript or Python to test this hypothesis out.


Solution 3

The current answer is overly complex. The main thing you need to know is element.getBoundingClientRect(). Here's a smaller snippet - I'm looking for the biggest table, but you can use any CSS selector you want.

// Convert the NodeList to a real array so it can be sorted;
// NodeList itself has no working sort()
var elements = Array.prototype.slice.call(document.querySelectorAll('table'));

var getArea = function(element){
    var rectangle = element.getBoundingClientRect();
    return rectangle.width * rectangle.height;
};

// sort() expects a two-argument comparator; subtracting areas in this
// order puts the largest element first
var largest = elements.sort(function(a, b){
    return getArea(b) - getArea(a);
})[0];
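The comparator detail is the easy part to get wrong: Array.prototype.sort wants a function of two arguments, not a one-argument key function. Here's a DOM-free sketch of the same sort, using plain objects in place of getBoundingClientRect() results (the example rectangles are made up):

```javascript
// Plain objects standing in for getBoundingClientRect() results
var rects = [
  { width: 200, height: 100 },  // sidebar-ish
  { width: 960, height: 600 },  // main-content-ish
  { width: 960, height: 80 }    // header-ish
];

function area(r) { return r.width * r.height; }

// Two-argument comparator, descending: biggest area first
var largest = rects.slice().sort(function (a, b) {
  return area(b) - area(a);
})[0];

console.log(area(largest)); // 576000
```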

OTHER TIPS

So while the question didn't make much sense to me, I couldn't resist the urge to toy around with the concept of recursively scanning a DOM tree to retrieve and sort elements by their size :)

Here's a dumb function for doing so (you can paste it in your browser console):

function scanSizes(root) {
  return [].reduce.call(root, function(sizes, node) {
    var bounds = node.getBoundingClientRect();
    sizes.push({tag: node.outerHTML, area: bounds.width * bounds.height});
    // Recurse on direct children only; calling querySelectorAll("*") here
    // would push every descendant once per level of nesting
    if (node.children.length > 0)
      sizes.push.apply(sizes, scanSizes(node.children));
    return sizes;
  }, []).sort(function(x, y) {
    return y.area - x.area; // largest first
  });
}

var sizes = scanSizes(document.querySelectorAll("body > *"));

// sizes[0].tag contains the outerHTML of the largest element (as a string)
// sizes[0].area its area in pixels (width * height)

Edit: more seriously, you might be interested in this topic and related answers.

Edit: performance-wise, recursion wasn't really a good idea. You can go with something like this for a more efficient solution:

function scanSizes(root) {
  return [].map.call(root, function(node) {
    var bounds = node.getBoundingClientRect();
    return {tag: node.outerHTML, area: bounds.width * bounds.height};
  }).sort(function(x, y) {
    return y.area - x.area; // largest first
  });
}

var sizes = scanSizes(document.querySelectorAll("*"));
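One caveat with scanning "*": document-level wrappers like <html> and <body> enclose everything else, so they will always rank first. A DOM-free sketch of filtering them out (the plain objects below stand in for scan output; note the real scanSizes stores outerHTML in `tag`, while this sketch uses bare tag names for readability):

```javascript
// Scan-style results, already sorted largest first (made-up numbers)
var ranked = [
  { tag: 'html', area: 1000000 },
  { tag: 'body', area: 990000 },
  { tag: 'div',  area: 700000 },
  { tag: 'nav',  area: 120000 }
];

// Wrappers that trivially contain the whole page
var SKIP = { html: true, body: true };

var candidate = ranked.filter(function (entry) {
  return !SKIP[entry.tag];
})[0]; // first non-wrapper entry is the largest real candidate

console.log(candidate.tag); // "div"
```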

I'm adding another answer because I've just stumbled upon the <main> HTML5 element spec, which developers are supposed to use to mark their main content area, so it's probably the very first thing you'll want to check for in any scraped page.

So basically you should first check for a single <main> or role="main" element in the page, and only fall back to other content-detection strategies if none is found :)
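That check can be a one-liner with querySelector. A minimal sketch (the helper name findMainContent is my own; the function takes anything with a querySelector method, so it can be exercised with a stub outside a browser):

```javascript
// Prefer an explicit landmark element before any size heuristics.
// `doc` is the real `document` in a browser, or any querySelector-bearing stub.
function findMainContent(doc) {
  return doc.querySelector('main, [role="main"]') || null;
}

// In a browser: var main = findMainContent(document);
```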

Licensed under: CC-BY-SA with attribution