문제

Ok, so I have this simple HTML code

<html>
    <head>
        <script src="jquery.js"></script>
        <script src="script.js"></script>
        <title>Get all text</title>
    </head>
    <body>
        <span>This is Thomas</span>
        This is Bar
        <div id="content">
            This is Foo.
        </div>
        <span>This is Bufu</span>
    </body>
</html>

I want to get all of the text into a variable, so I wrote the JavaScript code below. However, "This is Foo" and "This is Bar" are not included in the result.

// Separator that getTextFromPage() places between the collected text chunks.
var sep = '~';

// jQuery shorthand for running the callback once the DOM is ready.
$(function() {
    // NOTE: pageTexts is assigned without a declaration, so in sloppy mode it
    // becomes an implicit global.
    pageTexts = getTextFromPage();
    console.log(pageTexts);  
});

/**
 * Walks every text node under <body> and joins the text of the nodes that
 * pass the filter below, using the outer `sep` variable as the separator.
 *
 * @returns {string} The joined text, or '' when no node passes the filter.
 */
function getTextFromPage()
{
    var pageText = '';
    // NOTE: i and j are assigned without `var`, so they are implicit globals.
    // i counts the accepted nodes; j is not read anywhere in this function.
    i = 0;
    j = 0;
    // Tree walker restricted to text nodes below the first <body> element.
    var itr = document.createTreeWalker(
        document.getElementsByTagName("body")[0],
        NodeFilter.SHOW_TEXT,
        null, // no filter
        false
    );

    while(itr.nextNode()) 
    {     
        // String.prototype.search returns the index of the first match, or -1
        // when there is none. -1 is truthy and 0 is falsy, so each search()
        // term below is false only when the text STARTS with a tab/newline;
        // text with no tab/newline at all, or with one later on, passes.
        // Text nodes whose parent is <script> or <noscript> are also skipped.
        if (itr.currentNode.textContent.search("\t") && itr.currentNode.textContent.search("\n") && itr.currentNode.parentNode.nodeName.toLowerCase() != 'script' && itr.currentNode.parentNode.nodeName.toLowerCase() != 'noscript')
        {            
            // The first accepted chunk starts the result; later chunks are
            // appended after the separator.
            if (i == 0)
            {
               pageText = itr.currentNode.textContent;
                i++; 
            } 
            else 
            {
                pageText = pageText + sep + itr.currentNode.textContent;
                i++;
            }       
        }
        // Implicit globals refreshed on every iteration with the running text
        // length and accepted-node count; neither is read in this function.
        charNumber = pageText.length;    
        elemNumber = i;      
    }
    return pageText;  
}

The current result is "This is Thomas~This is Bufu", but I would like to get "This is Thomas~This is Bar~This is Foo~This is Bufu". Can anyone tell me what is wrong or what I should do? I will appreciate any answer that helps. PS: This is for a Chrome extension, and I will need to get all of the text from any HTML page, even if it is a very complicated page.

도움이 되었습니까?

해결책

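You are filtering out "This is Bar" and "This is Foo." with the condition itr.currentNode.textContent.search("\n").
I'm not sure what you're trying to do with the first two conditions in your if statement, but search returns the index of the first match, or -1 when the needle isn't found. -1 is truthy while 0 is falsy, so a text node that begins with a newline (as both of those do) fails the check, whereas text containing no tab or newline at all passes. You can drop those two conditions and instead trim the text and skip anything that ends up empty: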

        if (/*itr.currentNode.textContent.search("\t") && 
             itr.currentNode.textContent.search("\n") &&*/ 
             itr.currentNode.parentNode.nodeName.toLowerCase() != 'script' && 
             itr.currentNode.parentNode.nodeName.toLowerCase() != 'noscript')

    // Trim first: whitespace-only text nodes collapse to '' and are skipped,
    // and accepted chunks lose their surrounding indentation and newlines.
    var text = $.trim(itr.currentNode.textContent);
    if (text.length > 0)
    {
        // Read the parent tag only for non-empty text; script and noscript
        // contents are not page text.
        var parentTag = itr.currentNode.parentNode.nodeName.toLowerCase();
        if (parentTag != 'script' && parentTag != 'noscript')
        {
            // The first accepted chunk starts the result; later ones follow the separator.
            pageText = (i == 0) ? text : pageText + sep + text;
            i++;
        }
    }

http://jsfiddle.net/QEvFF/2/

라이센스 : CC-BY-SA ~와 함께 속성
제휴하지 않습니다 StackOverflow
scroll top