Question

Can anybody suggest how to extract the contents of the table of contents, chapters, and index from an ebook (PDF) programmatically using the iText library?

Was it helpful?

Solution

I have achieved this by parsing the bookmarks. Below is the code written by me to parse the bookmarks from a PDF using iText.

/**
 * Walks a bookmark (outline) tree and records each bookmark's title and page
 * number in {@code this.lhmBookMarks}.
 *
 * <p>Each bookmark is a map that is read for three keys: {@code "Title"},
 * {@code "Page"} and {@code "Kids"}. The page number is the first
 * whitespace-separated token of the {@code "Page"} value. Bookmarks with no
 * page, or with a page that cannot be parsed as an integer, are recorded with
 * page {@code 0}. A missing title is recorded as the empty string.
 *
 * <p>Unlike a single try/catch around the whole loop, a malformed entry no
 * longer stops the remaining bookmarks from being processed. Unexpected
 * runtime failures are no longer printed and discarded; they propagate to
 * the caller.
 *
 * @param listBookmarks bookmarks at the current level; {@code null} is
 *                      treated as an empty list
 * @param intLevel      number of levels to descend. {@code 1} records only
 *                      the given list, {@code 2} also records its children,
 *                      and so on. {@code 999} means "no depth limit". Values
 *                      of {@code 0} or less never count down to zero and
 *                      therefore also descend without limit.
 */
public void ParseBookMarkToLevel(List<HashMap<String,Object>> listBookmarks,Integer   intLevel )
{
    if (listBookmarks == null) {
        return;
    }

    // 999 is the "unlimited" sentinel and is passed down unchanged;
    // any other value is a remaining-depth counter.
    if (intLevel != 999) {
        intLevel--;
    }

    for (HashMap<String,Object> bookmark : listBookmarks) {
        if (bookmark == null) {
            continue;
        }

        // get() instead of containsKey() + get(): a key mapped to null is
        // handled the same way as an absent key instead of throwing.
        Object titleValue = bookmark.get("Title");
        String title = (titleValue != null) ? titleValue.toString() : "";

        Object pageValue = bookmark.get("Page");
        if (pageValue != null) {
            // The page number is the leading token; anything after it is ignored.
            String[] pageParts = pageValue.toString().trim().split("\\s+");
            int pageNumber = 0;
            try {
                pageNumber = Integer.parseInt(pageParts[0]);
            } catch (NumberFormatException e) {
                // Keep going: one bad entry must not discard the rest of the outline.
                System.err.println("Unparsable page \"" + pageValue
                        + "\" for bookmark \"" + title + "\", using 0: " + e);
            }
            this.lhmBookMarks.put(title, pageNumber);
            // NOTE(review): prints the entire map for every paged bookmark;
            // kept so the method's console output is unchanged.
            System.out.println(this.lhmBookMarks.toString());
        } else {
            this.lhmBookMarks.put(title, 0);
        }

        Object kids = bookmark.get("Kids");
        if (kids instanceof List && intLevel != 0) {
            // Element type cannot be verified at runtime because of erasure;
            // the nested list is assumed to have the same shape as listBookmarks.
            @SuppressWarnings("unchecked")
            List<HashMap<String, Object>> children = (List<HashMap<String, Object>>) kids;
            this.ParseBookMarkToLevel(children, intLevel);
        }
    }
}
Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top