Question

I have a single Google doc with several pages. I'd like to separate this by the page breaks that the application uses and save each page to a separate file. It might be worth noting that this document also has images in it.

Anyway, I've looked at what google script offers and it doesn't seem to have any convenient function for splitting the document by page. What I have so far is:

// The currently active document.
var current = DocumentApp.getActiveDocument();
// The document's body section.
var body = current.getBody();
// The paragraphs contained in the body.
var paragraphs = body.getParagraphs();

So, I'm able to drill down into my document and get the paragraphs, but I'm not sure where to go from here. My thought process was to get the paragraphs, append them all into one string, and then split it at the page breaks. From there, I can easily create new documents and save them. But, once I've got the paragraphs, I don't see a way to get the page breaks. Also, how are images handled? Each image is on its own page, so would they each be their own paragraph?

So, my main concern in this issue is how to break up my document by page. Any help on that would be great.

Was it helpful?

Solution

Well, it took quite some hacking, but I came up with something that did what I needed it to:

/**
 * Splits the active document at its manual page breaks and saves every page
 * as a separate document named "pg1", "pg2", ... inside a folder chosen by
 * the user.
 *
 * Only explicit PAGE_BREAK elements are detected, so each page of the source
 * document has to end with a manual page break. A paragraph that contains a
 * page break is cut in two: the content in front of the break ends the
 * current page, the content behind it starts the next page, and the break
 * itself is dropped. Several breaks inside one paragraph are all honoured.
 *
 * Relies on the sibling helpers getFolderName(), Doc() and Ui().
 *
 * NOTE: this version still files the documents through the DocsList service,
 * which Google has since retired; DriveApp is its replacement.
 *
 * @return {boolean|undefined} false when the folder prompt was cancelled,
 *     otherwise nothing.
 */
function splitAndSave()
{
    var folder = getFolderName();
    if (folder == null)
    {
        return false;
    }

    var PB = Doc().ElementType.PAGE_BREAK;
    var par = Doc().getActiveDocument().getBody().getParagraphs();

    var curPage = 1;

    // Naming scheme for files
    var page = Doc().create("pg" + curPage);

    for (var i = 0; i < par.length; i++)
    {
        // Always work on a detached copy so the source document stays untouched.
        var rest = par[i].copy();
        var breakAt = indexOfPageBreak(rest);

        while (breakAt >= 0)
        {
            // head keeps what precedes the break, rest keeps what follows it.
            var head = rest.copy();
            removeChildren(head, breakAt, head.getNumChildren() - 1);
            removeChildren(rest, 0, breakAt);

            page.getBody().appendParagraph(head);
            fileIntoFolder(page);

            curPage++;
            // Naming scheme for files
            page = Doc().create("pg" + curPage);

            breakAt = indexOfPageBreak(rest);
        }

        page.getBody().appendParagraph(rest);
    }

    fileIntoFolder(page);
    Ui().alert("Saving process complete");

    // Returns the index of the first page-break child of paragraph, or -1.
    function indexOfPageBreak(paragraph)
    {
        for (var j = 0; j < paragraph.getNumChildren(); j++)
        {
            if (paragraph.getChild(j).getType() === PB)
            {
                return j;
            }
        }
        return -1;
    }

    // Removes the children first..last (inclusive). Walks backwards so that a
    // removal never shifts the index of a child that still has to be visited.
    function removeChildren(paragraph, first, last)
    {
        for (var k = last; k >= first; k--)
        {
            paragraph.removeChild(paragraph.getChild(k));
        }
    }

    // Saves a finished page document and files it in the chosen folder.
    function fileIntoFolder(docPage)
    {
        docPage.saveAndClose();

        var file = DocsList.getFileById(docPage.getId());
        file.addToFolder(folder);

        // Need this step or your file ends up in two places
        file.removeFromFolder(DocsList.getRootFolder());
    }
}

Helper function to get a folder name:

/**
 * Asks the user for a folder name and creates that folder through DocsList.
 *
 * @return {Object|null} The newly created folder, or null when the prompt
 *     was closed with anything other than the OK button.
 */
function getFolderName()
{
    var answer = Ui().prompt("Enter a folder name");
    var wasConfirmed = answer.getSelectedButton() == Ui().Button.OK;

    if (!wasConfirmed)
    {
        return null;
    }

    return DocsList.createFolder(answer.getResponseText());
}

Other helpers:

/**
 * Shorthand accessor for the document service.
 *
 * @return {Object} The global DocumentApp object.
 */
function Doc()
{
    var documentService = DocumentApp;
    return documentService;
}

/**
 * Shorthand accessor for the user-interface object of the document service.
 *
 * @return {Object} Whatever Doc().getUi() returns.
 */
function Ui()
{
    var documentService = Doc();
    return documentService.getUi();
}

This will break up a Google Document by page with a naming convention of "pg" + incrementing number. It also puts them in a folder specified in a prompt.

The caveat here is that the document you plan to split needs a page break at the end of each page. So, basically, when you are moving on to the next page, press CTRL + ENTER. It's not a big deal, and it is actually a lot faster than pressing Enter over and over again until you hit a new page.

Oh, and it is rather slow. That's not a big deal to me because I only need to use it occasionally.

OTHER TIPS

Thanks to Toncoso for a great utility. Google has changed things, so I've updated it to use DriveApp. It is still slow, but it works.

//all thanks to Toncoso who posted this on stack overflow
//https://stackoverflow.com/questions/22362504/use-google-script-to-separate-a-google-doc
//Updated 6/1/2018 for driveapp

/**
 * Splits the active document at its manual page breaks and saves every page
 * as a separate document named "pg1", "pg2", ... inside a folder chosen by
 * the user.
 *
 * Only explicit PAGE_BREAK elements are detected, so each page of the source
 * document has to end with a manual page break. A paragraph that contains a
 * page break is cut in two: the content in front of the break ends the
 * current page, the content behind it starts the next page, and the break
 * itself is dropped. Several breaks inside one paragraph are all honoured.
 *
 * Relies on the sibling helpers getFolderName(), Doc() and Ui().
 *
 * @return {boolean|undefined} false when the folder prompt was cancelled,
 *     otherwise nothing.
 */
function splitAndSave()
{
    var folder = getFolderName();
    if (folder == null)
    {
        return false;
    }

    var PB = Doc().ElementType.PAGE_BREAK;
    var par = Doc().getActiveDocument().getBody().getParagraphs();

    var curPage = 1;

    // Naming scheme for files
    var page = Doc().create("pg" + curPage);

    for (var i = 0; i < par.length; i++)
    {
        // Always work on a detached copy so the source document stays untouched.
        var rest = par[i].copy();
        var breakAt = indexOfPageBreak(rest);

        while (breakAt >= 0)
        {
            // head keeps what precedes the break, rest keeps what follows it.
            var head = rest.copy();
            removeChildren(head, breakAt, head.getNumChildren() - 1);
            removeChildren(rest, 0, breakAt);

            page.getBody().appendParagraph(head);
            fileIntoFolder(page);

            curPage++;
            // Naming scheme for files
            page = Doc().create("pg" + curPage);

            breakAt = indexOfPageBreak(rest);
        }

        page.getBody().appendParagraph(rest);
    }

    fileIntoFolder(page);
    Ui().alert("Saving process complete");

    // Returns the index of the first page-break child of paragraph, or -1.
    function indexOfPageBreak(paragraph)
    {
        for (var j = 0; j < paragraph.getNumChildren(); j++)
        {
            if (paragraph.getChild(j).getType() === PB)
            {
                return j;
            }
        }
        return -1;
    }

    // Removes the children first..last (inclusive). Walks backwards so that a
    // removal never shifts the index of a child that still has to be visited.
    function removeChildren(paragraph, first, last)
    {
        for (var k = last; k >= first; k--)
        {
            paragraph.removeChild(paragraph.getChild(k));
        }
    }

    // Saves a finished page document and moves it into the chosen folder.
    // moveTo relocates the file in one step, so it does not end up in two
    // places the way the older addFile + removeFile pair required.
    function fileIntoFolder(docPage)
    {
        docPage.saveAndClose();

        var file = DriveApp.getFileById(docPage.getId());
        file.moveTo(folder);
    }
}

/**
 * Asks the user for a folder name and creates that folder through DriveApp.
 *
 * @return {Object|null} The newly created folder, or null when the prompt
 *     was closed with anything other than the OK button.
 */
function getFolderName()
{
    var answer = Ui().prompt("Enter a folder name");
    var wasConfirmed = answer.getSelectedButton() == Ui().Button.OK;

    if (!wasConfirmed)
    {
        return null;
    }

    return DriveApp.createFolder(answer.getResponseText());
}

/**
 * Shorthand accessor for the document service.
 *
 * @return {Object} The global DocumentApp object.
 */
function Doc()
{
    var documentService = DocumentApp;
    return documentService;
}

/**
 * Shorthand accessor for the user-interface object of the document service.
 *
 * @return {Object} Whatever Doc().getUi() returns.
 */
function Ui()
{
    var documentService = Doc();
    return documentService.getUi();
}

Wow that's useful. I'm adapting it to get sparse pages from a big doc into a new doc - a concert setlist generator out of a big songbook.

But it does not seem effective to sweep the entire doc to reach the # of PB needed, and do this again for every page you need to export. Still trying to figure out a clever way, but it looks like Google Docs scripting is limited anyway.

For instance in VBA this is possible:

' Jump the selection to page number myPageNumber of the big document.
myOriginBigDoc.ActiveWindow.Selection.GoTo wdGoToPage, wdGoToAbsolute, myPageNumber
' Copy the range of the "\page" bookmark (presumably Word's predefined bookmark for the page holding the selection - confirm).
myOriginBigDoc.Bookmarks("\page").Range.Copy
Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top