Question

My program runs without errors, but I want the files I download from FTP to be zipped on my local drive.

My problem: only one file ends up in the zip archive after calling my main() function.

Here's my code:

main

import os
import upload
import download
import zipfile
import ConfigParser
import ftputil

def main():
    """Download an FTP folder into a local Temp folder, then zip the copies.

    Steps, in order:
      1. Create the local Temp folder with os.mkdir (which raises if the
         folder already exists).
      2. Read Server, Username, Password, Upload folder and Download folder
         from the [main] section of config.ini.
      3. Connect with ftputil and download every entry listed in the
         download folder into the Temp folder.
      4. For every entry now in the Temp folder, open project2.zip and
         write the entry under the Project2b folder inside the archive.
    """
    
    # Create a Temp folder on drive D for later use
    path = r'D:\Temp'
    os.mkdir(path)
    
    # Parse all the values from the config.ini file
    config = ConfigParser.ConfigParser()
    config.readfp(open('config.ini'))
    server = config.get('main', 'Server')
    username = config.get('main', 'Username')
    password = config.get('main', 'Password')
    uploads = config.get('main', 'Upload folder')  # read here but not used below
    downloads = config.get('main', 'Download folder')

    # Connect to the FTP server
    ftp = ftputil.FTPHost(server, username, password)

    dirlist = ftp.listdir(downloads)
    
    # Copy each remote entry to a file of the same name in the Temp folder
    for list in dirlist:
        ftp.chdir(downloads)
        target = os.path.join(path, list)
        ftp.download(list, target)
        
    
    ##########################################################
    # This section is where the algorithm fails, although    #
    # the program runs                                       #
    ##########################################################
    
    # Zipping files
    absolute_path = r'D:\Temp'
    dirlist = os.listdir(absolute_path)
    filepath = r'D:\Temp\project2.zip'
    for list in dirlist:
        get_file = os.path.join(absolute_path, list)
        # A ZipFile is opened in 'w' mode on every pass through this loop
        zip_name = zipfile.ZipFile(filepath, 'w')
        zip_name.write(get_file, 'Project2b\\' + list)
        
                
        

if __name__ == '__main__':
    # NOTE(review): this guard only prints a message; main() is not called here.
    print "cannot be"
Was it helpful?

Solution

When you do this:

for list in dirlist:
        get_file = os.path.join(absolute_path, list)
        # 'w' truncates the archive, so each pass discards what earlier passes wrote
        zip_name = zipfile.ZipFile(filepath, 'w')
        zip_name.write(get_file, 'Project2b\\' + list)

you recreate the ZipFile for each file you want to zip; the "w" mode means the archive is rebuilt from scratch every time, so only the last file survives.

Try this (create the zip file before the loop):

# Open the archive once, before the loop, so every file lands in the same zip.
# Leaving the with-block closes the archive, which is what finalizes it on disk
# (ZipFile works as a context manager from Python 2.7 onwards).
with zipfile.ZipFile(filepath, 'w') as zip_name:
    for name in dirlist:
        get_file = os.path.join(absolute_path, name)
        zip_name.write(get_file, 'Project2b\\' + name)

Or this, which opens the zip file in append mode:

# Append mode keeps the entries that are already in the archive, so re-opening
# it for each file no longer loses the earlier ones.
# NOTE: 'a' also keeps entries from a previous run if the zip already exists.
for name in dirlist:
    get_file = os.path.join(absolute_path, name)
    # Close the archive after each file so no handle is left open.
    with zipfile.ZipFile(filepath, 'a') as zip_name:
        zip_name.write(get_file, 'Project2b\\' + name)

OTHER TIPS

Have a look at the shutil module. There is an example using shutil.make_archive():

http://docs.python.org/library/shutil.html

If you have a lot of files, you can zip them in parallel (this example writes a separate archive for each CSV file):

import zipfile
from pathlib import Path, WindowsPath
from typing import List, Text
import logging
from time import time
from concurrent.futures import ThreadPoolExecutor

# Timestamped log lines; DEBUG level so the INFO progress messages are emitted.
logging.basicConfig(
    format="%(asctime)s - %(message)s", datefmt="%H:%M:%S", level=logging.DEBUG
)

# Directory that is searched for CSV files and that receives the zip archives.
PATH = (r"\\some_directory\subdirectory\zipped")


def file_names() -> List[Path]:
    """Return the ``*.csv`` files that sit directly inside ``PATH``.

    Only the top level of the directory is searched; the glob pattern is not
    recursive. The number of matches is logged at INFO level.

    Returns:
        The matching paths. The concrete class is ``WindowsPath`` on Windows
        and ``PosixPath`` elsewhere, hence the ``Path`` annotation.
    """
    # Named differently from the function so the function is not shadowed.
    csv_files = list(Path(PATH).glob("./*.csv"))
    logging.info("There are %d files", len(csv_files))
    return csv_files


def zip_file(file: Path) -> None:
    """Compress one file into its own archive inside ``PATH``.

    The archive is named after the file's stem (``data.csv`` -> ``data.zip``).
    It is opened in ``"w"`` mode, so an existing archive of that name is
    overwritten. The file is stored under its bare name with DEFLATE
    compression.

    Args:
        file: Path of the file to compress.
    """
    archive_path = Path(PATH, f"{file.stem}.zip")
    # Not named ``zip`` so the builtin stays usable inside this function.
    with zipfile.ZipFile(archive_path, "w") as archive:
        archive.write(file, arcname=file.name, compress_type=zipfile.ZIP_DEFLATED)


def main(files: List[Path]) -> None:
    """Zip every file in *files* on a thread pool and log the progress.

    Each path is handed to ``zip_file``. A progress line is logged after every
    100 completed files and a summary line at the end; elapsed time is shown
    as ``minutes:seconds``.

    Args:
        files: Paths of the files to compress.

    Raises:
        Exception: An exception raised by ``zip_file`` propagates from here
            when that file's result is consumed from ``executor.map``.
    """
    t0 = time()
    number_of_files = len(files)
    with ThreadPoolExecutor() as executor:
        # executor.map yields results in input order, so ``counter`` is the
        # number of leading files that have finished.
        for counter, _ in enumerate(executor.map(zip_file, files), start=1):
            # update progress every 100 files
            if counter % 100 == 0:
                logging.info(
                    # %02d pads the seconds so 1 min 5 s reads "1:05", not "1:5"
                    "Processed %d/%d. TT: %d:%02d",
                    counter,
                    number_of_files,
                    *divmod(int(time() - t0), 60),
                )

    logging.info(
        "Finished zipping %d files. Total time: %d:%02d",
        number_of_files,
        *divmod(int(time() - t0), 60),
    )


if __name__ == "__main__":
    # Collect the CSV paths, then compress them.
    main(file_names())

The best way to track this down is to put debug statements in your for loops. There are two possibilities:

one is that the first for loop downloads only one file from the FTP folder;

the other is that the first loop downloads all the files but the second loop zips only one of them.

Use print statements inside the loops to see which files are downloaded and zipped. Good luck!

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top