Question

Is there a way to create an NTFS junction point in Python? I know I can call the junction utility, but it would be better not to rely on external tools.

Was it helpful?

Solution

I answered this in a similar question, so I'll copy my answer to that below. Since writing that answer, I ended up writing a python-only (if you can call a module that uses ctypes python-only) module for creating, reading, and checking junctions which can be found in this folder. Hope that helps.

Also, unlike the answer that uses the CreateSymbolicLinkA API, the linked implementation should work on any Windows version that supports junctions. CreateSymbolicLinkA is only supported in Vista+.

Answer:

python ntfslink extension

Or if you want to use pywin32, you can use the previously stated method, and to read, use:

from win32file import *
from winioctlcon import FSCTL_GET_REPARSE_POINT

# Public API of this module.
__all__ = ['islink', 'readlink']

# win32file does not appear to export this flag, so it is defined here
# (0x400 == 1024).
FILE_ATTRIBUTE_REPARSE_POINT = 0x400
# Convenience mask: the directory bit combined with the reparse-point bit.
REPARSE_FOLDER = FILE_ATTRIBUTE_DIRECTORY | FILE_ATTRIBUTE_REPARSE_POINT

# Layout selectors accepted by parse_reparse_buffer().
SYMBOLIC_LINK, MOUNTPOINT, GENERIC = 'symbolic', 'mountpoint', 'generic'

def islink(fpath):
    """ Windows islink implementation.

    Returns True when fpath carries the reparse-point attribute (for
    example a symbolic link or a junction) and False otherwise.

    Only FILE_ATTRIBUTE_REPARSE_POINT is tested.  Masking with
    REPARSE_FOLDER, which also contains FILE_ATTRIBUTE_DIRECTORY, would
    report every ordinary directory as a link.
    """
    return bool(GetFileAttributes(fpath) & FILE_ATTRIBUTE_REPARSE_POINT)


def parse_reparse_buffer(original, reparse_type=SYMBOLIC_LINK):
    """ Decode a raw REPARSE_DATA_BUFFER into a dictionary.

    Implementing the below in Python:

    typedef struct _REPARSE_DATA_BUFFER {
        ULONG  ReparseTag;
        USHORT ReparseDataLength;
        USHORT Reserved;
        union {
            struct {
                USHORT SubstituteNameOffset;
                USHORT SubstituteNameLength;
                USHORT PrintNameOffset;
                USHORT PrintNameLength;
                ULONG Flags;
                WCHAR PathBuffer[1];
            } SymbolicLinkReparseBuffer;
            struct {
                USHORT SubstituteNameOffset;
                USHORT SubstituteNameLength;
                USHORT PrintNameOffset;
                USHORT PrintNameLength;
                WCHAR PathBuffer[1];
            } MountPointReparseBuffer;
            struct {
                UCHAR  DataBuffer[1];
            } GenericReparseBuffer;
        } DUMMYUNIONNAME;
    } REPARSE_DATA_BUFFER, *PREPARSE_DATA_BUFFER;

    original is the raw byte string to decode and reparse_type
    (SYMBOLIC_LINK, MOUNTPOINT or GENERIC) selects which union member
    is decoded.

    The returned dictionary holds the raw header slices under 'tag'
    (4 bytes), 'data_length' (2 bytes) and 'reserved' (2 bytes), plus
    one sub-dictionary per layout.  In the sub-dictionary selected by
    reparse_type every field listed in 'pkeys' is replaced by its
    decoded integer value and 'buffer' holds the bytes that follow the
    fixed fields.  The other sub-dictionaries keep the field sizes as
    placeholders.

    Raises KeyError if reparse_type is not one of the known layouts.
    """
    import struct  # local import: the module header does not import it

    # Size of our data types
    SZULONG = 4 # sizeof(ULONG)
    SZUSHORT = 2 # sizeof(USHORT)
    # Little-endian struct format for each field size.
    formats = {SZUSHORT: '<H', SZULONG: '<I'}

    # Fixed fields shared by the symbolic-link and mount-point layouts,
    # in on-disk order.
    name_fields = [
        ('substitute_name_offset', SZUSHORT),
        ('substitute_name_length', SZUSHORT),
        ('print_name_offset', SZUSHORT),
        ('print_name_length', SZUSHORT),
    ]
    layouts = [
        (SYMBOLIC_LINK, name_fields + [('flags', SZULONG)], u''),
        (MOUNTPOINT, name_fields, u''),
        (GENERIC, [], ''),
    ]

    # Header stuff, kept as raw slices: tag is bytes 0-3, data_length
    # bytes 4-5 and reserved bytes 6-7.
    parsed = {
        'tag': original[:SZULONG],
        'data_length': original[SZULONG:SZULONG + SZUSHORT],
        'reserved': original[SZULONG + SZUSHORT:SZULONG + 2 * SZUSHORT],
    }
    for kind, fields, empty in layouts:
        entry = dict(fields)
        entry['buffer'] = empty
        entry['pkeys'] = [name for name, _size in fields]
        parsed[kind] = entry

    # Parsing
    rest = original[8:]
    selected = parsed[reparse_type]
    for key in selected['pkeys']:
        size = selected[key]
        # Zero-pad so that a truncated buffer still decodes (missing
        # bytes count as 0) instead of raising struct.error.
        raw = rest[:size].ljust(size, b'\x00')
        selected[key] = struct.unpack(formats[size], raw)[0]
        rest = rest[size:]

    # Whatever follows the fixed fields is the path/data buffer that the
    # decoded offsets and lengths index into.
    selected['buffer'] = rest
    return parsed

def readlink(fpath):
    """ Windows readlink implementation.

    Returns the substitute name stored in the reparse point at fpath as
    a text string, with a leading '\\??\\' prefix removed.  Returns None
    when fpath is not a link or the reparse data is too short to hold a
    target.
    """
    import struct  # local import: the module header does not import it

    # This wouldn't return true if the file didn't exist, as far as I know.
    if not islink(fpath):
        return None

    # Open the file correctly depending on the string type.
    # type(u'') is unicode on Python 2 and str on Python 3.
    open_file = CreateFileW if isinstance(fpath, type(u'')) else CreateFile
    # FILE_FLAG_OPEN_REPARSE_POINT opens the link itself rather than its
    # target; FILE_FLAG_BACKUP_SEMANTICS is additionally required to get
    # a handle when the link is a directory (junction or directory symlink).
    handle = open_file(fpath, GENERIC_READ, 0, None, OPEN_EXISTING,
                       FILE_FLAG_OPEN_REPARSE_POINT | FILE_FLAG_BACKUP_SEMANTICS,
                       0)
    try:
        # MAXIMUM_REPARSE_DATA_BUFFER_SIZE = 16384 = (16*1024)
        raw = DeviceIoControl(handle, FSCTL_GET_REPARSE_POINT, None, 16*1024)
    finally:
        # Always release the handle so the file is not left locked, even
        # when the control call fails.
        CloseHandle(handle)

    # Minimum possible length (assuming that the length of the target is bigger than 0)
    if len(raw) < 9:
        return None

    # The first ULONG is the reparse tag.  Junctions use
    # IO_REPARSE_TAG_MOUNT_POINT (0xA0000003), whose layout has no Flags
    # field; everything else is parsed with the symbolic-link layout.
    tag = struct.unpack('<I', raw[:4])[0]
    kind = MOUNTPOINT if tag == 0xA0000003 else SYMBOLIC_LINK

    # Parse and return our result.
    result = parse_reparse_buffer(raw, kind)
    offset = result[kind]['substitute_name_offset']
    ending = offset + result[kind]['substitute_name_length']
    # PathBuffer holds WCHARs, i.e. UTF-16-LE text.
    rpath = result[kind]['buffer'][offset:ending].decode('utf-16-le')
    if len(rpath) > 4 and rpath.startswith('\\??\\'):
        rpath = rpath[4:]
    return rpath

def realpath(fpath):
    """ Follow the chain of links starting at fpath.

    Relative link targets are resolved against the directory that
    contains the link.  Returns the first path that is not a link.
    Resolution stops early, returning the path reached so far, when a
    link target cannot be read or when a path is visited a second time
    (a link cycle).
    """
    from os import path
    seen = set()
    while islink(fpath):
        # Guard against cycles such as a -> b -> a, which would
        # otherwise never terminate.
        key = path.normcase(fpath)
        if key in seen:
            break
        seen.add(key)

        rpath = readlink(fpath)
        if rpath is None:
            # readlink() could not extract a target; stop here rather
            # than passing None to the path functions.
            break
        if not path.isabs(rpath):
            rpath = path.abspath(path.join(path.dirname(fpath), rpath))
        fpath = rpath
    return fpath


def example():
    """ Demonstrate islink(), readlink() and realpath().

    Creates test.txt and a symbolic link test-link.txt to it in the
    current directory, prints what the three functions report for the
    link, and removes both files again even if one of the calls fails.
    """
    from os import path, system, unlink
    system('cmd.exe /c echo Hello World > test.txt')
    system('mklink test-link.txt test.txt')
    try:
        # A single parenthesised argument prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print('IsLink: %s' % islink('test-link.txt'))
        print('ReadLink: %s' % readlink('test-link.txt'))
        print('RealPath: %s' % realpath('test-link.txt'))
    finally:
        # Remove the link first, then the file it points to.  Either may
        # be missing if the corresponding command above failed.
        for name in ('test-link.txt', 'test.txt'):
            if path.lexists(name):
                unlink(name)

# Run the demonstration only when executed as a script, not on import.
if __name__=='__main__':
    example()

Adjust the attributes in the CreateFile to your needs, but for a normal situation, it should work. Feel free to improve on it.

It should also work for folder junctions if you use MOUNTPOINT instead of SYMBOLIC_LINK.

You may want to check that

sys.getwindowsversion()[0] >= 6

if you put this into something you're releasing, since this form of symbolic link is only supported on Vista+.

OTHER TIPS

you can use python win32 API modules e.g.

import win32file

# NOTE(review): srcDir and targetDir are not defined in this snippet.
# Per the pywin32 documentation the first argument is presumably the path
# of the link to create and the second the path it points to, with the
# trailing 1 marking the target as a directory -- confirm before use.
win32file.CreateSymbolicLink(srcDir, targetDir, 1)

see http://docs.activestate.com/activepython/2.5/pywin32/win32file__CreateSymbolicLink_meth.html for more details

If you do not want to rely on that either, you can always use ctypes and call the CreateSymbolicLink Win32 API directly, which is a simple call anyway.

here is example call using ctypes

import ctypes

kdll = ctypes.windll.LoadLibrary("kernel32.dll")

# BOOLEAN CreateSymbolicLinkA(LPCSTR lpSymlinkFileName,
#                             LPCSTR lpTargetFileName,
#                             DWORD  dwFlags);
# Declare the prototype explicitly: BOOLEAN is a single byte, so ctypes'
# default int return type could pick up garbage in the upper bytes.
kdll.CreateSymbolicLinkA.argtypes = (ctypes.c_char_p, ctypes.c_char_p,
                                     ctypes.c_uint32)
kdll.CreateSymbolicLinkA.restype = ctypes.c_ubyte

# The link to create comes first, the existing directory second.
# Backslashes are doubled so that "\t" is not read as a tab character,
# and byte strings are used because this is the ANSI ("A") entry point.
# Flag 1 is SYMBOLIC_LINK_FLAG_DIRECTORY (the target is a directory).
if not kdll.CreateSymbolicLinkA(b"d:\\testdir_link", b"d:\\testdir", 1):
    # The call reports failure by returning 0; surface the Windows error.
    raise ctypes.WinError()

MSDN says Minimum supported client Windows Vista

Since Python 3.5 there's a function CreateJunction in _winapi module.

# NOTE(review): _winapi is a private module (leading underscore), so this
# call is not a documented API and may change between Python versions.
# Presumably `source` is the existing directory and `target` the junction
# to create -- confirm; neither name is defined in this snippet.
import _winapi
_winapi.CreateJunction(source, target)

You don't want to rely on external tools but you don't mind relying on the specific environment? I think you could safely assume that, if it's NTFS you're running on, the junction utility will probably be there.

But, if you mean you'd rather not call out to an external program, I've found the ctypes stuff to be invaluable. It allows you to call Windows DLLs directly from Python. And I'm pretty sure it's in the standard Python releases nowadays.

You'd just have to figure out which Windows DLL the CreateJunction() (or whatever Windows calls it) API call is in and set up the parameters and call. Best of luck with that, Microsoft don't seem to support it very well. You could disassemble the SysInternals junction program or linkd or one of the other tools to find out how they do it.

Me, I'm pretty lazy, I'd just call junction as an external process :-)

Based on the accepted answer by Charles, here are improved (and cross-platform) versions of the functions (Python 2.7 and 3.5+).

  • islink() now also detects file symbolic links under Windows (just like the POSIX equivalent)
  • parse_reparse_buffer() and readlink() now actually detect the type of reparse point (NTFS Junction, symlink or generic) which is needed to correctly decode the path
  • readlink() no longer fails with access denied on NTFS Junctions or directory symlinks (unless you really have no permission to read attributes)

import os
import struct
import sys

if sys.platform == "win32":
    from win32file import *
    from winioctlcon import FSCTL_GET_REPARSE_POINT

# Names exported by a star import of this module.
__all__ = ['islink', 'readlink']

# win32file does not appear to provide this flag (0x400 == 1024).
FILE_ATTRIBUTE_REPARSE_POINT = 0x400

# Reparse tags.  win32\lib\winnt.py defines these too, but with wrong values.
IO_REPARSE_TAG_SYMLINK = 0xA000000C
IO_REPARSE_TAG_MOUNT_POINT = 0xA0000003  # Junction

def islink(path):
    """
    Cross-platform islink implementation.

    Supports Windows NT symbolic links and reparse points.

    Returns True when path is a link and False otherwise.  Off Windows,
    and on Windows releases with a major version below 6, the answer
    comes from os.path.islink(); otherwise the reparse-point attribute
    of path is tested.

    """
    if sys.platform != "win32" or sys.getwindowsversion()[0] < 6:
        return os.path.islink(path)
    # lexists() does not follow the link, so a link whose target is
    # missing is still reported, as os.path.islink() does on POSIX.
    if not os.path.lexists(path):
        return False
    return bool(GetFileAttributes(path) & FILE_ATTRIBUTE_REPARSE_POINT)


def parse_reparse_buffer(buf):
    """ Decode a raw reparse data buffer into a dictionary.

    The layout being decoded is:

    typedef struct _REPARSE_DATA_BUFFER {
        ULONG  ReparseTag;
        USHORT ReparseDataLength;
        USHORT Reserved;
        union {
            struct {
                USHORT SubstituteNameOffset;
                USHORT SubstituteNameLength;
                USHORT PrintNameOffset;
                USHORT PrintNameLength;
                ULONG Flags;
                WCHAR PathBuffer[1];
            } SymbolicLinkReparseBuffer;
            struct {
                USHORT SubstituteNameOffset;
                USHORT SubstituteNameLength;
                USHORT PrintNameOffset;
                USHORT PrintNameLength;
                WCHAR PathBuffer[1];
            } MountPointReparseBuffer;
            struct {
                UCHAR  DataBuffer[1];
            } GenericReparseBuffer;
        } DUMMYUNIONNAME;
    } REPARSE_DATA_BUFFER, *PREPARSE_DATA_BUFFER;

    The result always contains 'tag', 'data_length', 'reserved' and
    'buffer'.  For mount-point and symlink tags it also contains the
    four name offset/length fields, and for symlink tags 'flags' as
    well.  'buffer' is whatever follows the last decoded field.

    """
    # See https://docs.microsoft.com/en-us/windows-hardware/drivers/ddi/content/ntifs/ns-ntifs-_reparse_data_buffer

    # Common 8-byte header: one ULONG followed by two USHORTs,
    # all little-endian.
    tag, data_length, reserved = struct.unpack_from('<IHH', buf, 0)
    data = {'tag': tag, 'data_length': data_length, 'reserved': reserved}
    cursor = 8

    if tag == IO_REPARSE_TAG_MOUNT_POINT or tag == IO_REPARSE_TAG_SYMLINK:
        # Both layouts start with the same four USHORT name fields.
        (data['substitute_name_offset'],
         data['substitute_name_length'],
         data['print_name_offset'],
         data['print_name_length']) = struct.unpack_from('<4H', buf, cursor)
        cursor += 8

        # Only the symbolic-link layout carries a trailing ULONG Flags.
        if tag == IO_REPARSE_TAG_SYMLINK:
            data['flags'] = struct.unpack_from('<I', buf, cursor)[0]
            cursor += 4

    # The remainder is the buffer that the offsets and lengths refer to.
    data['buffer'] = buf[cursor:]

    return data


def readlink(path):
    """
    Cross-platform implementation of readlink.

    Supports Windows NT symbolic links and reparse points.

    Off Windows this is os.readlink(path).  On Windows it returns the
    substitute name stored in the reparse point with a leading
    '\\??\\' prefix removed, the raw data buffer for tags other than
    mount point and symlink, or an empty value of path's type when the
    reparse data is too short.

    Raises OSError (errno 22) on Windows when path is not a link.

    """
    if sys.platform != "win32":
        return os.readlink(path)

    # This wouldn't return true if the file didn't exist
    if not islink(path):
        # Mimic POSIX error
        raise OSError(22, 'Invalid argument', path)

    # Open the file correctly depending on the string type.
    if isinstance(path, type(u'')):
        createfilefn = CreateFileW
    else:
        createfilefn = CreateFile
    # FILE_FLAG_OPEN_REPARSE_POINT alone is not enough if 'path'
    # is a symbolic link to a directory or a NTFS junction.
    # We need to set FILE_FLAG_BACKUP_SEMANTICS as well.
    # See https://docs.microsoft.com/en-us/windows/desktop/api/fileapi/nf-fileapi-createfilea
    handle = createfilefn(path, GENERIC_READ, 0, None, OPEN_EXISTING,
                          FILE_FLAG_BACKUP_SEMANTICS | FILE_FLAG_OPEN_REPARSE_POINT, 0)
    try:
        # MAXIMUM_REPARSE_DATA_BUFFER_SIZE = 16384 = (16 * 1024)
        # The call returns a byte string that still has to be parsed.
        buf = DeviceIoControl(handle, FSCTL_GET_REPARSE_POINT, None, 16 * 1024)
    finally:
        # Always close the handle so the file is not left locked, even
        # when the control call raises.
        CloseHandle(handle)

    # Minimum possible length (assuming that the length is bigger than 0)
    if len(buf) < 9:
        return type(path)()
    # Parse and return our result.
    result = parse_reparse_buffer(buf)
    if result['tag'] in (IO_REPARSE_TAG_MOUNT_POINT, IO_REPARSE_TAG_SYMLINK):
        offset = result['substitute_name_offset']
        ending = offset + result['substitute_name_length']
        rpath = result['buffer'][offset:ending].decode('UTF-16-LE')
    else:
        rpath = result['buffer']
    if len(rpath) > 4 and rpath[0:4] == '\\??\\':
        rpath = rpath[4:]
    return rpath
Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top