I followed this strategy:
- run a Perl script that performs an LDAP query and writes the data to disk as JSON.
- read in the json structure with R, create a dataframe.
For step (1), I used this script:
#use Modern::Perl;
use strict;
use warnings;
use feature 'say';
use Net::LDAP;
use JSON;

# Dump all entries under ou=People from the LDAP server to a JSON file,
# one hash of attribute => value per entry.

# NOTE: Perl's chdir() does NOT expand "~" (that is shell behavior), so the
# original chdir("~/...") silently failed. Use $ENV{HOME} and check the result.
chdir("$ENV{HOME}/git/_my/R_one-offs/R_grabbag")
    or die "chdir failed: $!";

my $ldap = Net::LDAP->new('ldap.mydomain.de') or die "$@";
my $outfile = "ldapentries_mydomain_ldap.json";

my $mesg = $ldap->bind;    # an anonymous bind
die "LDAP bind failed: " . $mesg->error if $mesg->code;

# Get all cn's (= all names). The base DN must not contain a stray
# leading space (the original had " ou=People,...").
$mesg = $ldap->search(
    base   => "ou=People,dc=mydomain,dc=de",
    filter => "(cn=*)",
);
die "LDAP search failed: " . $mesg->error if $mesg->code;

my @entries;
foreach my $entry ($mesg->entries) {
    my %entry;
    foreach my $attr ($entry->attributes) {
        # get_value() in list context returns ALL values of the attribute.
        # The original loop kept only the last value of multi-valued
        # attributes (silent data loss); join them instead so the JSON
        # stays flat for the downstream R melt/spread step.
        my @values = $entry->get_value($attr);
        $entry{$attr} = @values > 1 ? join('; ', @values) : $values[0];
    }
    push @entries, \%entry;
}

my $json_text = to_json(\@entries);
say "Length json_text: " . length($json_text);

open(my $FH, ">", $outfile) or die "cannot open $outfile: $!";
print {$FH} $json_text;
# Buffered write errors only surface at close, so check it on a write handle.
close($FH) or die "cannot close $outfile: $!";

$mesg = $ldap->unbind;
You might need to check whether the LDAP server imposes a maximum limit on the number of entries returned.
See https://serverfault.com/questions/328671/paging-using-ldapsearch
For step (2), I used this R code:
setwd("~/git/_my/R_one-offs/R_grabbag")
library(rjson)
# Read the JSON file written by the Perl script into a nested R list
# (one list element per LDAP entry).
ldap_list <- rjson::fromJSON(file = "ldapentries_mydomain_ldap.json", method = "C")
head(ldap_list)
# Reshape the list of records into a data frame: melt() produces one row
# per (entry, attribute) pair, then spread() pivots the attribute names
# (melt's L2 column) back out into columns.
library(reshape2)
library(dplyr)
library(tidyr)
# not really efficient, maybe there's a better way to do it
df.ldap <- spread(melt(ldap_list), L2, value)
# optional:
# turn any factor columns into character columns
factor_cols <- vapply(df.ldap, is.factor, logical(1))
df.ldap[factor_cols] <- lapply(df.ldap[factor_cols], as.character)