Question

I have a json text file which reads

{"type":"session.ended","v":2,"post.source":"1306210600-col001.sv1.movenetworks.com:2097#5","post.ip4":"75.114.187.146","post.rtime_secs":1371794661,"post.rtime_text":"2013-06-21 06:04:20","post.time_secs":1371794596,"post.time_text":"2013-06-21 06:03:16","post.time_date":"2013-06-21","post.time_hour":6,"post.late_secs":65,"id.session":"3625657","id.sub":"2370b726-b96e-11e2-b3eb-1231380e1adf","id.partner":"0CB48A664E514CA48D378D152574EDBB","id.service":"BBTV (CBD46B77)","device.make":"Roku","device.model":"3050X","device.info":"Roku;3050X","device.serial":"12C241003940","device.version":"2.5.0.7","device.uuid":"51ce2255-62ad-5778-b2d7-b9543c1476c6","device.os":"Linux","device.os_version":"2.6.35","device.platform":"Roku","device.platform_vendor":"Move Networks","device.platform_version":"1.0.0.20130329","device.licenses":[],"user.type":"Subscriber","ip.provider":"netacuity","ip.postal_code":"48154","ip.dma":505,"ip.dma_name":"unknown","ip.city":"livonia","ip.country":"united states","ip.region":"michigan","ip.continent":"north america","ip.isp":"bright house networks llc","ip.asn":0,"ip.asn_owner":"?","clip.id":"1338713","clip.pub":"CBD46B77","asset.id":524768,"asset.pub":"CBD46B77","asset.length_ms":1800000,"asset.guid":"b7c12c09dc5aec832e142a00b0f191fa","asset.title":"Diya Aur Bati Hum","asset.type":"captured","asset.adult":false,"asset.franchise_guid":"941496e1452b4fce9acfe7b3339924eb","asset.franchise_title":"Diya Aur Bati Hum","container.id":14,"container.pub":"CBD46B77","container.guid":"3caa6afd715e4c57ac4750d29e449a9c","container.title":"SPLUS","usage.elapsed_ms":2312,"usage.viewed_ms":392,"usage.stage":"mainVideo","exp.idle_ms":350,"exp.stalls":0,"exp.stalled_ms":0,"exp.frame_renders":0,"exp.frame_drops":0,"exp.ghost_session_ms":0,"exp.ghost_sessions":0,"exp.qmx_stale_ms":0,"exp.qmx_error_ms":0,"exp.qss_late_ms":0,"exp.qss_error_ms":0,"exp.fom":0,"exp.fom_weight":0,"data.dl_bytes":228,"data.ul_bytes":406,"http.oks":2,"http.errors":0,"http.timeouts":0,"net.throughput":8977,"http.slows":0,"data.bitrate_mean":1968,"data.bitrate_stddev":0,"data.bitrate_median":1950,"data.bitrate_modes":[1950],"data.streamlets":1,"data.late_streamlets":0,"data.all_streamlets":0,"data.bf_streamlets":0,"data.ab_streamlets":0}
{"type":"session.started","v":2,"post.source":"1306210600-col004.sv1.movenetworks.com:2183#6","post.ip4":"63.225.172.43","post.rtime_secs":1371794671,"post.rtime_text":"2013-06-21 06:04:31","post.time_secs":1371794660,"post.time_text":"2013-06-21 06:04:20","post.time_date":"2013-06-21","post.time_hour":6,"post.late_secs":11,"id.session":"232169818","id.sub":"55d514ba-3858-11e2-91a7-12313d08e01f","id.partner":"0CB48A664E514CA48D378D152574EDBB","id.service":"BBTV (CBD46B77)","device.make":"Roku","device.model":"3100X","device.info":"Roku;3100X","device.serial":"13C2AE061481","device.version":"2.5.0.37","device.uuid":"7f5654d5-3aa7-5a5f-bb2b-8084da358942","device.os":"Linux","device.os_version":"2.6.35","device.platform":"Roku","device.platform_vendor":"Move Networks","device.platform_version":"1.0.0.20130615","device.licenses":[],"user.type":"Subscriber","ip.provider":"netacuity","ip.postal_code":"98115","ip.dma":819,"ip.dma_name":"unknown","ip.city":"seattle","ip.country":"united states","ip.region":"washington","ip.continent":"north america","ip.isp":"qwest communications company llc","ip.asn":0,"ip.asn_owner":"?","clip.id":"1339170","clip.pub":"CBD46B77","asset.id":522015,"asset.pub":"CBD46B77","asset.length_ms":7200000,"asset.guid":"c6938cfa200a21e90dce41f5ed131cc2","asset.title":"Spark Top 20","asset.type":"captured","asset.adult":false,"container.id":277,"container.pub":"CBD46B77","container.guid":"03e3a689e245457bba2f98c30ef931fa","container.title":"BIGMGC","usage.stage":"mainVideo","exp.idle_ms":5772}

I want to load it in R and convert to a dataframe.Here field names are a part of the data and also we have unequal no of fields in each row (a total of 13 rows)

Any help will be appreciated.

Was it helpful?

Solution

Here's one way to do it:

file <- '[
{"type":"session.ended","v":2,"post.source":"1306210600-col001.sv1.movenetworks.com:2097#5","post.ip4":"75.114.187.146","post.rtime_secs":1371794661,"post.rtime_text":"2013-06-21 06:04:20","post.time_secs":1371794596,"post.time_text":"2013-06-21 06:03:16","post.time_date":"2013-06-21","post.time_hour":6,"post.late_secs":65,"id.session":"3625657","id.sub":"2370b726-b96e-11e2-b3eb-1231380e1adf","id.partner":"0CB48A664E514CA48D378D152574EDBB","id.service":"BBTV (CBD46B77)","device.make":"Roku","device.model":"3050X","device.info":"Roku;3050X","device.serial":"12C241003940","device.version":"2.5.0.7","device.uuid":"51ce2255-62ad-5778-b2d7-b9543c1476c6","device.os":"Linux","device.os_version":"2.6.35","device.platform":"Roku","device.platform_vendor":"Move Networks","device.platform_version":"1.0.0.20130329","device.licenses":[],"user.type":"Subscriber","ip.provider":"netacuity","ip.postal_code":"48154","ip.dma":505,"ip.dma_name":"unknown","ip.city":"livonia","ip.country":"united states","ip.region":"michigan","ip.continent":"north america","ip.isp":"bright house networks llc","ip.asn":0,"ip.asn_owner":"?","clip.id":"1338713","clip.pub":"CBD46B77","asset.id":524768,"asset.pub":"CBD46B77","asset.length_ms":1800000,"asset.guid":"b7c12c09dc5aec832e142a00b0f191fa","asset.title":"Diya Aur Bati Hum","asset.type":"captured","asset.adult":false,"asset.franchise_guid":"941496e1452b4fce9acfe7b3339924eb","asset.franchise_title":"Diya Aur Bati Hum","container.id":14,"container.pub":"CBD46B77","container.guid":"3caa6afd715e4c57ac4750d29e449a9c","container.title":"SPLUS","usage.elapsed_ms":2312,"usage.viewed_ms":392,"usage.stage":"mainVideo","exp.idle_ms":350,"exp.stalls":0,"exp.stalled_ms":0,"exp.frame_renders":0,"exp.frame_drops":0,"exp.ghost_session_ms":0,"exp.ghost_sessions":0,"exp.qmx_stale_ms":0,"exp.qmx_error_ms":0,"exp.qss_late_ms":0,"exp.qss_error_ms":0,"exp.fom":0,"exp.fom_weight":0,"data.dl_bytes":228,"data.ul_bytes":406,"http.oks":2,"http.errors":0,"http.timeouts":0,"net.throughput":8977,"http.slows":0,"data.bitrate_mean":1968,"data.bitrate_stddev":0,"data.bitrate_median":1950,"data.bitrate_modes":[1950],"data.streamlets":1,"data.late_streamlets":0,"data.all_streamlets":0,"data.bf_streamlets":0,"data.ab_streamlets":0}
,{"type":"session.started","v":2,"post.source":"1306210600-col004.sv1.movenetworks.com:2183#6","post.ip4":"63.225.172.43","post.rtime_secs":1371794671,"post.rtime_text":"2013-06-21 06:04:31","post.time_secs":1371794660,"post.time_text":"2013-06-21 06:04:20","post.time_date":"2013-06-21","post.time_hour":6,"post.late_secs":11,"id.session":"232169818","id.sub":"55d514ba-3858-11e2-91a7-12313d08e01f","id.partner":"0CB48A664E514CA48D378D152574EDBB","id.service":"BBTV (CBD46B77)","device.make":"Roku","device.model":"3100X","device.info":"Roku;3100X","device.serial":"13C2AE061481","device.version":"2.5.0.37","device.uuid":"7f5654d5-3aa7-5a5f-bb2b-8084da358942","device.os":"Linux","device.os_version":"2.6.35","device.platform":"Roku","device.platform_vendor":"Move Networks","device.platform_version":"1.0.0.20130615","device.licenses":[],"user.type":"Subscriber","ip.provider":"netacuity","ip.postal_code":"98115","ip.dma":819,"ip.dma_name":"unknown","ip.city":"seattle","ip.country":"united states","ip.region":"washington","ip.continent":"north america","ip.isp":"qwest communications company llc","ip.asn":0,"ip.asn_owner":"?","clip.id":"1339170","clip.pub":"CBD46B77","asset.id":522015,"asset.pub":"CBD46B77","asset.length_ms":7200000,"asset.guid":"c6938cfa200a21e90dce41f5ed131cc2","asset.title":"Spark Top 20","asset.type":"captured","asset.adult":false,"container.id":277,"container.pub":"CBD46B77","container.guid":"03e3a689e245457bba2f98c30ef931fa","container.title":"BIGMGC","usage.stage":"mainVideo","exp.idle_ms":5772}
]'

You need the function fromJSON of the RJSONIO package:

library(RJSONIO)
json <- fromJSON(file, nullValue = NA)

Replace (empty) lists by NA and convert to data frames:

dat <- lapply(json, function(j) {
  as.data.frame(replace(j, sapply(j, is.list), NA))
})

Create a single data frame:

library(plyr)
res <- rbind.fill(dat)

The result (res):

             type v
1   session.ended 2
2 session.started 2
                                    post.source
1 1306210600-col001.sv1.movenetworks.com:2097#5
2 1306210600-col004.sv1.movenetworks.com:2183#6
        post.ip4 post.rtime_secs     post.rtime_text
1 75.114.187.146      1371794661 2013-06-21 06:04:20
2  63.225.172.43      1371794671 2013-06-21 06:04:31
  post.time_secs      post.time_text post.time_date
1     1371794596 2013-06-21 06:03:16     2013-06-21
2     1371794660 2013-06-21 06:04:20     2013-06-21
  post.time_hour post.late_secs id.session
1              6             65    3625657
2              6             11  232169818
                                id.sub
1 2370b726-b96e-11e2-b3eb-1231380e1adf
2 55d514ba-3858-11e2-91a7-12313d08e01f
                        id.partner      id.service
1 0CB48A664E514CA48D378D152574EDBB BBTV (CBD46B77)
2 0CB48A664E514CA48D378D152574EDBB BBTV (CBD46B77)
  device.make device.model device.info device.serial
1        Roku        3050X  Roku;3050X  12C241003940
2        Roku        3100X  Roku;3100X  13C2AE061481
  device.version                          device.uuid
1        2.5.0.7 51ce2255-62ad-5778-b2d7-b9543c1476c6
2       2.5.0.37 7f5654d5-3aa7-5a5f-bb2b-8084da358942
  device.os device.os_version device.platform
1     Linux            2.6.35            Roku
2     Linux            2.6.35            Roku
  device.platform_vendor device.platform_version
1          Move Networks          1.0.0.20130329
2          Move Networks          1.0.0.20130615
  device.licenses  user.type ip.provider ip.postal_code
1              NA Subscriber   netacuity          48154
2              NA Subscriber   netacuity          98115
  ip.dma ip.dma_name ip.city    ip.country  ip.region
1    505     unknown livonia united states   michigan
2    819     unknown seattle united states washington
   ip.continent                           ip.isp ip.asn
1 north america        bright house networks llc      0
2 north america qwest communications company llc      0
  ip.asn_owner clip.id clip.pub asset.id asset.pub
1            ? 1338713 CBD46B77   524768  CBD46B77
2            ? 1339170 CBD46B77   522015  CBD46B77
  asset.length_ms                       asset.guid
1         1800000 b7c12c09dc5aec832e142a00b0f191fa
2         7200000 c6938cfa200a21e90dce41f5ed131cc2
        asset.title asset.type asset.adult
1 Diya Aur Bati Hum   captured       FALSE
2      Spark Top 20   captured       FALSE
              asset.franchise_guid asset.franchise_title
1 941496e1452b4fce9acfe7b3339924eb     Diya Aur Bati Hum
2                             <NA>                  <NA>
  container.id container.pub
1           14      CBD46B77
2          277      CBD46B77
                    container.guid container.title
1 3caa6afd715e4c57ac4750d29e449a9c           SPLUS
2 03e3a689e245457bba2f98c30ef931fa          BIGMGC
  usage.elapsed_ms usage.viewed_ms usage.stage
1             2312             392   mainVideo
2               NA              NA   mainVideo
  exp.idle_ms exp.stalls exp.stalled_ms
1         350          0              0
2        5772         NA             NA
  exp.frame_renders exp.frame_drops exp.ghost_session_ms
1                 0               0                    0
2                NA              NA                   NA
  exp.ghost_sessions exp.qmx_stale_ms exp.qmx_error_ms
1                  0                0                0
2                 NA               NA               NA
  exp.qss_late_ms exp.qss_error_ms exp.fom
1               0                0       0
2              NA               NA      NA
  exp.fom_weight data.dl_bytes data.ul_bytes http.oks
1              0           228           406        2
2             NA            NA            NA       NA
  http.errors http.timeouts net.throughput http.slows
1           0             0           8977          0
2          NA            NA             NA         NA
  data.bitrate_mean data.bitrate_stddev
1              1968                   0
2                NA                  NA
  data.bitrate_median data.bitrate_modes data.streamlets
1                1950               1950               1
2                  NA                 NA              NA
  data.late_streamlets data.all_streamlets
1                    0                   0
2                   NA                  NA
  data.bf_streamlets data.ab_streamlets
1                  0                  0
2                 NA                 NA
Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top