pacman::p_load(tidyverse, jsonlite)
In-Class Exercise 5b
We will use the jsonlite package to read JSON files. The VAST Challenge 2024 dataset will be used for this in-class exercise.
# Load the MC3 graph from disk and parse it into an R list.
#
# The text is read first, bare NaN tokens (which are not valid JSON) are
# rewritten to null, and only then is the string handed to fromJSON().

# Specify the path to your JSON file
file_path <- "data/mc3.json"

# Read the file as lines of text. JSON is UTF-8 by specification, so the
# encoding is declared explicitly instead of relying on the native locale.
json_data <- readLines(file_path, warn = FALSE, encoding = "UTF-8")
json_string <- paste(json_data, collapse = "")

# Replace NaN with null, but only where NaN stands as a JSON value: directly
# after ':', ',' or '[' and directly before ',', '}' or ']'. A plain
# gsub("NaN", ...) would also rewrite "NaN" inside names or descriptions.
# NOTE(review): a string value that itself contains text such as ": NaN," would
# still be rewritten; considered unlikely for this dataset -- confirm.
json_string <- gsub(
  "([,:\\[]\\s*)NaN(?=\\s*[,}\\]])",
  "\\1null",
  json_string,
  perl = TRUE
)

# Parse the JSON string; json_list is then a regular R list
json_list <- fromJSON(json_string)
# Now json_list is a usable R list that you can work with
# Check the structure of the list
str(json_list)
List of 5
$ directed : logi TRUE
$ multigraph: logi TRUE
$ graph : Named list()
$ nodes :'data.frame': 60520 obs. of 15 variables:
..$ type : chr [1:60520] "Entity.Organization.Company" "Entity.Organization.Company" "Entity.Organization.Company" "Entity.Organization.Company" ...
..$ country : chr [1:60520] "Uziland" "Mawalara" "Uzifrica" "Islavaragon" ...
..$ ProductServices : chr [1:60520] "Unknown" "Furniture and home accessories" "Food products" "Unknown" ...
..$ PointOfContact : chr [1:60520] "Rebecca Lewis" "Michael Lopez" "Steven Robertson" "Anthony Wyatt" ...
..$ HeadOfOrg : chr [1:60520] "Émilie-Susan Benoit" "HonorĂ© Lemoine" "Jules LabbĂ©" "Dr. VĂctor Hurtado" ...
..$ founding_date : chr [1:60520] "1954-04-24T00:00:00" "2009-06-12T00:00:00" "2029-12-15T00:00:00" "1972-02-16T00:00:00" ...
..$ revenue : num [1:60520] 5995 71767 0 0 4747 ...
..$ TradeDescription : chr [1:60520] "Unknown" "Abbott-Gomez is a leading manufacturer and supplier of high-quality furniture and home accessories, catering to"| __truncated__ "Abbott-Harrison is a leading manufacturer of high-quality food products, including baked goods, snacks, and bev"| __truncated__ "Unknown" ...
..$ _last_edited_by : chr [1:60520] "Pelagia Alethea Mordoch" "Pelagia Alethea Mordoch" "Pelagia Alethea Mordoch" "Pelagia Alethea Mordoch" ...
..$ _last_edited_date: chr [1:60520] "2035-01-01T00:00:00" "2035-01-01T00:00:00" "2035-01-01T00:00:00" "2035-01-01T00:00:00" ...
..$ _date_added : chr [1:60520] "2035-01-01T00:00:00" "2035-01-01T00:00:00" "2035-01-01T00:00:00" "2035-01-01T00:00:00" ...
..$ _raw_source : chr [1:60520] "Existing Corporate Structure Data" "Existing Corporate Structure Data" "Existing Corporate Structure Data" "Existing Corporate Structure Data" ...
..$ _algorithm : chr [1:60520] "Automatic Import" "Automatic Import" "Automatic Import" "Automatic Import" ...
..$ id : chr [1:60520] "Abbott, Mcbride and Edwards" "Abbott-Gomez" "Abbott-Harrison" "Abbott-Ibarra" ...
..$ dob : chr [1:60520] NA NA NA NA ...
$ links :'data.frame': 75817 obs. of 11 variables:
..$ start_date : chr [1:75817] "2016-10-29T00:00:00" "2035-06-03T00:00:00" "2028-11-20T00:00:00" "2024-09-04T00:00:00" ...
..$ type : chr [1:75817] "Event.Owns.Shareholdership" "Event.Owns.Shareholdership" "Event.Owns.Shareholdership" "Event.Owns.Shareholdership" ...
..$ _last_edited_by : chr [1:75817] "Pelagia Alethea Mordoch" "Niklaus Oberon" "Pelagia Alethea Mordoch" "Pelagia Alethea Mordoch" ...
..$ _last_edited_date: chr [1:75817] "2035-01-01T00:00:00" "2035-07-15T00:00:00" "2035-01-01T00:00:00" "2035-01-01T00:00:00" ...
..$ _date_added : chr [1:75817] "2035-01-01T00:00:00" "2035-07-15T00:00:00" "2035-01-01T00:00:00" "2035-01-01T00:00:00" ...
..$ _raw_source : chr [1:75817] "Existing Corporate Structure Data" "Oceanus Corporations Monthly - Jun '35" "Existing Corporate Structure Data" "Existing Corporate Structure Data" ...
..$ _algorithm : chr [1:75817] "Automatic Import" "Manual Entry" "Automatic Import" "Automatic Import" ...
..$ source : chr [1:75817] "Avery Inc" "Berger-Hayes" "Bowers Group" "Bowman-Howe" ...
..$ target : chr [1:75817] "Allen, Nichols and Thompson" "Jensen, Morris and Downs" "Barnett Inc" "Bennett Ltd" ...
..$ key : int [1:75817] 0 0 0 0 0 0 0 0 0 0 ...
..$ end_date : chr [1:75817] NA NA NA NA ...
# Summary of the list for a quick overview
summary(json_list)
Length Class Mode
directed 1 -none- logical
multigraph 1 -none- logical
graph 0 -none- list
nodes 15 data.frame list
links 11 data.frame list
# Example: replacing missing values in a specific field.
# PointOfContact is a column of the `nodes` data frame, not a top-level element
# of json_list, and missing entries in the parsed data appear as NA rather than
# NULL. Index the column directly and overwrite only the missing entries.
missing_contact <- is.na(json_list$nodes$PointOfContact)
json_list$nodes$PointOfContact[missing_contact] <- "No Contact"
# Parse the three challenge datasets into R lists.
# MC3 is parsed from the cleaned json_string (NaN already replaced with null);
# MC1 and MC2 are read directly from their files.
mc3_data <- fromJSON(json_string)
mc1_data <- fromJSON("data/mc1.json")
mc2_data <- fromJSON("data/mc2.json")
# Graph data model
# You may apply this to MC2 and MC3 as well.
# Refer to the MC1 Data Description for the meaning of each field.
# MC3 is slightly corrupted (it contains NaN values); fill in placeholder values before running.
summary(mc1_data)
Length Class Mode
directed 1 -none- logical
multigraph 1 -none- logical
graph 0 -none- list
nodes 4 data.frame list
links 10 data.frame list
summary(mc2_data)
Length Class Mode
directed 1 -none- logical
multigraph 1 -none- logical
graph 0 -none- list
nodes 20 data.frame list
links 17 data.frame list
summary(mc3_data)
Length Class Mode
directed 1 -none- logical
multigraph 1 -none- logical
graph 0 -none- list
nodes 15 data.frame list
links 11 data.frame list