In Class Exercise 5b

Author

Guan Jhen Lin

Published

May 11, 2024

Modified

May 15, 2024

We will use the jsonlite package to read JSON files. The VAST Challenge 2024 dataset will be used for this in-class exercise.

# Load the packages used in this exercise.
pacman::p_load(tidyverse, jsonlite)

# Path to the MC3 JSON file.
file_path <- "data/mc3.json"

# Read the file as raw text so it can be patched before parsing.
json_data <- readLines(file_path, warn = FALSE)
json_string <- paste(json_data, collapse = "")

# Replace bare NaN tokens with null (NaN is not a valid JSON literal).
# The pattern only matches NaN in value position, i.e. preceded by ':', '['
# or ',' and followed by ',', '}' or ']'. A plain gsub("NaN", ...) would also
# rewrite the letters "NaN" inside string values (names, descriptions, ...).
json_string <- gsub(
  "([,:\\[]\\s*)NaN(?=\\s*[,}\\]])",
  "\\1null",
  json_string,
  perl = TRUE
)

# Parse the patched JSON string into an R list.
json_list <- fromJSON(json_string)

# json_list is now a regular R list; inspect its structure.
str(json_list)
List of 5
 $ directed  : logi TRUE
 $ multigraph: logi TRUE
 $ graph     : Named list()
 $ nodes     :'data.frame': 60520 obs. of  15 variables:
  ..$ type             : chr [1:60520] "Entity.Organization.Company" "Entity.Organization.Company" "Entity.Organization.Company" "Entity.Organization.Company" ...
  ..$ country          : chr [1:60520] "Uziland" "Mawalara" "Uzifrica" "Islavaragon" ...
  ..$ ProductServices  : chr [1:60520] "Unknown" "Furniture and home accessories" "Food products" "Unknown" ...
  ..$ PointOfContact   : chr [1:60520] "Rebecca Lewis" "Michael Lopez" "Steven Robertson" "Anthony Wyatt" ...
  ..$ HeadOfOrg        : chr [1:60520] "Émilie-Susan Benoit" "Honoré Lemoine" "Jules Labbé" "Dr. Víctor Hurtado" ...
  ..$ founding_date    : chr [1:60520] "1954-04-24T00:00:00" "2009-06-12T00:00:00" "2029-12-15T00:00:00" "1972-02-16T00:00:00" ...
  ..$ revenue          : num [1:60520] 5995 71767 0 0 4747 ...
  ..$ TradeDescription : chr [1:60520] "Unknown" "Abbott-Gomez is a leading manufacturer and supplier of high-quality furniture and home accessories, catering to"| __truncated__ "Abbott-Harrison is a leading manufacturer of high-quality food products, including baked goods, snacks, and bev"| __truncated__ "Unknown" ...
  ..$ _last_edited_by  : chr [1:60520] "Pelagia Alethea Mordoch" "Pelagia Alethea Mordoch" "Pelagia Alethea Mordoch" "Pelagia Alethea Mordoch" ...
  ..$ _last_edited_date: chr [1:60520] "2035-01-01T00:00:00" "2035-01-01T00:00:00" "2035-01-01T00:00:00" "2035-01-01T00:00:00" ...
  ..$ _date_added      : chr [1:60520] "2035-01-01T00:00:00" "2035-01-01T00:00:00" "2035-01-01T00:00:00" "2035-01-01T00:00:00" ...
  ..$ _raw_source      : chr [1:60520] "Existing Corporate Structure Data" "Existing Corporate Structure Data" "Existing Corporate Structure Data" "Existing Corporate Structure Data" ...
  ..$ _algorithm       : chr [1:60520] "Automatic Import" "Automatic Import" "Automatic Import" "Automatic Import" ...
  ..$ id               : chr [1:60520] "Abbott, Mcbride and Edwards" "Abbott-Gomez" "Abbott-Harrison" "Abbott-Ibarra" ...
  ..$ dob              : chr [1:60520] NA NA NA NA ...
 $ links     :'data.frame': 75817 obs. of  11 variables:
  ..$ start_date       : chr [1:75817] "2016-10-29T00:00:00" "2035-06-03T00:00:00" "2028-11-20T00:00:00" "2024-09-04T00:00:00" ...
  ..$ type             : chr [1:75817] "Event.Owns.Shareholdership" "Event.Owns.Shareholdership" "Event.Owns.Shareholdership" "Event.Owns.Shareholdership" ...
  ..$ _last_edited_by  : chr [1:75817] "Pelagia Alethea Mordoch" "Niklaus Oberon" "Pelagia Alethea Mordoch" "Pelagia Alethea Mordoch" ...
  ..$ _last_edited_date: chr [1:75817] "2035-01-01T00:00:00" "2035-07-15T00:00:00" "2035-01-01T00:00:00" "2035-01-01T00:00:00" ...
  ..$ _date_added      : chr [1:75817] "2035-01-01T00:00:00" "2035-07-15T00:00:00" "2035-01-01T00:00:00" "2035-01-01T00:00:00" ...
  ..$ _raw_source      : chr [1:75817] "Existing Corporate Structure Data" "Oceanus Corporations Monthly - Jun '35" "Existing Corporate Structure Data" "Existing Corporate Structure Data" ...
  ..$ _algorithm       : chr [1:75817] "Automatic Import" "Manual Entry" "Automatic Import" "Automatic Import" ...
  ..$ source           : chr [1:75817] "Avery Inc" "Berger-Hayes" "Bowers Group" "Bowman-Howe" ...
  ..$ target           : chr [1:75817] "Allen, Nichols and Thompson" "Jensen, Morris and Downs" "Barnett Inc" "Bennett Ltd" ...
  ..$ key              : int [1:75817] 0 0 0 0 0 0 0 0 0 0 ...
  ..$ end_date         : chr [1:75817] NA NA NA NA ...
# Quick overview of the parsed list: length, class and mode of each
# top-level element.
summary(json_list)
           Length Class      Mode   
directed    1     -none-     logical
multigraph  1     -none-     logical
graph       0     -none-     list   
nodes      15     data.frame list   
links      11     data.frame list   
# Example: replacing missing values in a specific field.
# PointOfContact is a column of the nodes data frame, not a top-level element
# of json_list, and JSON nulls are parsed as NA. The replacement is therefore
# applied to the NA entries of that column; testing is.null() on
# json_list$PointOfContact would only add a stray top-level element.
missing_contact <- is.na(json_list$nodes$PointOfContact)
json_list$nodes$PointOfContact[missing_contact] <- "No Contact"

# Parse MC3 again from the patched text so mc3_data does not carry the
# example replacement above, then load MC1 and MC2 directly from file.
mc3_data <- fromJSON(json_string)
mc1_data <- fromJSON("data/mc1.json")
mc2_data <- fromJSON("data/mc2.json")



# Graph data model
# The same inspection can be applied to MC2 and MC3 as well.
# Refer to the MC1 Data Description for the meaning of each field.
# NOTE: the MC3 file is slightly corrupted; fill in the invalid values before
# running this (presumably what the NaN-to-null substitution earlier in this
# file addresses).
summary(mc1_data)
           Length Class      Mode   
directed    1     -none-     logical
multigraph  1     -none-     logical
graph       0     -none-     list   
nodes       4     data.frame list   
links      10     data.frame list   
# Top-level overview of the MC2 list (length, class and mode per element).
summary(mc2_data)
           Length Class      Mode   
directed    1     -none-     logical
multigraph  1     -none-     logical
graph       0     -none-     list   
nodes      20     data.frame list   
links      17     data.frame list   
# Top-level overview of the MC3 list (length, class and mode per element).
summary(mc3_data)
           Length Class      Mode   
directed    1     -none-     logical
multigraph  1     -none-     logical
graph       0     -none-     list   
nodes      15     data.frame list   
links      11     data.frame list