Ruby newbie here...
I have a CSV file (logevents.csv) that has a "message" column. Each row of the "message" column contains JSON data. Using Ruby, I'd like to convert the JSON data's name:value pairs to columnname:rowvalue in a second CSV file. Here's the first row of the CSV file:
message "{""version"":""0"",""id"":""fdd11d8a-ef17-75ae-cf50-077285bb7e15"",""detail-type"":""Auth0 log"",""source"":""aws.partner/auth0.com/website-dev-c36bb924-cf05-4a5b-8400-7bdfbfe0806c/auth0.logs"",""account"":""654654277766"",""time"":""2024-03-27T12:30:51Z"",""region"":""us-east-2"",""resources"":[],""detail"":{""log_id"":""90020240327123051583073000000000000001223372067726119722"",""data"":{""date"":""2024-03-27T12:30:51.531Z"",""type"":""seacft"",""description"":"""",""connection_id"":"""",""client_id"":""v00a8B5f1sgCDjVhneXMbMmwxlsbYoHq"",""client_name"":""Website Dev"",""ip"":""32.174.36.217"",""user_agent"":""Someday v1.10.3"",""details"":{""code"":""******************************************5kW""},""hostname"":""website-dev.us.auth0.com"",""user_id"":""auth0|648a230ee5ad48ee2ebfb212"",""user_name"":""[email protected]"",""auth0_client"":{""name"":""omniauth-auth0"",""version"":""2.6.0"",""env"":{""ruby"":""2.6.5"",""rails"":""6.1.7.4""}},""$event_schema"":{""version"":""1.0.0""},""log_id"":""90020240327123051583073000000000000001223372067726119722""}}}"
For each row, I'd like the JSON above to be written to another CSV file, but with the name:value pairs pivoted into column:rowvalue form, using a "," (comma) as the delimiter for both the column names and the row values, like this:
version,id,detail-type,source,account ....etc 0,fdd11d8a-ef17-75ae-cf50-077285bb7e15,Auth0 log,aws.partner/auth0.com/website-dev-c36bb924-cf05-4a5b-8400-7bdfbfe0806c/auth0.logs,654654277766 ....etc
I have been trying to accomplish this via this ruby script (runtimetest.rb):
require 'csv'
require 'json'
# Open the destination CSV file for writing.
CSV.open("C:/Ruby/dev/logevents2.csv", "w") do |out|
  # The entire text of logevents.csv (a CSV file, not a JSON document) is
  # read and handed to JSON.parse as-is.
  raw_text = File.open("C:/Ruby/dev/logevents.csv").read
  JSON.parse(raw_text).each do |record|
    # Append the record's values as one output row.
    out << record.values
  end
end
But at runtime the contents of the CSV file (logevents.csv) are printed to the screen along with an "unexpected token" message:
C:\Users\dclad>runtimetest.rb
C:/Ruby32-x64/lib/ruby/3.2.0/json/common.rb:216:in `parse': unexpected token at '"version"":""0"",""id"":""fdd11d8a-ef17-75ae-cf50-077285bb7e15"",""detail-type"":""Auth0 log"",""source"":""aws.partner/auth0.com/trulab-dev-c36bb924-cf05-4a5b-8400-7bdfbfe0806c/auth0.logs"",""account"":""654654277766"", ........
What I tried: the same Ruby script as shown above (runtimetest.rb):
require 'csv'
require 'json'
# Open the destination CSV file for writing.
CSV.open("C:/Ruby/dev/logevents2.csv", "w") do |out|
  # The entire text of logevents.csv (a CSV file, not a JSON document) is
  # read and handed to JSON.parse as-is.
  raw_text = File.open("C:/Ruby/dev/logevents.csv").read
  JSON.parse(raw_text).each do |record|
    # Append the record's values as one output row.
    out << record.values
  end
end
I was expecting the output in the second CSV file to be a table with a header row of column names followed by rows of values:
version,id,detail-type,source,account ....etc 0,fdd11d8a-ef17-75ae-cf50-077285bb7e15,Auth0 log,aws.partner/auth0.com/trulab-dev-c36bb924-cf05-4a5b-8400-7bdfbfe0806c/auth0.logs,654654277766 ....etc
I may be going about this all wrong. Any suggestions would be greatly appreciated!
Best Regards, Donald
UPDATE
I was able to get Ruby to parse a single-record JSON file and write its name/value pairs (including nested pairs) to a second file as a CSV with a header row and a data row.
require 'csv'
require 'json'
# Locate the list of records inside a parsed JSON document.
#
# Performs a breadth-first walk over nested Hash values and returns the first
# Array encountered. If the document contains no Array at all, the document
# itself is wrapped in a one-element Array so callers can always iterate.
#
# @param json [Object] a parsed JSON value (Hash, Array, scalar or nil)
# @return [Array] the first Array found, or +[json]+ when there is none
def array_from(json)
  queue = [json]
  # Loop on queue emptiness rather than on the dequeued value: a JSON null
  # (nil) sitting in the queue must not end the search early.
  until queue.empty?
    item = queue.shift
    return item if item.is_a?(Array)

    queue.concat(item.values) if item.is_a?(Hash)
  end
  [json]
end
# Flatten a nested JSON structure into a single-level Hash of columns.
#
# Column names are the keys along the path to each scalar, joined with "|".
# Hash members contribute their key; Array members contribute their index
# (so +{"t" => ["x"]}+ yields the column "t|0"). Strings, numbers (Integer
# and Float), booleans and nil are kept as values; any other object is
# ignored. Empty Hashes and Arrays contribute no columns. A scalar passed
# with the default empty path is stored under the "" key.
#
# @param object [Object] the parsed JSON value to flatten
# @param path [String] column-name prefix accumulated so far; each segment
#   is followed by "|"
# @return [Hash{String => Object}] column name => scalar value
def flatten(object, path='')
  case object
  when Hash
    object.each_with_object({}) do |(key, value), columns|
      columns.merge!(flatten(value, "#{path}#{key}|"))
    end
  when Array
    # Use the element's position as its path segment; interpolating the
    # element itself would produce unusable (and possibly colliding) names.
    object.each_with_index.each_with_object({}) do |(element, index), columns|
      columns.merge!(flatten(element, "#{path}#{index}|"))
    end
  when String, Numeric, true, false, nil
    # Drop the trailing "|" separator that was appended after the last key.
    { path.chomp('|') => object }
  else
    {}
  end
end
# Parse the input document. File.read closes the file before returning.
json = JSON.parse(File.read('logevent.json'))
# Locate the list of records inside the document.
in_array = array_from json
# One flattened { column name => value } Hash per record.
out_array = in_array.map { |row| flatten row }
# Take the header from the union of every row's keys (first-seen order) so
# that rows with missing or extra fields still line up under the right column.
headers = out_array.flat_map(&:keys).uniq
CSV.open('logevent.csv', 'w') do |csv|
  csv << headers unless headers.empty?
  out_array.each do |row|
    # values_at yields nil (an empty CSV field) for columns this row lacks.
    csv << row.values_at(*headers)
  end
end
Here are the contents of the input file, "logevent.json":
{
"Message": [
{
"version": "0",
"id": "fdd11d8a-ef17-75ae-cf50-077285bb7e15",
"detail-type": "Auth0 log",
"source": "aws.partner/auth0.com/website-c36bb924-cf05-4a5b-8400-7bdfbfe0806c/auth0.logs",
"account": "654654277766",
"time": "2024-03-27T12:30:51Z",
"region": "us-east-2",
"resources": [],
"detail": {
"log_id": "90020240327123051583073000000000000001223372067726119722",
"data": {
"date": "2024-03-27T12:30:51.531Z",
"type": "seacft",
"description": "",
"connection_id": "",
"client_id": "v00a8B5f1sgCDjVhneXMbMmwxlsbYoHq",
"client_name": "Website Dev",
"ip": "3.17.36.227",
"user_agent": "JohnDoe v1.10.3",
"details": {
"code": "******************************************5kW"
},
"hostname": "website.us.auth0.com",
"user_id": "auth0|648a230ee5ad48ee2ebfb212",
"user_name": "[email protected]",
"auth0_client": {
"name": "omniauth-auth0",
"version": "2.6.0",
"env": {
"ruby": "2.6.5",
"rails": "6.1.7.4"
}
},
"$event_schema": {
"version": "1.0.0"
},
"log_id": "90020240327123051583073000000000000001223372067726119722"
}
}
}
]
}