Please don't use RegEx or Bash's builtin tools for this. They're not designed to parse or create JSON. You'd be much better off using a dedicated tool like xidel, which can manipulate CSV / raw text and understands JSON:
Assuming the data is stored in 'input.csv':
(this is extracted from your JSON sample instead of the CSV sample you provided)
$ xidel -s "input.csv" --xquery '
  (: Build a region > country > item tree from rows of the form
     region,country,name,size (one comma-separated record per line). :)
  array{
    (: Split the raw input into lines and turn every line into an array
       of its comma-separated fields, so fields can be read as .(1) ... .(4). :)
    let $csv:=x:lines($raw) ! array{tokenize(.,",")}
    (: One object per distinct value of the first field. :)
    for $region in distinct-values($csv ! .(1))
    return {
      "name":$region,
      "children":array{
        (: Distinct second-field values among the rows of this region only. :)
        for $country in distinct-values($csv[.(1) = $region] ! .(2))
        return {
          "name":$country,
          "children":array{
            (: Match on the region as well, so that equally named countries
               listed under different regions are not merged. :)
            $csv[.(1) = $region and .(2) = $country] ! {
              "name":.(3),
              "size":.(4)
            }
          }
        }
      }
    }
  }
'
(Note: the `[ ]` array syntax only works with `--json-mode=deprecated`; without that option, write `array{ }` instead.)
See this code snippet for intermediate steps leading to this query.
Also see this online xidelcgi demo.
Output:
[
{
"name": "Africa",
"children": [
{
"name": "Kenya",
"children": [
{
"name": "NAI",
"size": "109"
},
{
"name": "NAA",
"size": "160"
}
]
}
]
},
{
"name": "Asia",
"children": [
{
"name": "India",
"children": [
{
"name": "NSI",
"size": "100"
},
{
"name": "BSE",
"size": "60"
}
]
},
{
"name": "Pakistan",
"children": [
{
"name": "ISE",
"size": "120"
},
{
"name": "ANO",
"size": "433"
}
]
}
]
},
{
"name": "European Union",
"children": [
{
"name": "United Kingdom",
"children": [
{
"name": "LSE",
"size": "550"
},
{
"name": "PLU",
"size": "123"
}
]
}
]
}
]
See this gist for intermediate steps leading to this query.
Also see this online xidelcgi demo.