Skip to main content
The 2026 Annual Developer Survey is live— take the Survey today!
Completely revised my old answer and made it more concise.
Source Link
Reino

Please don't use RegEx or Bash's builtin tools. They're not designed to parse or create JSON. Use a dedicated parser like xidel instead:

Assuming 'input.csv':

(this is extracted from your JSON sample instead of the CSV sample you provided)

# Turn the CSV rows (region,country,name,size) into a nested
# region -> country -> entries JSON array.
# Each line is split into an array of its fields; rows are then selected by
# the exact column they belong to (1 = region, 2 = country), so a value that
# happens to appear in another column or under another region is never matched.
$ xidel -s "input.csv" -e '
  array{
    let $csv:=x:lines($raw) ! array{tokenize(.,",")}
    for $region in distinct-values($csv(1))
    return {
      "name":$region,
      "children":array{
        for $country in distinct-values($csv[.(1) = $region](2))
        return {
          "name":$country,
          "children":array{
            $csv[.(1) = $region and .(2) = $country] ! {
              "name":.(3),
              "size":.(4)
            }
          }
        }
      }
    }
  }
'

(without --json-mode=deprecated replace [ ] with array{ })

See this code snippet for intermediate steps leading to this query.
Also see this online xidelcgi demo.

Output:

[
  {
    "name": "Africa",
    "children": [
      {
        "name": "Kenya",
        "children": [
          {
            "name": "NAI",
            "size": "109"
          },
          {
            "name": "NAA",
            "size": "160"
          }
        ]
      }
    ]
  },
  {
    "name": "Asia",
    "children": [
      {
        "name": "India",
        "children": [
          {
            "name": "NSI",
            "size": "100"
          },
          {
            "name": "BSE",
            "size": "60"
          }
        ]
      },
      {
        "name": "Pakistan",
        "children": [
          {
            "name": "ISE",
            "size": "120"
          },
          {
            "name": "ANO",
            "size": "433"
          }
        ]
      }
    ]
  },
  {
    "name": "European Union",
    "children": [
      {
        "name": "United Kingdom",
        "children": [
          {
            "name": "LSE",
            "size": "550"
          },
          {
            "name": "PLU",
            "size": "123"
          }
        ]
      }
    ]
  }
]

See this gist for intermediate steps leading to this query.
Also see this online xidelcgi demo.

You'd be much better off using a tool like xidel that can manipulate CSV / raw text and understands JSON:

I'm going to assume so_24300508.csv :

(this is extracted from your JSON sample instead of the CSV sample you provided)

# Build a nested region -> country -> entries JSON array from the
# comma-separated lines of so_24300508.csv.
# Rows are selected by comparing the exact 1st (region) and 2nd (country)
# field instead of substring-matching the whole line, so "Asia" cannot pick up
# an "Asia Pacific,..." row and a country name is only matched in its own column.
# The innermost "children" is wrapped in [ ] so it is always an array, even
# when a country has a single row.
xidel -s so_24300508.csv --json-mode=deprecated --xquery '
  [
    let $csv:=x:lines($raw)
    for $region in distinct-values($csv ! tokenize(.,",")[1])
    return {
      "name":$region,
      "children":[
        for $country in distinct-values($csv[tokenize(.,",")[1] = $region] ! tokenize(.,",")[2])
        return {
          "name":$country,
          "children":[
            for $data in $csv[tokenize(.,",")[1] = $region and tokenize(.,",")[2] = $country]
            let $value:=tokenize($data,",")
            return {
              "name":$value[3],
              "size":$value[4]
            }
          ]
        }
      ]
    }
  ]
'

(without --json-mode=deprecated replace [ ] with array{ })

See this code snippet for intermediate steps leading to this query.
Also see this online xidelcgi demo.

Output:

[
  {
    "name": "Africa",
    "children": [
      {
        "name": "Kenya",
        "children": [
          {
            "name": "NAI",
            "size": "109"
          },
          {
            "name": "NAA",
            "size": "160"
          }
        ]
      }
    ]
  },
  {
    "name": "Asia",
    "children": [
      {
        "name": "India",
        "children": [
          {
            "name": "NSI",
            "size": "100"
          },
          {
            "name": "BSE",
            "size": "60"
          }
        ]
      },
      {
        "name": "Pakistan",
        "children": [
          {
            "name": "ISE",
            "size": "120"
          },
          {
            "name": "ANO",
            "size": "433"
          }
        ]
      }
    ]
  },
  {
    "name": "European Union",
    "children": [
      {
        "name": "United Kingdom",
        "children": [
          {
            "name": "LSE",
            "size": "550"
          },
          {
            "name": "PLU",
            "size": "123"
          }
        ]
      }
    ]
  }
]

Please don't use RegEx or Bash's builtin tools. They're not designed to parse or create JSON. Use a dedicated parser like xidel instead:

Assuming 'input.csv':

# Turn the CSV rows (region,country,name,size) into a nested
# region -> country -> entries JSON array.
# Each line is split into an array of its fields; rows are then selected by
# the exact column they belong to (1 = region, 2 = country). Comparing against
# every field of a row (".() = ...") would also match a value that merely
# appears in another column, and would merge equally named countries that
# belong to different regions.
$ xidel -s "input.csv" -e '
  array{
    let $csv:=x:lines($raw) ! array{tokenize(.,",")}
    for $region in distinct-values($csv(1))
    return {
      "name":$region,
      "children":array{
        for $country in distinct-values($csv[.(1) = $region](2))
        return {
          "name":$country,
          "children":array{
            $csv[.(1) = $region and .(2) = $country] ! {
              "name":.(3),
              "size":.(4)
            }
          }
        }
      }
    }
  }
'
[
  {
    "name": "Africa",
    "children": [
      {
        "name": "Kenya",
        "children": [
          {
            "name": "NAI",
            "size": "109"
          },
          {
            "name": "NAA",
            "size": "160"
          }
        ]
      }
    ]
  },
  {
    "name": "Asia",
    "children": [
      {
        "name": "India",
        "children": [
          {
            "name": "NSI",
            "size": "100"
          },
          {
            "name": "BSE",
            "size": "60"
          }
        ]
      },
      {
        "name": "Pakistan",
        "children": [
          {
            "name": "ISE",
            "size": "120"
          },
          {
            "name": "ANO",
            "size": "433"
          }
        ]
      }
    ]
  },
  {
    "name": "European Union",
    "children": [
      {
        "name": "United Kingdom",
        "children": [
          {
            "name": "LSE",
            "size": "550"
          },
          {
            "name": "PLU",
            "size": "123"
          }
        ]
      }
    ]
  }
]

See this gist for intermediate steps leading to this query.
Also see this online xidelcgi demo.

Source Link
Reino

You'd be much better off using a tool like xidel that can manipulate CSV / raw text and understands JSON:

I'm going to assume so_24300508.csv :

Africa,Kenya,NAI,109
Africa,Kenya,NAA,160
Asia,India,NSI,100
Asia,India,BSE,60
Asia,Pakistan,ISE,120
Asia,Pakistan,ANO,433
European Union,United Kingdom,LSE,550
European Union,United Kingdom,PLU,123

(this is extracted from your JSON sample instead of the CSV sample you provided)

# Build a nested region -> country -> entries JSON array from the
# comma-separated lines of so_24300508.csv.
# Rows are selected by comparing the exact 1st (region) and 2nd (country)
# field instead of substring-matching the whole line, so "Asia" cannot pick up
# an "Asia Pacific,..." row and a country name is only matched in its own column.
# The innermost "children" is wrapped in [ ] so it is always an array, even
# when a country has a single row.
xidel -s so_24300508.csv --json-mode=deprecated --xquery '
  [
    let $csv:=x:lines($raw)
    for $region in distinct-values($csv ! tokenize(.,",")[1])
    return {
      "name":$region,
      "children":[
        for $country in distinct-values($csv[tokenize(.,",")[1] = $region] ! tokenize(.,",")[2])
        return {
          "name":$country,
          "children":[
            for $data in $csv[tokenize(.,",")[1] = $region and tokenize(.,",")[2] = $country]
            let $value:=tokenize($data,",")
            return {
              "name":$value[3],
              "size":$value[4]
            }
          ]
        }
      ]
    }
  ]
'

(without --json-mode=deprecated replace [ ] with array{ })

See this code snippet for intermediate steps leading to this query.
Also see this online xidelcgi demo.

Output:

[
  {
    "name": "Africa",
    "children": [
      {
        "name": "Kenya",
        "children": [
          {
            "name": "NAI",
            "size": "109"
          },
          {
            "name": "NAA",
            "size": "160"
          }
        ]
      }
    ]
  },
  {
    "name": "Asia",
    "children": [
      {
        "name": "India",
        "children": [
          {
            "name": "NSI",
            "size": "100"
          },
          {
            "name": "BSE",
            "size": "60"
          }
        ]
      },
      {
        "name": "Pakistan",
        "children": [
          {
            "name": "ISE",
            "size": "120"
          },
          {
            "name": "ANO",
            "size": "433"
          }
        ]
      }
    ]
  },
  {
    "name": "European Union",
    "children": [
      {
        "name": "United Kingdom",
        "children": [
          {
            "name": "LSE",
            "size": "550"
          },
          {
            "name": "PLU",
            "size": "123"
          }
        ]
      }
    ]
  }
]
lang-bash