Return to Answer

Post Timeline

Completely revised my old answer and made it more concise.

Source Link

edited Apr 18, 2025 at 13:45

Reino

reputation score 3545
1
1 gold badge
17
17 silver badges
24
24 bronze badges

Please don't use RegEx or Bash's built-in tools. They're not designed to parse or create JSON. Use a dedicated parser like xidel instead:

Assuming 'input.csv':

(this is extracted from your JSON sample instead of the CSV sample you provided)

$ xidel -s "input.csv" -e '
  (: Build a region > country > entry tree from 4-column CSV lines. :)
  array{
    (: One array per input line; its members are the comma-separated fields. :)
    let $csv:=x:lines($raw) ! array{tokenize(.,",")}
    for $region in distinct-values($csv(1))
    return {
      "name":$region,
      "children":array{
        (: Compare column 1 only, so a region name found in another column cannot match. :)
        for $country in distinct-values($csv[.(1) = $region](2))
        return {
          "name":$country,
          "children":array{
            (: Match region AND country, so equal country names under different regions stay apart. :)
            $csv[.(1) = $region and .(2) = $country] ! {
              "name":.(3),
              "size":.(4)
            }
          }
        }
      }
    }
  }
'

(without --json-mode=deprecated replace [ ] with array{ })

See this code snippet for intermediate steps leading to this query.
Also see this online xidelcgi demo.

Output:

[
  {
    "name": "Africa",
    "children": [
      {
        "name": "Kenya",
        "children": [
          {
            "name": "NAI",
            "size": "109"
          },
          {
            "name": "NAA",
            "size": "160"
          }
        ]
      }
    ]
  },
  {
    "name": "Asia",
    "children": [
      {
        "name": "India",
        "children": [
          {
            "name": "NSI",
            "size": "100"
          },
          {
            "name": "BSE",
            "size": "60"
          }
        ]
      },
      {
        "name": "Pakistan",
        "children": [
          {
            "name": "ISE",
            "size": "120"
          },
          {
            "name": "ANO",
            "size": "433"
          }
        ]
      }
    ]
  },
  {
    "name": "European Union",
    "children": [
      {
        "name": "United Kingdom",
        "children": [
          {
            "name": "LSE",
            "size": "550"
          },
          {
            "name": "PLU",
            "size": "123"
          }
        ]
      }
    ]
  }
]

See this gist for intermediate steps leading to this query.
Also see this online xidelcgi demo.

You'd be much better off using a tool like xidel that can manipulate csv / raw text and understands JSON:

I'm going to assume so_24300508.csv :

(this is extracted from your JSON sample instead of the CSV sample you provided)

xidel -s so_24300508.csv --json-mode=deprecated --xquery '
  (: Build a region > country > entry tree from 4-column CSV lines. :)
  [
    let $csv:=x:lines($raw)
    for $region in distinct-values($csv ! tokenize(.,",")[1])
    return {
      "name":$region,
      "children":[
        (: Compare the first field exactly; a prefix test would also pick up e.g. "Asia Pacific" for "Asia". :)
        for $country in distinct-values($csv[tokenize(.,",")[1] = $region] ! tokenize(.,",")[2]) return {
          "name":$country,
          (: Wrapped in [ ] so "children" is always an array, even for a single entry. :)
          "children":[
            for $data in $csv
            let $value:=tokenize($data,",")
            (: Exact match on both key fields instead of substring search over the whole line. :)
            where $value[1] = $region and $value[2] = $country
            return {
              "name":$value[3],
              "size":$value[4]
            }
          ]
        }
      ]
    }
  ]
'

(without --json-mode=deprecated replace [ ] with array{ })

See this code snippet for intermediate steps leading to this query.
Also see this online xidelcgi demo.

Output:

[
  {
    "name": "Africa",
    "children": [
      {
        "name": "Kenya",
        "children": [
          {
            "name": "NAI",
            "size": "109"
          },
          {
            "name": "NAA",
            "size": "160"
          }
        ]
      }
    ]
  },
  {
    "name": "Asia",
    "children": [
      {
        "name": "India",
        "children": [
          {
            "name": "NSI",
            "size": "100"
          },
          {
            "name": "BSE",
            "size": "60"
          }
        ]
      },
      {
        "name": "Pakistan",
        "children": [
          {
            "name": "ISE",
            "size": "120"
          },
          {
            "name": "ANO",
            "size": "433"
          }
        ]
      }
    ]
  },
  {
    "name": "European Union",
    "children": [
      {
        "name": "United Kingdom",
        "children": [
          {
            "name": "LSE",
            "size": "550"
          },
          {
            "name": "PLU",
            "size": "123"
          }
        ]
      }
    ]
  }
]

Please don't use RegEx or Bash's built-in tools. They're not designed to parse or create JSON. Use a dedicated parser like xidel instead:

Assuming 'input.csv':

$ xidel -s "input.csv" -e '
  (: Build a region > country > entry tree from 4-column CSV lines. :)
  array{
    (: One array per input line; its members are the comma-separated fields. :)
    let $csv:=x:lines($raw) ! array{tokenize(.,",")}
    for $region in distinct-values($csv(1))
    return {
      "name":$region,
      "children":array{
        (: Compare column 1 only, so a region name found in another column cannot match. :)
        for $country in distinct-values($csv[.(1) = $region](2))
        return {
          "name":$country,
          "children":array{
            (: Match region AND country, so equal country names under different regions stay apart. :)
            $csv[.(1) = $region and .(2) = $country] ! {
              "name":.(3),
              "size":.(4)
            }
          }
        }
      }
    }
  }
'
[
  {
    "name": "Africa",
    "children": [
      {
        "name": "Kenya",
        "children": [
          {
            "name": "NAI",
            "size": "109"
          },
          {
            "name": "NAA",
            "size": "160"
          }
        ]
      }
    ]
  },
  {
    "name": "Asia",
    "children": [
      {
        "name": "India",
        "children": [
          {
            "name": "NSI",
            "size": "100"
          },
          {
            "name": "BSE",
            "size": "60"
          }
        ]
      },
      {
        "name": "Pakistan",
        "children": [
          {
            "name": "ISE",
            "size": "120"
          },
          {
            "name": "ANO",
            "size": "433"
          }
        ]
      }
    ]
  },
  {
    "name": "European Union",
    "children": [
      {
        "name": "United Kingdom",
        "children": [
          {
            "name": "LSE",
            "size": "550"
          },
          {
            "name": "PLU",
            "size": "123"
          }
        ]
      }
    ]
  }
]

See this gist for intermediate steps leading to this query.
Also see this online xidelcgi demo.

Source Link

answered Dec 4, 2020 at 17:06

Reino

reputation score 3545
1
1 gold badge
17
17 silver badges
24
24 bronze badges

You'd be much better off using a tool like xidel that can manipulate csv / raw text and understands JSON:

I'm going to assume so_24300508.csv :

Africa,Kenya,NAI,109
Africa,Kenya,NAA,160
Asia,India,NSI,100
Asia,India,BSE,60
Asia,Pakistan,ISE,120
Asia,Pakistan,ANO,433
European Union,United Kingdom,LSE,550
European Union,United Kingdom,PLU,123

(this is extracted from your JSON sample instead of the CSV sample you provided)

xidel -s so_24300508.csv --json-mode=deprecated --xquery '
  (: Build a region > country > entry tree from 4-column CSV lines. :)
  [
    let $csv:=x:lines($raw)
    for $region in distinct-values($csv ! tokenize(.,",")[1])
    return {
      "name":$region,
      "children":[
        (: Compare the first field exactly; a prefix test would also pick up e.g. "Asia Pacific" for "Asia". :)
        for $country in distinct-values($csv[tokenize(.,",")[1] = $region] ! tokenize(.,",")[2]) return {
          "name":$country,
          (: Wrapped in [ ] so "children" is always an array, even for a single entry. :)
          "children":[
            for $data in $csv
            let $value:=tokenize($data,",")
            (: Exact match on both key fields instead of substring search over the whole line. :)
            where $value[1] = $region and $value[2] = $country
            return {
              "name":$value[3],
              "size":$value[4]
            }
          ]
        }
      ]
    }
  ]
'

(without --json-mode=deprecated replace [ ] with array{ })

See this code snippet for intermediate steps leading to this query.
Also see this online xidelcgi demo.

Output:

[
  {
    "name": "Africa",
    "children": [
      {
        "name": "Kenya",
        "children": [
          {
            "name": "NAI",
            "size": "109"
          },
          {
            "name": "NAA",
            "size": "160"
          }
        ]
      }
    ]
  },
  {
    "name": "Asia",
    "children": [
      {
        "name": "India",
        "children": [
          {
            "name": "NSI",
            "size": "100"
          },
          {
            "name": "BSE",
            "size": "60"
          }
        ]
      },
      {
        "name": "Pakistan",
        "children": [
          {
            "name": "ISE",
            "size": "120"
          },
          {
            "name": "ANO",
            "size": "433"
          }
        ]
      }
    ]
  },
  {
    "name": "European Union",
    "children": [
      {
        "name": "United Kingdom",
        "children": [
          {
            "name": "LSE",
            "size": "550"
          },
          {
            "name": "PLU",
            "size": "123"
          }
        ]
      }
    ]
  }
]

Collectives™ on Stack Overflow