JQ Recipes

TRANSFORM

Barcharts

# Render an ASCII frequency barchart for field $key over an array of objects.
# Output is one newline-joined string: header, underline, one bar line per
# distinct value (most frequent first), then a blank line.
def barchart($key):
  length as $total
  # Bucket rows by the stringified value of $key; missing values become "null".
  | map((.[$key] // "null") | tostring)
  | group_by(.)
  | (map({ key: .[0], value: length, title_len: (.[0]|tostring|length) }) ) as $columns
  | $columns
  # Most frequent value first.
  | sort_by(.value) | reverse
  # Pad every label to the widest label so the bars line up.
  | (max_by(.title_len)|.title_len) as $padding
  |
  # Skip near-unique columns (e.g. names): >80% distinct or >1000 buckets.
  # 0.8, not `.8`: leading-dot number literals are rejected by some jq versions.
  (if (((($columns|length)/$total) > 0.8) or (($columns|length) > 1000)) then
    [ "IGNORING <\($key)>: \($columns|length) out of \($total) rows", ""]
  else [
    $key,
    ("-" * ($key|length) ),
    map(
    [
      .key, (" " * ($padding-.title_len)),
      # Percentage truncated to 4 chars ("75.9", "9.01"); ".000" pads short values.
      "\((.value/$total)*100|tostring|.+".000"|.[0:4])%",
      # Bar width = percentage plus a log bonus so small buckets stay visible;
      # singletons get a single thin bar.
      ( if (.value == 1) then "▊" else ("█" * (((.value/$total)*100) + (.value|log)|round)) end),
      .value
    ] | join(" ")
    ),
    ""
  ] end)
  | flatten
  | join("\n");

# Render a barchart for every column of the first row, joined by newlines.
# (Columns barchart() skips still contribute their "IGNORING" banner.)
# Note: the original wrapped the joined string in `[ ... ] | flatten | join`,
# which is a no-op on a single string — removed.
def run_barchart:
  . as $data
  | (.[0] | keys)
  | map(. as $col | $data | barchart($col))
  | join("\n")
;
jq -r 'include "recipes"; run_barchart' data/titanic.json
Parents_Children_Aboard
-----------------------
0  75.9% ██████████████████████████████████████████████████████████████████████████████████ 674
1  13.3% ██████████████████ 118
2  9.01% █████████████ 80
5  0.56% ██ 5
3  0.56% ██ 5
4  0.45% ██ 4
6  0.11% ▊ 1

Pclass
------
3  54.9% █████████████████████████████████████████████████████████████ 487
1  24.3% ██████████████████████████████ 216
2  20.7% ██████████████████████████ 184

Sex
---
male    64.5% ███████████████████████████████████████████████████████████████████████ 573
female  35.4% █████████████████████████████████████████ 314

Siblings_Spouses_Aboard
-----------------------
0  68.0% ██████████████████████████████████████████████████████████████████████████ 604
1  23.5% █████████████████████████████ 209
2  3.15% ██████ 28
4  2.02% █████ 18
3  1.80% █████ 16
8  0.78% ███ 7
5  0.56% ██ 5

Survived
--------
0  61.4% ████████████████████████████████████████████████████████████████████ 545
1  38.5% ████████████████████████████████████████████ 342

Data cleaning

Delete keys

jq 'delpaths([paths | select(.[-1] | strings | test("<REGEX>"; "i"))])'

Combine json files

jq -n 'reduce inputs as $s (.; .[input_filename|gsub(".json";"")|split("/")|last] += $s)' ./*.json

Simple groupby

jq --arg key value 'group_by(.[$key]) | map({"\(.[0][$key])": length}) | add'

Example:

$ jq --arg key Status 'group_by(.[$key]) | map({"\(.[0][$key])": length}) | add'
{
  "Disabled": 35,
  "Ready": 191,
  "Running": 2,
  "Status": 99
}

Flatten json

jq -r 'paths(scalars) as $p | "\($p|join("."))=\(getpath($p))"' <FILE>

Example:

$ jq -r 'paths(scalars) as $p | "\($p|join("."))=\(getpath($p))"' data/titanic.json
0.Survived=0
0.Pclass=3
0.Name=Mr. Owen Harris Braund
0.Sex=male
0.Age=22
0.Siblings_Spouses_Aboard=1
0.Parents_Children_Aboard=0
1.Survived=1
1.Pclass=1
1.Name=Mrs. John Bradley (Florence Briggs Thayer) Cumings
1.Sex=female
1.Age=38
1.Siblings_Spouses_Aboard=1
1.Parents_Children_Aboard=0

[Source]

transform

json2ini

Adapted from https://stackoverflow.com/a/76665197

Data used: nested.json

{
  "server": {
    "atx": {
      "user": "annie",
      "port": 22
    }
  },
  "storage": {
    "nyc": {
      "user": "nntrn",
      "port": 22
    }
  }
}
# Convert two-level nested JSON to INI: the dotted path of the parent object
# becomes the [section] name; leaves become `key = value` lines.
jq --stream -nr ' reduce (inputs | select(has(1))) as [$path, $val]
  # --stream emits [path, value] pairs; select(has(1)) keeps leaf events only
  # (closing-path events are 1-element arrays and are skipped).
  ( {}; .[$path[:-1] | join(".")][$path[-1]] = $val )
| to_entries[]
# Emit "[section]", then one "key = value" per leaf, then a blank separator line.
| "[\(.key)]", (.value | to_entries[] | "\(.key) = \(.value)" )
, ""'

Output

[server.atx]
user = annie
port = 22

[storage.nyc]
user = nntrn
port = 22

tsv2json

Adapted from https://stackoverflow.com/a/55996042

Data used: tmp.tsv

foo bar baz
1   2   3
4   5   6
# Convert whitespace-separated values (first line = header) to an array of
# objects. With -R (raw input, no -n) the first line binds to `.` and the
# remaining lines arrive via `inputs`.
jq -R 'split("[\\s\\t]+";"x") as $head
  # "x" is the extended-regex flag; the pattern splits on runs of whitespace.
| [inputs | split("[\\s\\t]+";"x")]
| map( . as $row | reduce (keys|.[]) as $x
    # Pair each cell with its header by column index.
    ( {}; . + {"\($head[$x])":$row[$x]} ) )' data/tmp.tsv

Output

[
  {
    "foo": "1",
    "bar": "2",
    "baz": "3"
  },
  {
    "foo": "4",
    "bar": "5",
    "baz": "6"
  }
]

json2csv

# ~/.jq/convert.jq

# Convert an array of objects to CSV: a header row of every key seen in any
# object (sorted, deduplicated), then one line per object with cells in
# header order. Missing keys become empty cells.
def json2csv:
  (map(keys) | add | unique) as $header
  | $header,
    (.[] as $record | [ $header[] | $record[.] ])
  | @csv;
$ curl -s -o todos.json https://jsonplaceholder.typicode.com/todos
$ jq 'include "convert"; json2csv' todos.json

"completed","id","title","userId"
false,1,"delectus aut autem",1
false,2,"quis ut nam facilis et officia qui",1
false,3,"fugiat veniam minus",1
true,4,"et porro tempora",1
false,5,"laboriosam mollitia et enim quasi adipisci quia provident illum",1
false,6,"qui ullam ratione quibusdam voluptatem quia omnis",1
false,7,"illo expedita consequatur quia in",1
true,8,"quo adipisci enim quam ut ab",1

Summary

# One-column frequency table: {"<column>": {"<value>": count, ...}}.
# A null column name falls back to "blank"; so do null/false cell values.
def grouped_summary($item):
  { "\($item? // "blank")":
      ( reduce group_by(.[$item])[] as $bucket
          ( null;
            . + { "\($bucket[0][$item]? // "blank")": ($bucket | length) } ) ) };

# Frequency summary of every column (via grouped_summary), pruning noisy
# columns: entries whose first grouped key is longer than 100 chars, or with
# more than 400 distinct values, are dropped.
def summary:
  [ (.[0]|keys)[] as $keys | grouped_summary($keys)]
  | add
  | to_entries
  | map(
      # del(select(cond)) deletes the root of this entry when cond holds,
      # yielding null; the select(.) pass below filters the nulls out.
      del(select(((.value//"")|keys[0]|length) > 100)) |
      # NOTE(review): `values` is jq's select(. != null), so `length` here
      # counts the value-object's keys, i.e. distinct values — verify.
      del(select(((.value//"")|values|length) > 400))
    )
  | map(select(.))
  | from_entries;

# Work-in-progress variant of summary: keeps only each column name plus the
# number of distinct values.
def summary_wip:
  [ (.[0]|keys)[] as $keys | grouped_summary($keys)]
  | add
  | to_entries
  #| map(del(select(((.value//"")|keys|length) > 400)))
  | map(select(.)|{key,count:(.value|length)})
  # NOTE(review): the {key,count} projection above drops .value, so the
  # update below runs to_entries on null and likely errors — confirm (WIP).
  | map(.value |= to_entries);

# possibly a better solution
def summary2:
  . as $data
  | (.[0]|keys)
  | map(. as $item | {
      key: $item,
      value: ($data|map(.[$item])|group_by(.)|map({"\(.[0])": length}))|add
    })
  | map(select((.value|to_entries|length)< (.90 * ($data|length))))
  | from_entries;
$ jq 'include "recipes"; summary2' data/titanic2.json
{
  "Parents_Children_Aboard": {
    "0": 674,
    "1": 118,
    "2": 80,
    "3": 5,
    "4": 4,
    "5": 5,
    "6": 1
  },
  "Pclass": {
    "1": 216,
    "2": 184,
    "3": 487
  },
  "Sex": {
    "female": 314,
    "male": 573
  },
  "Siblings_Spouses_Aboard": {
    "0": 604,
    "1": 209,
    "2": 28,
    "3": 16,
    "4": 18,
    "5": 5,
    "8": 7
  },
  "Survived": {
    "0": 545,
    "1": 342
  }
}

tocsv

# Despite the name, this does not emit CSV: it zips the header row (first
# element) with every subsequent row, producing one object per row.
# Short rows yield null cells for the missing trailing columns.
def tocsv:
  .[0] as $header
  | .[1:]
  | map(
      . as $cells
      | [ $header | to_entries[] | { key: .value, value: $cells[.key] } ]
      | from_entries
    );
# Merge several CSV files into one object keyed by filename, converting each
# file's rows to objects via tocsv.
# NOTE(review): `inputs` parses stdin as JSON — feeding raw CSV likely needs
# the -R/-n raw-input flags plus per-file slurping; verify this runs as written.
jq 'reduce inputs as $s (.;
  # Strip CRs and double quotes, then split into rows and comma-separated cells.
  .[input_filename] += ($s|gsub("\r";"")|gsub("\"";"")|split("\n")|map(split(",")))
) | with_entries(.value|= tocsv)' PROFILES/*.csv

Wrangle

Zip Column Headers

# The first array is the header row; zip it with every following row to
# build one object per row.
.[0] as $cols
| .[1:]
| map(
  . as $row
  | $cols
  # Entries of $cols are {key: index, value: header}; remap to {header: cell}.
  | with_entries({ "key": .value,"value": $row[.key]})
)

Input

[
  ["USER", "TTY", "FROM", "LOGIN@", "IDLE", "JCPU", "PCPU", "WHAT"],
  ["lava1", "pts/0", "157.48.149.102", "05:03", "31.00s", "0.31s", "0.31s", "-bash"],
  ["azureuse", "pts/1", "157.48.149.102", "07:26", "0.00s", "0.07s", "0.05s", "w"]
]

Result

[
  {
    "USER": "lava1",
    "TTY": "pts/0",
    "FROM": "157.48.149.102",
    "LOGIN@": "05:03",
    "IDLE": "31.00s",
    "JCPU": "0.31s",
    "PCPU": "0.31s",
    "WHAT": "-bash"
  },
  {
    "USER": "azureuse",
    "TTY": "pts/1",
    "FROM": "157.48.149.102",
    "LOGIN@": "07:26",
    "IDLE": "0.00s",
    "JCPU": "0.07s",
    "PCPU": "0.05s",
    "WHAT": "w"
  }
]

Using Transpose

# Split the current line on runs of spaces.
def s: [splits(" +")];

# With -R: the first line (header) binds via `s as $k`; `inputs` yields the
# remaining lines. transpose pairs header and cell by index; add merges the
# resulting singleton objects into one object per row.
[s as $k | inputs | [$k, s] | transpose | map({(.[0]): .[1]}) | add]

Input

USER     TTY      FROM             LOGIN@   IDLE   JCPU   PCPU WHAT
lava1    pts/0    157.48.149.102   05:03   31.00s  0.31s  0.31s -bash
azureuse pts/1    157.48.149.102   07:26    0.00s  0.07s  0.05s w

Result

[
  {
    "USER": "lava1",
    "TTY": "pts/0",
    "FROM": "157.48.149.102",
    "LOGIN@": "05:03",
    "IDLE": "31.00s",
    "JCPU": "0.31s",
    "PCPU": "0.31s",
    "WHAT": "-bash"
  },
  {
    "USER": "azureuse",
    "TTY": "pts/1",
    "FROM": "157.48.149.102",
    "LOGIN@": "07:26",
    "IDLE": "0.00s",
    "JCPU": "0.07s",
    "PCPU": "0.05s",
    "WHAT": "w"
  }
]

Source

Flat string

energy.json

paths(scalars) as $p | "\($p|join("."))=\(getpath($p))"
0.source=Agricultural 'waste'
0.target=Bio-conversion
0.value=124.729
1.source=Bio-conversion
1.target=Liquid
1.value=0.597
2.source=Bio-conversion
2.target=Losses
2.value=26.862
3.source=Bio-conversion
3.target=Solid
3.value=280.322

References