Collecting recursive JSON keys in Postgres
I have JSON documents stored in Postgres under the JSON datatype (Postgres 9.3) and I need to recursively collect key names from a tree.
For example, given this JSON tree
{
"files": {
"folder": {
"file1": {
"property": "blah"
},
"file2": {
"property": "blah"
},
"file3": {
"property": "blah"
},
"file4": {
"property": "blah"
}
}
},
"software": {
"apt": {
"package1": {
"version": 1.2
},
"package2": {
"version": 1.2
},
"package3": {
"version": 1.2
},
"package4": {
"version": 1.2
}
}
}
}
I would like to extract something like [file1, file2, file3, file3, package1, package2, package3, package4]
Basically just a list of keys that I can use for the text search index.
I know I can get a list of keys on the outer most objects using something like
SELECT DISTINCT(json_object_keys(data))
And I know it is possible to recursively climb the tree using something like
WITH RECURSIVE data()
but I'm having trouble with that.
Can anyone please help?
source to share
The trick is to add some final testing with help json_typeof
in the right place.
You should also use jsonb
if you don't need the order of the keys of the objects.
Here is my working environment:
CREATE TABLE test (
id SERIAL PRIMARY KEY,
doc JSON
);
INSERT INTO test (doc) VALUES ('{
"files": {
"folder": {
"file1": {
"property": "blah"
},
"file2": {
"property": "blah"
},
"file3": {
"property": "blah"
},
"file4": {
"property": "blah",
"prop" : {
"clap": "clap"
}
}
}
},
"software": {
"apt": {
"package1": {
"version": 1.2
},
"package2": {
"version": 1.2
},
"package3": {
"version": 1.2
},
"package4": {
"version": 1.2
}
}
}
}');
The recursion stops when the second query returns no rows. This is done by passing an empty object to json_each
.
WITH RECURSIVE doc_key_and_value_recursive(key, value) AS (
SELECT
t.key,
t.value
FROM test, json_each(test.doc) AS t
UNION ALL
SELECT
t.key,
t.value
FROM doc_key_and_value_recursive,
json_each(CASE
WHEN json_typeof(doc_key_and_value_recursive.value) <> 'object' THEN '{}' :: JSON
ELSE doc_key_and_value_recursive.value
END) AS t
)
SELECT *
FROM doc_key_and_value_recursive
WHERE json_typeof(doc_key_and_value_recursive.value) <> 'object';
source to share
I wrote a function for this:
CREATE OR REPLACE FUNCTION jsonb_keys_recursive(_value JSONB)
RETURNS TABLE(key TEXT) LANGUAGE SQL AS $$
WITH RECURSIVE _tree (key, value) AS (
SELECT
NULL AS key,
_value AS value
UNION ALL
SELECT
(CASE WHEN jsonb_typeof(value) = 'object'
THEN jsonb_object_keys(value)
ELSE NULL END) AS key,
(CASE WHEN jsonb_typeof(value) = 'object'
THEN value -> jsonb_object_keys(value)
WHEN jsonb_typeof(value) = 'array'
THEN jsonb_array_elements(value)
ELSE NULL END) AS value
FROM
_tree
WHERE
jsonb_typeof(value) IN ('object', 'array')
)
SELECT DISTINCT key
FROM
_tree
WHERE key IS NOT NULL
$$;
As an example, try:
SELECT jsonb_keys_recursive('{"A":[[[{"C":"B"}]]],"X":"Y"}');
Note that the other two answers don't find keys inside objects inside arrays, my solution does. (The question did not provide any examples of arrays at all, so searching for keys within arrays might not have been what the original searcher needed, but it was what I needed.)
source to share
A slightly more concise version that you can just check out:
WITH RECURSIVE reports (key, value) AS (
SELECT
NULL as key,
'{"k1": {"k2": "v1"}, "k3": {"k4": "v2"}, "k5": "v3"}'::JSONB as value
UNION ALL
SELECT
jsonb_object_keys(value)as key,
value->jsonb_object_keys(value) as value
FROM
reports
WHERE
jsonb_typeof(value) = 'object'
)
SELECT
*
FROM
reports;
This will return a list that you will need to group using a separate one.
source to share