The story about the migration: Oracle to Postgres | Postges Build 2020 | Alicja Kucharczyk & Sushant Pandey
The story about a challenging PoC that proved that Postgres can achieve the same performance as Oracle Exadata. The schema that was migrated wasn’t the simplest one you might see.
of a column value from root to node, with column values separated by char for each row returned by CONNECT BY condition. SELECT LPAD(' ', 2*level-1)||SYS_CONNECT_BY_PATH(last_name, '/') "Path" FROM employees START WITH last_name = 'Kochhar' CONNECT BY PRIOR employee_id = manager_id; Path --------------------------------------------------------------- /Kochhar /Kochhar/Greenberg /Kochhar/Greenberg/Faviet /Kochhar/Greenberg/Chen /Kochhar/Greenberg/Sciarra /Kochhar/Greenberg/Urman /Kochhar/Greenberg/Popp /Kochhar/Whalen /Kochhar/Mavris /Kochhar/Baer /Kochhar/Higgins /Kochhar/Higgins/Gietz https://docs.oracle.com/cd/B19306_01/server.102/b14200/functions164.htm
OR REPLACE PROCEDURE SysConnectByPath IS bar_ids_tab FOO_TYPE := foo_type(111, 20, 3, 4, 5); bar_ids_string VARCHAR(1000); BEGIN SELECT SUBSTR(SYS_CONNECT_BY_PATH(column_value, ','), 2) csv INTO bar_ids_string FROM (SELECT column_value, ROW_NUMBER() OVER (ORDER BY column_value ) rn, COUNT(*) OVER () cnt FROM TABLE (bar_ids_tab)) WHERE rn = cnt START WITH rn = 1 CONNECT BY rn = PRIOR rn + 1; DBMS_OUTPUT.PUT_LINE('out ' || bar_ids_string); END; completed in 45 ms out 3,4,5,20,111
FUNCTION sysconnectbypath() RETURNS VOID AS $body$ DECLARE bar_ids_tab FOO_TYPE := foo_type(111, 20, 3, 4, 5); bar_ids_string VARCHAR(1000);WITH RECURSIVE cte AS ( BEGIN SELECT SUBSTR(column_value, 2) CSV INTO STRICT bar_ids_string FROM (SELECT column_value, ROW_NUMBER() OVER (ORDER BY column_value ) rn, COUNT(*) OVER () cnt FROM TABLE(bar_ids_tab) alias6) alias7 WHERE rn = 1 UNION ALL BEGIN SELECT C.bar_ids_string || ',' || SUBSTR(column_value, 2) CSV INTO STRICT bar_ids_string FROM (SELECT column_value, ROW_NUMBER() OVER (ORDER BY column_value ) rn, COUNT(*) OVER () cnt FROM TABLE(bar_ids_tab) alias6) JOIN cte C ON (C.rn + 1 = alias7.rn) ) SELECT * FROM cte WHERE rn = cnt; ; RAISE NOTICE 'out %', bar_ids_string; END; $body$ LANGUAGE PLPGSQL;
NUMERIC[]; bar_ids_string TEXT; BEGIN bar_ids_tab := '{111, 20, 3, 4, 5}'; SELECT string_agg(x::TEXT, ',') INTO bar_ids_string FROM ( SELECT unnest(bar_ids_tab) AS x ORDER BY x) a; RAISE NOTICE '%', bar_ids_string; END; $$ [00000] 3,4,5,20,111 completed in 3 ms
bar_ids_tab := '{111, 20, 3, 4, 5}'; SELECT string_agg(x::TEXT, ',') INTO bar_ids_string FROM ( SELECT unnest(bar_ids_tab) AS x ORDER BY x) a; RAISE NOTICE '%', bar_ids_string; END; $$ CREATE OR REPLACE TYPE FOO_TYPE IS TABLE OF INTEGER; CREATE OR REPLACE PROCEDURE SysConnectByPath IS bar_ids_tab FOO_TYPE := foo_type(111, 20, 3, 4, 5); bar_ids_string VARCHAR(1000); BEGIN SELECT SUBSTR(SYS_CONNECT_BY_PATH(column_value, ','), 2) csv INTO bar_ids_string FROM (SELECT column_value, ROW_NUMBER() OVER (ORDER BY column_value ) rn, COUNT(*) OVER () cnt FROM TABLE (bar_ids_tab)) WHERE rn = cnt START WITH rn = 1 CONNECT BY rn = PRIOR rn + 1; DBMS_OUTPUT.PUT_LINE('out ' || bar_ids_string); END; [00000] 3,4,5,20,111 completed in 3 ms out 3,4,5,20,111 completed in 45 ms
preceding statements retrieves an entire result set and stores it in one or more collection variables in a single operation (which is more efficient than using a loop statement to retrieve one result row at a time). https://docs.oracle.com/database/121/LNPLS/tuning.htm#LNPLS891
CURSOR get_foo (p_version number) IS SELECT * FROM secret_data WHERE foo_id = p_version ORDER BY bar_id; TYPE my_type IS TABLE of get_foo%ROWTYPE INDEX BY pls_integer; my_tab my_type; some_string VARCHAR2(5000); BEGIN OPEN get_foo(1); -- Process one level at a time. LOOP FETCH get_foo BULK COLLECT INTO my_tab LIMIT 1000; EXIT WHEN my_tab.COUNT = 0; FOR indx IN 1 .. my_tab.COUNT LOOP some_string := ''; FOR prnt_indx IN REVERSE 2..indx - 1 LOOP some_string := some_string || ', tab.' || TO_CHAR(my_tab(prnt_indx).my_flag); END LOOP; END LOOP; END LOOP; CLOSE get_foo; END; CALL BulkCollect(1) completed in 41 ms
FUNCTION bulkcollect(p_version BIGINT) RETURNS VOID AS $body$ DECLARE get_foo CURSOR (p_version BIGINT) FOR SELECT * FROM secret_data WHERE foo_id = p_version ORDER BY bar_id; TYPE MY_TYPE IS TABLE OF RECORD INDEX BY INTEGER; my_tab MY_TYPE; some_string VARCHAR(5000); BEGIN OPEN get_foo(1); -- Process one level at a time. LOOP FETCH get_foo BULK COLLECT INTO my_tab LIMIT 1000; EXIT WHEN my_tab.COUNT = 0; FOR indx IN 1 .. my_tab.COUNT LOOP some_string := ''; FOR prnt_indx IN REVERSE indx..2 - 1 LOOP some_string := some_string || ', tab.' || my_tab[prnt_indx].my_flag::VARCHAR; END LOOP; END LOOP; END LOOP; CLOSE get_foo; END; $body$ LANGUAGE PLPGSQL ;
INTEGER) RETURNS SETOF SECRET_DATA AS $body$ SELECT * FROM secret_data WHERE foo_id = p_version ORDER BY bar_id; $body$ LANGUAGE SQL STABLE; CURSOR get_foo (p_version number) IS SELECT * FROM secret_data WHERE foo_id = p_version ORDER BY bar_id;
DECLARE some_string TEXT; get_foo_row RECORD; i INT; BEGIN DROP TABLE IF EXISTS temp_get_foo; CREATE TEMPORARY TABLE temp_get_foo ON COMMIT DROP AS SELECT row_number() OVER () as rnum, * FROM get_foo(p_version); DELETE FROM temp_get_foo WHERE rnum = 1; ANALYZE temp_get_foo; i := 1; FOR get_foo_row IN SELECT * FROM get_foo(p_version) -- Process one level at a time. LOOP SELECT 'tab.' || string_agg(my_flag, ', tab.' ORDER BY rnum DESC) INTO some_string FROM temp_get_foo WHERE rnum < ( SELECT rnum FROM temp_get_foo WHERE bar_id = get_foo_row.bar_id) AND rnum <> (SELECT max(rnum) FROM temp_get_foo); i := i + 1; END LOOP; END; $body$; CALL BulkCollect(1) completed in 14 ms
CREATE PROCEDURE BulkCollect(p_version INT) LANGUAGE plpgsql AS $body$ DECLARE some_string TEXT; get_foo_row RECORD; i INT; BEGIN DROP TABLE IF EXISTS temp_get_foo; CREATE TEMPORARY TABLE temp_get_foo ON COMMIT DROP AS SELECT row_number() OVER () as rnum, * FROM get_foo(p_version); DELETE FROM temp_get_foo WHERE rnum = 1; ANALYZE temp_get_foo; i := 1; FOR get_foo_row IN SELECT * FROM get_foo(p_version) -- Process one level at a time. LOOP SELECT 'tab.' || string_agg(my_flag, ', tab.' ORDER BY rnum DESC) INTO some_string FROM temp_get_foo WHERE rnum < ( SELECT rnum FROM temp_get_foo WHERE bar_id = get_foo_row.bar_id) AND rnum <> (SELECT max(rnum) FROM temp_get_foo); i := i + 1; END LOOP; END; $body$; CREATE OR REPLACE PROCEDURE BulkCollect(p_version IN NUMBER) IS CURSOR get_foo (p_version number) IS SELECT * FROM secret_data WHERE foo_id = p_version ORDER BY bar_id; TYPE my_type IS TABLE of get_foo%ROWTYPE INDEX BY pls_integer; my_tab my_type; some_string VARCHAR2(5000); BEGIN OPEN get_foo(1); -- Process one level at a time. LOOP FETCH get_foo BULK COLLECT INTO my_tab LIMIT 1000; EXIT WHEN my_tab.COUNT = 0; FOR indx IN 1 .. my_tab.COUNT LOOP some_string := ''; FOR prnt_indx IN REVERSE 2..indx - 1 LOOP some_string := some_string || ', tab.' || TO_CHAR(my_tab(prnt_indx).my_flag); END LOOP; END LOOP; END LOOP; CLOSE get_foo; END;
output along with other output params Replacements Reduced loop nesting with improvements SETOF Record as output Improved logic for dynamic queries to make them performant