I had assistance in creating a Postgres Pedigree process for plant breeding in 2012 via PostgreSQL Recursive via 2 parent/child tables. The pedigree parent/child hierarchy is defined by the family/plant ids. Each plant "links" to the family table via the "id_family" foreign key. Plants with Root level parents have an ID of 1 which maps to 'NA' and have the Family is_root value set to 'Y'. The problem is that you can't determine the actual child decedents in the path output.
The Parent/Child Mapping Use Case: Determine the F2+ parent (non-root level) to child relationships by associating the ptst_plant id_family value that maps back to the previous family, then prefix the child plant with the @ character to show that it is the descendant plant.
Parent/Child Mapping Workflow via ptst-pedigree.sql process:
Examples below with the desired @ character plant prefix:
F1 family1AA=(f1A x m2A) >F2 family3AE=(@f7A x m1E) >F3 family5AEAG=(@f1AE x m1AG)
F1 family2AA=(f3A x m4A) >F2 family4AG=(@f8A x m1G) >F3 family5AEAG=(f1AE x @m1AG)
Below are the test tables/data using Postgresql 16.6:
DROP TABLE if exists ptst_family CASCADE;
DROP TABLE if exists ptst_plant CASCADE;
DROP TABLE if exists ptst_pedigree CASCADE;
CREATE TABLE ptst_family (
id serial,
family_key VARCHAR(20) UNIQUE,
female_plant_id INTEGER NOT NULL DEFAULT 1,
male_plant_id INTEGER NOT NULL DEFAULT 1,
is_root VARCHAR NOT NULL DEFAULT '0', -- Root level familes are always the first level pedigree (F1)
CONSTRAINT ptst_family_pk PRIMARY KEY (id)
);
CREATE TABLE ptst_plant (
id serial,
plant_key VARCHAR(20) UNIQUE,
id_family INTEGER NOT NULL,
CONSTRAINT ptst_plant_pk PRIMARY KEY (id),
CONSTRAINT ptst_plant_id_family_fk FOREIGN KEY(id_family) REFERENCES ptst_family(id)
);
CREATE TABLE ptst_pedigree (
id serial,
pedigree_key VARCHAR NOT NULL,
path VARCHAR NOT NULL UNIQUE
);
-- FAMILY Table DATA:
insert into ptst_family (id, family_key, female_plant_id, male_plant_id, is_root) VALUES (1,'NA',1,1,'Y'); -- Default place holder record
-- F1 Root level Alba families
insert into ptst_family (id, family_key, female_plant_id, male_plant_id, is_root) VALUES (2,'family1AA',2,3,'Y');
insert into ptst_family (id, family_key, female_plant_id, male_plant_id, is_root) VALUES (3,'family2AA',4,5,'Y');
-- F2 Hybrid Families
insert into ptst_family (id, family_key, female_plant_id, male_plant_id, is_root) VALUES (5,'family3AE',6,8,'N');
insert into ptst_family (id, family_key, female_plant_id, male_plant_id, is_root) VALUES (6,'family4AG',7,9,'N');
-- F3 Double Hybrid family:
insert into ptst_family (id, family_key, female_plant_id, male_plant_id, is_root) VALUES (9,'family5AEAG',10,11,'N');
-- PLANT Table DATA:
insert into ptst_plant (id, plant_key, id_family) VALUES (1,'NA',1); -- Default place holder record
insert into ptst_plant (id, plant_key, id_family) VALUES (2,'f1A',1);
insert into ptst_plant (id, plant_key, id_family) VALUES (3,'m2A',1);
insert into ptst_plant (id, plant_key, id_family) VALUES (4,'f3A',1);
insert into ptst_plant (id, plant_key, id_family) VALUES (5,'m4A',1);
-- Female Alba progeny:
insert into ptst_plant (id, plant_key, id_family) VALUES (6,'f7A',2);
insert into ptst_plant (id, plant_key, id_family) VALUES (7,'f8A',3);
-- Male/female Aspen Root level parents:
insert into ptst_plant (id, plant_key, id_family) VALUES (8,'m1E',1);
insert into ptst_plant (id, plant_key, id_family) VALUES (9,'m1G',1);
-- F1 Hybrid progeny:
insert into ptst_plant (id, plant_key, id_family) VALUES (10,'f1AE',5);
insert into ptst_plant (id, plant_key, id_family) VALUES (11,'m1AG',6);
Below is the pedigree.sql script developed in 2012:
WITH RECURSIVE expanded_family AS (
SELECT
f.id,
f.family_key,
pf.id_family pf_family,
pm.id_family pm_family,
f.is_root,
f.family_key || '=(' || pf.plant_key || ' x ' || pm.plant_key || ')' pretty_print
FROM ptst_family f
JOIN ptst_plant pf ON f.female_plant_id = pf.id
JOIN ptst_plant pm ON f.male_plant_id = pm.id
),
search_tree AS
(
SELECT
f.id,
f.family_key,
f.id family_root,
1 depth,
'>F1 ' || f.pretty_print path
FROM expanded_family f
WHERE
f.id != 1
AND f.is_root = 'Y'
UNION ALL
SELECT
f.id,
f.family_key,
st.family_root,
st.depth + 1,
st.path || ' >F' || st.depth+1 || ' ' || f.pretty_print
FROM search_tree st
JOIN expanded_family f
ON f.pf_family = st.id
OR f.pm_family = st.id
WHERE
f.id <> 1
)
SELECT
family_key,
path
FROM
(
SELECT
family_key,
rank() over (partition by family_root order by depth desc),
path
FROM search_tree
) AS ranked
-- WHERE rank = 1
WHERE path NOT LIKE '%(N/A x N/A)%' -- Remove rows with no filial output
ORDER BY family_key, path
Below is my Pedigree table desired output with the "@" prefixed child decedents:
ID | Pedigree_key | Path
1 family2AA >F1 family2AA=(f3A x m4A)
2 family3AE >F1 family1AA=(f1A x m2A) >F2 family3AE=(@f7A x m1E)
3 family4AG >F1 family2AA=(f3A x m4A) >F2 family4AG=(@f8A x m1G)
4 family5AEAG >F1 family1AA=(f1A x m2A) >F2 family3AE=(@f7A x m1E) >F3 family5AEAG=(@f1AE x m1AG)
5 family5AEAG >F1 family2AA=(f3A x m4A) >F2 family4AG=(@f8A x m1G) >F3 family5AEAG=(f1AE x @m1AG)
I had assistance in creating a Postgres Pedigree process for plant breeding in 2012 via PostgreSQL Recursive via 2 parent/child tables. The pedigree parent/child hierarchy is defined by the family/plant ids. Each plant "links" to the family table via the "id_family" foreign key. Plants with Root level parents have an ID of 1 which maps to 'NA' and have the Family is_root value set to 'Y'. The problem is that you can't determine the actual child decedents in the path output.
The Parent/Child Mapping Use Case: Determine the F2+ parent (non-root level) to child relationships by associating the ptst_plant id_family value that maps back to the previous family, then prefix the child plant with the @ character to show that it is the descendant plant.
Parent/Child Mapping Workflow via ptst-pedigree.sql process:
Examples below with the desired @ character plant prefix:
F1 family1AA=(f1A x m2A) >F2 family3AE=(@f7A x m1E) >F3 family5AEAG=(@f1AE x m1AG)
F1 family2AA=(f3A x m4A) >F2 family4AG=(@f8A x m1G) >F3 family5AEAG=(f1AE x @m1AG)
Below are the test tables/data using Postgresql 16.6:
DROP TABLE if exists ptst_family CASCADE;
DROP TABLE if exists ptst_plant CASCADE;
DROP TABLE if exists ptst_pedigree CASCADE;
CREATE TABLE ptst_family (
id serial,
family_key VARCHAR(20) UNIQUE,
female_plant_id INTEGER NOT NULL DEFAULT 1,
male_plant_id INTEGER NOT NULL DEFAULT 1,
is_root VARCHAR NOT NULL DEFAULT '0', -- Root level familes are always the first level pedigree (F1)
CONSTRAINT ptst_family_pk PRIMARY KEY (id)
);
CREATE TABLE ptst_plant (
id serial,
plant_key VARCHAR(20) UNIQUE,
id_family INTEGER NOT NULL,
CONSTRAINT ptst_plant_pk PRIMARY KEY (id),
CONSTRAINT ptst_plant_id_family_fk FOREIGN KEY(id_family) REFERENCES ptst_family(id)
);
CREATE TABLE ptst_pedigree (
id serial,
pedigree_key VARCHAR NOT NULL,
path VARCHAR NOT NULL UNIQUE
);
-- FAMILY Table DATA:
insert into ptst_family (id, family_key, female_plant_id, male_plant_id, is_root) VALUES (1,'NA',1,1,'Y'); -- Default place holder record
-- F1 Root level Alba families
insert into ptst_family (id, family_key, female_plant_id, male_plant_id, is_root) VALUES (2,'family1AA',2,3,'Y');
insert into ptst_family (id, family_key, female_plant_id, male_plant_id, is_root) VALUES (3,'family2AA',4,5,'Y');
-- F2 Hybrid Families
insert into ptst_family (id, family_key, female_plant_id, male_plant_id, is_root) VALUES (5,'family3AE',6,8,'N');
insert into ptst_family (id, family_key, female_plant_id, male_plant_id, is_root) VALUES (6,'family4AG',7,9,'N');
-- F3 Double Hybrid family:
insert into ptst_family (id, family_key, female_plant_id, male_plant_id, is_root) VALUES (9,'family5AEAG',10,11,'N');
-- PLANT Table DATA:
insert into ptst_plant (id, plant_key, id_family) VALUES (1,'NA',1); -- Default place holder record
insert into ptst_plant (id, plant_key, id_family) VALUES (2,'f1A',1);
insert into ptst_plant (id, plant_key, id_family) VALUES (3,'m2A',1);
insert into ptst_plant (id, plant_key, id_family) VALUES (4,'f3A',1);
insert into ptst_plant (id, plant_key, id_family) VALUES (5,'m4A',1);
-- Female Alba progeny:
insert into ptst_plant (id, plant_key, id_family) VALUES (6,'f7A',2);
insert into ptst_plant (id, plant_key, id_family) VALUES (7,'f8A',3);
-- Male/female Aspen Root level parents:
insert into ptst_plant (id, plant_key, id_family) VALUES (8,'m1E',1);
insert into ptst_plant (id, plant_key, id_family) VALUES (9,'m1G',1);
-- F1 Hybrid progeny:
insert into ptst_plant (id, plant_key, id_family) VALUES (10,'f1AE',5);
insert into ptst_plant (id, plant_key, id_family) VALUES (11,'m1AG',6);
Below is the pedigree.sql script developed in 2012:
WITH RECURSIVE expanded_family AS (
SELECT
f.id,
f.family_key,
pf.id_family pf_family,
pm.id_family pm_family,
f.is_root,
f.family_key || '=(' || pf.plant_key || ' x ' || pm.plant_key || ')' pretty_print
FROM ptst_family f
JOIN ptst_plant pf ON f.female_plant_id = pf.id
JOIN ptst_plant pm ON f.male_plant_id = pm.id
),
search_tree AS
(
SELECT
f.id,
f.family_key,
f.id family_root,
1 depth,
'>F1 ' || f.pretty_print path
FROM expanded_family f
WHERE
f.id != 1
AND f.is_root = 'Y'
UNION ALL
SELECT
f.id,
f.family_key,
st.family_root,
st.depth + 1,
st.path || ' >F' || st.depth+1 || ' ' || f.pretty_print
FROM search_tree st
JOIN expanded_family f
ON f.pf_family = st.id
OR f.pm_family = st.id
WHERE
f.id <> 1
)
SELECT
family_key,
path
FROM
(
SELECT
family_key,
rank() over (partition by family_root order by depth desc),
path
FROM search_tree
) AS ranked
-- WHERE rank = 1
WHERE path NOT LIKE '%(N/A x N/A)%' -- Remove rows with no filial output
ORDER BY family_key, path
Below is my Pedigree table desired output with the "@" prefixed child decedents:
ID | Pedigree_key | Path
1 family2AA >F1 family2AA=(f3A x m4A)
2 family3AE >F1 family1AA=(f1A x m2A) >F2 family3AE=(@f7A x m1E)
3 family4AG >F1 family2AA=(f3A x m4A) >F2 family4AG=(@f8A x m1G)
4 family5AEAG >F1 family1AA=(f1A x m2A) >F2 family3AE=(@f7A x m1E) >F3 family5AEAG=(@f1AE x m1AG)
5 family5AEAG >F1 family2AA=(f3A x m4A) >F2 family4AG=(@f8A x m1G) >F3 family5AEAG=(f1AE x @m1AG)
I discovered that I can resolve this via a separate update script to change the Pedigree table Path string by searching for the F2+ families against my existing avw_plant VIEW to verify the parent plant then prefix the descendant child with the "@" character. This was a valuable exercise since it forced me to dig deep to understand the problem from different perspectives then prove it with a simple POC like the example below, thanks for listening!
Pedigree Path: >F1 41XAA91=(A10 x A73) >F2 99XAA10=(30AA5MF x AA4102)
psql r4p -c "select plant_key, family_key from avw_plant where family_key = '41XAA91' and plant_key = 'AA4102';"
Updated Path: >F1 41XAA91=(A10 x A73) >F2 99XAA10=(30AA5MF x @AA4102)