...

View Full Version : Creating Adjacency Model from a delimited dataset



StupidRalph
03-09-2008, 03:50 AM
I"m trying to normalize a dataset that uses a colon as a delimiter (:) to signify a child element. I added a third column (`parent_id`) to this table so I can hopefully get rid of the delimiters and simply use the `id` of the immediate parent in the `parent_id` column (The Adjacency Model). I'm just getting tripped up on writing the PHP.



/*This is the current dataset set up*/
+----+-----------------------------------------+-----------+
| id | description | parent_id |
+----+-----------------------------------------+-----------+
| 15 | Animals | NULL |
| 16 | Animals:Amphibians | NULL |
| 17 | Animals:Antique | NULL |
| 18 | Animals:Birds:Birds Of Prey | NULL |
| 19 | Animals:Birds:Game Birds | NULL |
| 20 | Animals:Birds:North American | NULL |
| 21 | Animals:Birds:Tropical | NULL |
| 22 | Animals:Birds:Waterfowl | NULL |
| 23 | Animals:Birds | NULL |
| 24 | Animals:Cats | NULL |
| 25 | Animals:Dogs | NULL |
| 26 | Animals:Farm | NULL |
| 27 | Animals:Fish/Marine Life | NULL |
| 28 | Animals:Fish/Marine Life:Shells | NULL |
| 29 | Animals:Horses | NULL |
| 30 | Animals:Insects | NULL |
| 31 | Animals:Insects:Butterflies | NULL |
| 32 | Animals:Other Pets | NULL |
| 33 | Animals:Reptiles | NULL |
| 34 | Animals:Wildlife | NULL |
| 35 | Animals:Wildlife:Forest | NULL |
| 36 | Animals:Wildlife:Jungle | NULL |
| 37 | Animation | NULL |
| 38 | Architecture | NULL |
| 39 | Architecture:Architectural Detail | NULL |
| 40 | Architecture:Buildings | NULL |
| 41 | Architecture:Buildings:English Cottages | NULL |
+----+-----------------------------------------+-----------+




/*This is my desired outcome*/
+----+-------------------------+-----------+
| id | description | parent_id |
+----+-------------------------+-----------+
| 15 | Animals | NULL |
| 16 | Amphibians | 15 |
| 17 | Antique | 15 |
| 18 | Birds Of Prey | 23 |
| 19 | Game Birds | 23 |
| 20 | North American | 23 |
| 21 | Tropical | 23 |
| 22 | Waterfowl | 23 |
| 23 | Birds | 15 |
| 24 | Cats | 15 |
| 25 | Dogs | 15 |
| 26 | Farm | 15 |
| 27 | Fish/Marine Life | 15 |
| 28 | Fish/Marine Life:Shells | 15 |
| 29 | Horses | 15 |
| 30 | Insects | 15 |
| 31 | Butterflies | 30 |
| 32 | Other Pets | 15 |
| 33 | Reptiles | 15 |
| 34 | Wildlife | 15 |
| 35 | Forest | 34 |
| 36 | Jungle | 34 |
| 37 | Animation | NULL |
| 38 | Architecture | NULL |
| 39 | Architectural Detail | 38 |
| 40 | Buildings | 38 |
| 41 | English Cottages | 40 |
+----+-------------------------+-----------+


This is what I've got so far



$sql = "SELECT * FROM catalog_subject_copy LIMIT 0,50";
$q = mysql_query($sql) or die(mysql_error() . "<br />SQL: " . $sql);

$subjectListArr= array();

while ($r = mysql_fetch_assoc($q)) {
$subjectListArr[$r['id']] = $r['description']; //store id as description index
}

foreach ($subjectListArr as $id => $description) {

$depth = substr_count($description, ":"); //how many parents does the subject have

/*
//you can ignore this if statement for now.
if ($description == 'Root Element') {
$depth = -1;
$parent_id = NULL;
}
*/


if ($depth >= 1) { //this is a child element
$lineage = explode( ":", $description); //creates an array of complete lineage

$parent = $lineage[$depth - 1]; //this is the immediate parent.
$child = $lineage[$depth]; //this is the current element
}


I'm positive I can finish this script I'm just overthinking it.

To be done:
I have to search the $subjectListArr for the value of the parent and get its index to put it in the current element's parent node. I'm just confused as to how to accomplish this.
I'm not partial to this code so if you have another suggestion please do so.

Inigoesdr
03-09-2008, 08:34 AM
This is just an example, but it seems to do what you're looking for:

$parents = array();

$result = mysql_query('SELECT * FROM `test3` WHERE 1 ORDER BY `description`') or die(mysql_error());
while($row = mysql_fetch_assoc($result))
{
if(strpos($row['description'], ':') === false)
{
$parents[$row['description']] = $row['id'];
continue; // skip updating top-level categories
}

$temp = explode(':', $row['description']);
$parent = $temp[count($temp) - 2];
if(in_array($parent, $parents))
$parent_id = $parents[$parent];
else
$parent_id = getUnknownParent($parent, $parents);

$description = $temp[count($temp) - 1];
$uresult = mysql_query('UPDATE `test3` SET `description` = \'' . mysql_real_escape_string($description) . '\', `parent_id` = ' . $parent_id . ' WHERE `id` = ' . $row['id']) or die("Update query failed for id: {$row['id']}. " . mysql_error());
}

function getUnknownParent($parent, &$parents)
{
$result = mysql_query('SELECT `id` FROM `test3` WHERE `description` = \'' . mysql_real_escape_string($parent) . '\' LIMIT 1');
if($result)
{
$parent_id = mysql_result($result, 0);
$parents[$parent] = $parent_id;
return $parent_id;
}
else
{
return null;
}
}
Table name is "test3".

StupidRalph
03-09-2008, 11:06 AM
Thanks that really set me on the right path. Well, actually its exactly what I needed. The only thing I had to add was a WHERE condition to the UPDATE query.



UPDATE `catalog_subject_copy` SET `description` = \'' . mysql_real_escape_string($description) . '\', `parent_id` = ' . $parent_id . ' WHERE `id` = ' . $row['id'];



EZ Archive Ads Plugin for vBulletin Copyright 2006 Computer Help Forum