Flatten parent relationships with children in SQL Server

I have two tables in SQL Server: household and people. The house is the house and people represent the people living in the house:

Household

Id       Address        City        State          Zip
------------------------------------------------------
1        123 Main       Anytown     CA           90121

      

People

Id        HouseholdId       Name        Age
-------------------------------------------
1         1                 John         32
2         1                 Jane         29

      

I want to query two tables and get the result as shown below, but I'm not sure how best to approach this:

Id        Address        City        State        Zip        Person1Name        Person1Age        Person2Name     Person2Age
----------------------------------------------------------------------------------------------------------------------------
1         123 Main       Anytown     CA           90121      John                       32        Jane                    29

      

Of course, "PersonXName and PersonXAge" should be repeated depending on the number of people. How can I write a query that accomplishes this? Simplicity is preferred over performance as this is a one-off report I need to come up with.

+3


source to share


5 answers


This is done using a dynamic cross-tab. For reference: http://www.sqlservercentral.com/articles/Crosstab/65048/

CREATE TABLE HouseHold(
    ID      INT,
    Address VARCHAR(20),
    City    VARCHAR(20),
    State   CHAR(2),
    Zip     VARCHAR(10)
)
CREATE TABLE People(
    ID          INT,
    HouseHoldID INT,
    Name        VARCHAR(20),
    Age         INT
)
INSERT INTO HouseHold VALUES
(1, '123 Main', 'Anytown', 'CA', '90121');
INSERT INTO People VALUES
(1, 1, 'John', 32),
(2, 1, 'Jane', 29);

DECLARE @sql1 VARCHAR(4000) = ''
DECLARE @sql2 VARCHAR(4000) = ''
DECLARE @sql3 VARCHAR(4000) = ''

SELECT @sql1 =
'SELECT
     ID
    ,Address
    ,City
    ,State
    ,Zip'
+ CHAR(10)

SELECT @sql2 = @sql2 +
'   ,MAX(CASE WHEN RN = ' + CONVERT(VARCHAR(10), RN) + ' THEN Name END) AS [Person' + CONVERT(VARCHAR(10), RN) + 'Name]
    ,MAX(CASE WHEN RN = ' + CONVERT(VARCHAR(10), RN) + ' THEN Age END) AS [Person' + CONVERT(VARCHAR(10), RN) + 'Age]
'
FROM(
    SELECT DISTINCT RN = ROW_NUMBER() OVER(PARTITION BY p.HouseHoldID ORDER BY p.ID)
    FROM People p   
)t

SELECT @sql3 =
'FROM(
    SELECT
        h.*
        ,p.Name
        ,p.Age
        ,RN = ROW_NUMBER() OVER(PARTITION BY h.ID ORDER BY p.ID)
    FROM Household h
    INNER JOIN People p ON p.HouseHoldId = h.ID
)t
GROUP BY ID, Address, City, State, Zip
ORDER BY ID'

PRINT(@sql1 + @sql2 + @sql3)
EXEC (@sql1 + @sql2 + @sql3)

DROP TABLE HouseHold
DROP TABLE People

      



RESULT

ID          Address              City                 State Zip        Person1Name          Person1Age  Person2Name          Person2Age
----------- -------------------- -------------------- ----- ---------- -------------------- ----------- -------------------- -----------
1           123 Main             Anytown              CA    90121      John                 32          Jane                 29

      

+3


source


This is adapted from a script I am using with a similar requirement. You probably don't want to use if the People table has a million rows, but works well enough for my use with roughly 20,000 rows:



DECLARE @id int, @householdid int, @name varchar(50), @age int, @currentid   int, @peoplecount int;
DECLARE @colsql nvarchar(1000), @datasql nvarchar(1000), @RunSql nvarchar(1000);

CREATE TABLE #ReturnTable (HouseholdId int, Address varchar(50))

INSERT #ReturnTable
SELECT Id, Address
FROM Household;

-- these are split into two dynamic queries
-- so that columns exist when we try the insert
SET @colsql = 'IF (SELECT COUNT(*)
  FROM TempDB.INFORMATION_SCHEMA.COLUMNS 
  WHERE COLUMN_NAME = ''Person{Number}Name'' 
    AND TABLE_NAME LIKE ''#ReturnTable'') = 0
BEGIN
  ALTER TABLE #ReturnTable
  ADD Person{Number}Name VARCHAR(50)
END

IF (SELECT COUNT(*)
  FROM TempDB.INFORMATION_SCHEMA.COLUMNS 
  WHERE COLUMN_NAME = ''Person{Number}Age'' 
    AND TABLE_NAME LIKE ''#ReturnTable'') = 0
BEGIN
  ALTER TABLE #ReturnTable
  ADD Person{Number}Age INT
END'

set @datasql =
'UPDATE #ReturnTable
SET Person{Number}Name = @name,
  Person{Number}Age = @age
WHERE HouseholdId = @householdid'

DECLARE PeopleCursor CURSOR FOR
SELECT p.Id, p.HouseholdId, p.Name, p.Age
FROM People p
ORDER BY p.HouseholdId, p.Age

OPEN PeopleCursor;

FETCH NEXT FROM PeopleCursor
INTO @id, @householdid, @name, @age

SET @currentid = @id

SET @peoplecount = 1;

WHILE @@FETCH_STATUS = 0
BEGIN
    IF @currentid <> @id
    BEGIN
       SET @peoplecount = 1
       SET @currentid = @id
    END
    ELSE SET @peoplecount = @peoplecount + 1;

    SET @RunSql = REPLACE(@colsql, '{Number}', CAST(@peoplecount AS VARCHAR(3)));

    EXEC dbo.sp_ExecuteSql @RunSql

    SET @RunSql = REPLACE(@datasql, '{Number}', CAST(@peoplecount AS VARCHAR(3)));

    EXEC dbo.sp_ExecuteSql @RunSql, N'@householdid int, @name varchar(50), @age int', @householdid = @householdid, @name = @name, @age = @age;

    FETCH NEXT FROM PeopleCursor
    INTO @id, @householdid, @name, @age
END

CLOSE PeopleCursor
DEALLOCATE PeopleCursor

SELECT *
FROM #ReturnTable

drop table #ReturnTable

      

+1


source


Several changes are required. If you guys can help me solve this problem it would be great ...

 DECLARE @col1 nvarchar(max) = '', @col2 nvarchar(max) = ''
 declare @colname nvarchar(max),@query nvarchar(max), @cols nvarchar(max)=''

 DECLARE openall CURSOR for
 SELECT ROW_NUMBER() OVER(ORDER BY NAME) rowno FROM People 

    OPEN openall
  fetch next from openall into @colname

while @@FETCH_STATUS = 0
    begin

        set @col1 += 'Person'+ @colname +'Name,'
        set @col2 += 'Person'+ @colname+'Age,'

        fetch next from openall into @colname
     end     

  set @col1 = LEFT(@col1,LEN(@col1)-1)
  set @col2 = LEFT(@col2,LEN(@col2)-1)

 set @query = 'SELECT ID, Address, City, State, Zip, ' + @col1 + ', ' + @col2 + ' 
       FROM ( 
        SELECT h.ID, Address, City, State, Zip,p.name,p.age from Household h 
         inner join people p on h.id = p.householdid
           ) x
        pivot
       (
      sum(age) for
      name in (' + @col1 + ', ' + @col2 + ')
      ) p '

     execute(@query)
     close openall
     deallocate openall

      

0


source


This "smoothing" operation is referred to as a multi-column dynamic bar. It is dynamic because the pivot column values ​​are not known at design time, and it is multicolored because you have the pivot column values ​​"age" and "name".

To make a multi-column dynamic pivot in SQL you need to use dynamic sql and resort to case statements. SQL Pivot statement does not work with multi-column pivot. This is collapsed for implementation.

I don't think SQL is the best language for doing multi-column dynamic bar. I think this is better done on the client side.

The following C # method returns a data table containing the query you are looking for:

public DataTable GetPivotedPeople()
{
    using (var ds = new MyDataService())
    {
        return ds.PersonRepository
            .Query("Household")
            .OrderBy(PersonFields.HouseHoldId, PersonFields.Address, PersonFields.City, PersonFields.State, PersonFields.Zip)
            .Pivot(
                new PivotTransform
                {
                    PivotColumnName = PersonFields.PersonId,
                    ValueColumnName = PersonFields.Name,
                    GetPivotedColumnName = (personId) => "Person" + personId.ToString() + "Name"
                },
                new PivotTransform
                {
                    PivotColumnName = PersonFields.PersonId,
                    ValueColumnName = PersonFields.Age,
                    GetPivotedColumnName = (personId) => "Person" + personId.ToString() + "Age"
                }
            );
    }
}

      

This is the contents of the returned data table:

+-------------+----------+---------+-------+-------+-------------+-------------+------------+------------+
| HouseHoldId | Address  |   Ciy   | State |  Zip  | Person1Name | Person2Name | Person1Age | Person2Age |
+-------------+----------+---------+-------+-------+-------------+-------------+------------+------------+
|           1 | 123 Main | Anytown | CA    | 90121 | John        | Jane        |         32 |         29 |
+-------------+----------+---------+-------+-------+-------------+-------------+------------+------------+

      

It uses EntityLite , a micro ORM that I developed. It queries the Person_HouseHold view and rotates the rows on the client side. Here you have your sql script:

CREATE DATABASE DynamicPivot
GO
USE DynamicPivot
GO
CREATE TABLE Households
(
    HouseholdId int IDENTITY(1,1) PRIMARY KEY,
    [Address] nvarchar(128) NOT NULL,
    City nvarchar(128) NOT NULL,
    [State] nvarchar(128) NOT NULL,
    Zip nvarchar(128) NOT NULL
);
INSERT INTO Households ([Address], City, [State], Zip) 
VALUES (N'123 Main', N'Anytown', N'CA', N'90121');
GO

CREATE TABLE People
(
    PersonId int IDENTITY(1,1) PRIMARY KEY,
    HouseHoldId int NOT NULL CONSTRAINT FK_People_Households REFERENCES HouseHolds(HouseholdId),
    Name nvarchar(128) NOT NULL,
    Age int NOT NULL

);
INSERT INTO People(HouseHoldId, Name, Age) VALUES
(1, N'John', 32), (1, 'Jane', 29)

GO

CREATE VIEW Person_Household
AS
    SELECT 
        P.PersonId, P.HouseHoldId, P.Name, P.Age,
        H.[Address], H.City, H.[State], H.Zip
    FROM
        dbo.People P INNER JOIN dbo.Households H
            ON P.HouseHoldId = H.HouseholdId

      

I wrote this article on CodeProject. It explains how to do pivot using sql and how to do client side pivot using EntityLite or using raw ADO.NET. So, to do a client side pivot, you don't need to use EntityLite.

0


source


With that in mind, you might want to consider this:

get the maximum number of people living in one house (select the number)

join the people table to the household table for each person.

SELECT * 
FROM Household 
LEFT JOIN People p1 
ON p1.HouseHoldId = Household.Id
LEFT JOIN People p2 
ON p2.HouseHoldId = Household.Id

      

Depending on indexes, optimization settings, and many other conditions, this can even be a very effective solution.

-1


source







All Articles