How to fill gaps in a SQL table using one statement, not multiple steps

Imagine I have the following tables:

enter image description here

I want to fill in the blanks in a table like so:

enter image description here

Now I can do this using a few steps with CTEs and temp tables creating groups and max / min values ​​and then inserting them.

However, I'm wondering if there is a more elegant (!) Way to do this in a single expression that joins the original itself with months .

The bonus points for this are in ansi sql as opposed to tsql, but acceptable.

===

here is some sql to create a test

drop table months;
create table months (year int, month int);

insert into months(year, month) values
(2000, 200007),   (2001, 200101),  (2002, 200201),
(2000, 200008),   (2001, 200102),  (2002, 200202),
(2000, 200009),   (2001, 200103),  (2002, 200203),
(2000, 200010),   (2001, 200104),  (2002, 200204),
(2000, 200011),   (2001, 200105),  (2002, 200205),
(2000, 200012),   (2001, 200106),  (2002, 200206),
                  (2001, 200107),  (2002, 200007),
                  (2001, 200108),  (2002, 200208),
                  (2001, 200109),  (2002, 200208),
                  (2001, 200110),
                  (2001, 200111),
                  (2001, 200112)

drop table source;
create table source (name varchar(10), month int, item int, val float);

insert into source(name, month, item, val) values
('bob', 200101, 1, 1.5),
('bob', 200102, 2, 1.5),
('bob', 200103, 2, 2),
('bob', 200108, 1, 0),
('bob', 200109, 10, 6.6),
('bob', 200110, 11, 2.2),
('bob', 200111, 9, 1),
('bob', 200207, 23, 0)

      

+3


source to share


2 answers


SQLFiddle Sample Example for SQL Server 2008

Perhaps not the most elegant, but this is one way to do it

Table and data

create table sources 
(
  source varchar(20),
  month int,
  item int,
  val decimal
);

insert into sources values
('bob', 200101, 1, 1.5),
('bob', 200102, 2, 1.5),
('bob', 200104, 2, 1.5);

      

Please note that 200103 is not available. We'll fill this in with SQL.

code

-- Recursive CTE to create all dates between 2001-01-01 and 2001-03-02
WITH Dates as
(
  select cast('2001-01-01' as date) as CalendarDate
  union all     
  select dateadd(day , 1, CalendarDate) AS CalendarDate
  from Dates
  where dateadd (day, 1, CalendarDate) < '2001-03-02'
),

-- CTE to get distinct yyyymm
FormattedDates as
(
  select 
    distinct cast(replace(left(CalendarDate,7), '-', '') as int) dt
  from Dates
)

-- compare list of yyyymm with sources and fill missing information
select 
  'bob', 
  FormattedDates.dt, 
  (select top 1 item from sources where month < formatteddates.dt order by month desc), 
  0
from FormattedDates
left join sources on FormattedDates.dt = sources.month
where source is null

-- add sources to the mix
union
select * from sources;

      

How it works

First we use recursion to list all dates between two dates with CTE

WITH Dates as
(
  select cast('2001-01-01' as date) as CalendarDate
  union all     
  select dateadd(day , 1, CalendarDate) AS CalendarDate
  from Dates
  where dateadd (day, 1, CalendarDate) < '2001-03-02'
)
select * from Dates;

      



It is a recursive CTE (Common Table Expression) that starts with a date and repeats until the end date is reached. It outputs dates from 2001-01-01 to 2001-03-01.

We are roughly converting dates to yyyymm format using cast(replace(left(CalendarDate,7), '-', '') as int)

. But that would create several 200101, 200102, etc. Therefore, we use different ones.

WITH Dates as
(
  SELECT cast('2001-01-01' as date) as CalendarDate

  UNION ALL

  SELECT dateadd(day , 1, CalendarDate) AS CalendarDate
  FROM Dates
  WHERE dateadd (day, 1, CalendarDate) < '2001-03-02'
)
select 
  distinct cast(replace(left(CalendarDate,7), '-', '') as int) dt
from Dates;

      

This will be listed as 200101, 200102 and 200103. The following code has the same effect:

-- Recursive CTE to create all dates between 2001-01-01 and 2001-03-02
WITH Dates as
(
  select cast('2001-01-01' as date) as CalendarDate
  union all     
  select dateadd(day , 1, CalendarDate) AS CalendarDate
  from Dates
  where dateadd (day, 1, CalendarDate) < '2001-03-02'
),

-- CTE to get distinct yyyymm
FormattedDates as
(
  select 
    distinct cast(replace(left(CalendarDate,7), '-', '') as int) dt
  from Dates
)

-- List out the distinct dates
select dt from FormattedDates;

      

We left these dates with the one in our source table and select only those records where there is no entry in the source table.

-- Recursive CTE to create all dates between 2001-01-01 and 2001-03-02
WITH Dates as
(
  select cast('2001-01-01' as date) as CalendarDate
  union all     
  select dateadd(day , 1, CalendarDate) AS CalendarDate
  from Dates
  where dateadd (day, 1, CalendarDate) < '2001-03-02'
),

-- CTE to get distinct yyyymm
FormattedDates as
(
  select 
    distinct cast(replace(left(CalendarDate,7), '-', '') as int) dt
  from Dates
)

-- compare list of yyyymm with sources and fill missing information
select 
  'bob', 
  FormattedDates.dt, 
  (select top 1 item from sources where month < formatteddates.dt order by month desc), 
  0
from FormattedDates
left join sources on FormattedDates.dt = sources.month
where source is null;

      

This will be indicated 'bob', 200103, 2, 0

. Great, so we have missing information. Let's combine it with data from sources to get a complete dataset.

All there is to add now union all select * from sources;

to get what you want.

+1


source


Using the provided example data,

if object_id('#months','U') is not null drop table #months;
create table #months (year int, month int);

insert into #months(year, month) values
(2000, 200007),   (2001, 200101),  (2002, 200201),
(2000, 200008),   (2001, 200102),  (2002, 200202),
(2000, 200009),   (2001, 200103),  (2002, 200203),
(2000, 200010),   (2001, 200104),  (2002, 200204),
(2000, 200011),   (2001, 200105),  (2002, 200205),
(2000, 200012),   (2001, 200106),  (2002, 200206),
                  (2001, 200107),  (2002, 200007),
                  (2001, 200108),  (2002, 200208),
                  (2001, 200109),  (2002, 200208),
                  (2001, 200110),
                  (2001, 200111),
                  (2001, 200112)

if object_id('#source','U') is not null drop table #source;
create table #source (name varchar(10), month int, item int, val float);

insert into #source(name, month, item, val) values
('bob', 200101, 1, 1.5),
('bob', 200102, 2, 1.5),
('bob', 200103, 2, 2),
('bob', 200108, 1, 0),
('bob', 200109, 10, 6.6),
('bob', 200110, 11, 2.2),
('bob', 200111, 9, 1),
('bob', 200207, 23, 0)
;

      

this SQL:

with source as (
    select
         this.*
        ,( select top 1 month 
           from #source 
           where month > this.month 
             and name  = this.name 
           order by month
         ) as NextMonth
    from #source        this
),
months as (
    select 
        m.month, n.name
    from #months m
    cross join (select name from #source group by name) n
    group by 
        m.month, n.name
)
select 
    s.name, 
    m.month,
    s.item, 
    case when m.month = s.month then s.val else 0 end val
from months m
left join source s
   on s.month <= m.month and m.month < s.NextMonth 
  and s.name = m.name
where s.name is not null
order by 
     s.name
    ,m.month;
go

      



gives at will:

name       month       item        val
---------- ----------- ----------- ----------------------
bob        200101      1           1.5
bob        200102      2           1.5
bob        200103      2           2
bob        200104      2           2
bob        200105      2           2
bob        200106      2           2
bob        200107      2           2
bob        200108      1           0
bob        200109      10          6.6
bob        200110      11          2.2
bob        200111      9           1
bob        200112      9           1
bob        200201      9           1
bob        200202      9           1
bob        200203      9           1
bob        200204      9           1
bob        200205      9           1
bob        200206      9           1

      

Edit: Minor fixes to ensure val is 0 for inserted rows

+1


source







All Articles