Since Sql Server doesn't have an aggregate for Median, I figured, that I'll create my first SQL CLR to handle this problem.
So, I created my intial project, using the .Net Framework 3.5 and C# SQL CLR aggregate template, and started coding:
Some of the intial changes that I've made from the default:
1) The SqlUserDefinedAggregate default was Format.Native. The problem with using the default Serializer is that I needed a way to store my values, preferably in a List. By changing the format to Format.UserDefined -- I was able to create my own serializer. In this case I used the IBinarySerialize interface.
2) I also changed the default IsInvariant attributes.
Once the code is compiled, you will need to place the dll on the same machine as the SQL Server is located.
Using the following query to set the assembly and aggregate name:
Once done, time to do some testing. An example:
So, I created my intial project, using the .Net Framework 3.5 and C# SQL CLR aggregate template, and started coding:
Code Snippet
using System;
using System.Data;
using System.Data.SqlClient;
using System.Data.SqlTypes;
using Microsoft.SqlServer.Server;
using System.Collections.Generic;
[Serializable]
[Microsoft.SqlServer.Server.SqlUserDefinedAggregate(
Format.UserDefined,
IsInvariantToDuplicates = false,
IsInvariantToNulls = false,
IsInvariantToOrder = false,
MaxByteSize = 8000)]
public struct Median : IBinarySerialize
{
//Variables to hold the values;
private List<double> ld;
public void Init()
{
ld = new List<double>();
}
public void Accumulate(SqlDouble Value)
{
if (!Value.IsNull)
{
ld.Add(Value.Value);
}
}
///
/// Merge the partially computed aggregate with this aggregate.
///
/// The other partial results to be merged
public void Merge(Median Group)
{
this.ld.AddRange(Group.ld.ToArray());
}
///
/// Called at the end of aggregation, to return the results.
///
/// The median of all inputted values
public SqlDouble Terminate()
{
//debug: return (SqlDouble)ld.Count;
//special case 0 values
if (ld.Count == 0)
return SqlDouble.Null;
ld.Sort();
int index = (int)ld.Count / 2;
if (ld.Count % 2 == 0)
{
return (SqlDouble)(((double)ld[index] + (double)ld[index - 1]) / 2);
}
else
{
return (SqlDouble)((double)ld[index]);
}
}
#region IBinarySerialize Members
public void Read(System.IO.BinaryReader r)
{
int cnt = r.ReadInt32();
this.ld = new List<double>(cnt);
for(int i = 0; i < cnt; i++)
{
this.ld.Add(r.ReadDouble());
}
}
public void Write(System.IO.BinaryWriter w)
{
w.Write(this.ld.Count);
foreach (double d in this.ld)
{
w.Write(d);
}
}
#endregion
}
using System.Data;
using System.Data.SqlClient;
using System.Data.SqlTypes;
using Microsoft.SqlServer.Server;
using System.Collections.Generic;
[Serializable]
[Microsoft.SqlServer.Server.SqlUserDefinedAggregate(
Format.UserDefined,
IsInvariantToDuplicates = false,
IsInvariantToNulls = false,
IsInvariantToOrder = false,
MaxByteSize = 8000)]
public struct Median : IBinarySerialize
{
//Variables to hold the values;
private List<double> ld;
public void Init()
{
ld = new List<double>();
}
public void Accumulate(SqlDouble Value)
{
if (!Value.IsNull)
{
ld.Add(Value.Value);
}
}
///
/// Merge the partially computed aggregate with this aggregate.
///
/// The other partial results to be merged
public void Merge(Median Group)
{
this.ld.AddRange(Group.ld.ToArray());
}
///
/// Called at the end of aggregation, to return the results.
///
///
public SqlDouble Terminate()
{
//debug: return (SqlDouble)ld.Count;
//special case 0 values
if (ld.Count == 0)
return SqlDouble.Null;
ld.Sort();
int index = (int)ld.Count / 2;
if (ld.Count % 2 == 0)
{
return (SqlDouble)(((double)ld[index] + (double)ld[index - 1]) / 2);
}
else
{
return (SqlDouble)((double)ld[index]);
}
}
#region IBinarySerialize Members
public void Read(System.IO.BinaryReader r)
{
int cnt = r.ReadInt32();
this.ld = new List<double>(cnt);
for(int i = 0; i < cnt; i++)
{
this.ld.Add(r.ReadDouble());
}
}
public void Write(System.IO.BinaryWriter w)
{
w.Write(this.ld.Count);
foreach (double d in this.ld)
{
w.Write(d);
}
}
#endregion
}
Some of the intial changes that I've made from the default:
1) The SqlUserDefinedAggregate default was Format.Native. The problem with using the default Serializer is that I needed a way to store my values, preferably in a List. By changing the format to Format.UserDefined -- I was able to create my own serializer. In this case I used the IBinarySerialize interface.
2) I also changed the default IsInvariant attributes.
- IsInvariantToDuplicates - set to false since I want duplicates.
- IsInvariantToNull - set to false since I will handle Null values in my code.
- IsInvariantToOrder - set to false, I can do this in code.
Once the code is compiled, you will need to place the dll on the same machine as the SQL Server is located.
Using the following query to set the assembly and aggregate name:
Code Snippet
IF EXISTS (SELECT name FROM sysobjects WHERE name = 'Median')
DROP AGGREGATE Median
go
IF EXISTS (SELECT name FROM sys.assemblies WHERE name = 'MyClrCode')
DROP ASSEMBLY MyClrCode
go
CREATE ASSEMBLY MyClrCode
FROM 'C:\Median.dll'
WITH PERMISSION_SET = SAFE
GO
CREATE AGGREGATE dbo.Median
(@input float)
RETURNS float
EXTERNAL NAME MyClrCode.Median
DROP AGGREGATE Median
go
IF EXISTS (SELECT name FROM sys.assemblies WHERE name = 'MyClrCode')
DROP ASSEMBLY MyClrCode
go
CREATE ASSEMBLY MyClrCode
FROM 'C:\Median.dll'
WITH PERMISSION_SET = SAFE
GO
CREATE AGGREGATE dbo.Median
(@input float)
RETURNS float
EXTERNAL NAME MyClrCode.Median
Once done, time to do some testing. An example:
Code Snippet
--Test 1 value
select
dbo.Median(x.y)
from
(
select 1 as y
)x
GO
--Test Accuracy
select
dbo.Median(x.y)
from
(
select 1 as y
union all
select 2 as y
)x
GO
select
dbo.Median(x.y)
from
(
select 1 as y
)x
GO
--Test Accuracy
select
dbo.Median(x.y)
from
(
select 1 as y
union all
select 2 as y
)x
GO