Finding the intersection of two dictionaries
I have two type dictionaries Dic1<string,string>
and Dic2<string,string>
I want to generate new list
values for keys existing in Dic1
both and Dic2
So, for example if
Dic1: <12,"hi">, <14,"bye">
Dic2: <12,"hello">, <18,"bye">
Then the list should be: "hi" , "hello"
I tried working with Dic1.Keys.Intersect
but couldn't figure it out yet.
What I tried: Dic1.Keys.Intersect(Dic2.Keys).ToList(t => t.Values);
source to share
Here you are:
var dic1 = new Dictionary<int, string> { { 12, "hi" }, { 14, "bye" } };
var dic2 = new Dictionary<int, string> { { 12, "hello" }, { 18, "bye" } };
HashSet<int> commonKeys = new HashSet<int>(dic1.Keys);
commonKeys.IntersectWith(dic2.Keys);
var result =
dic1
.Where(x => commonKeys.Contains(x.Key))
.Concat(dic2.Where(x => commonKeys.Contains(x.Key)))
// .Select(x => x.Value) // With this additional select you'll get only the values.
.ToList();
The list of results contains { 12, "hi" }
and{ 12, "hello" }
HashSet
very useful for intersections.
Just from curiostiy, I compared all six solutions (I hope I haven't missed anyone) and the timing looks like this:
@EZI Intersect2 GroupBy ~149ms
@Selman22 Intersect3 Keys.Intersect ~41ms
@dbc Intersect4 Where1 ~22ms
@dbc Intersect5 Where2 ~18ms
@dbc Intersect5 Classic ~11ms
@t3chb0t Intersect1 HashSet ~66ms
class Program
{
static void Main(string[] args)
{
var dic1 = new Dictionary<int, string>();
var dic2 = new Dictionary<int, string>();
Random rnd = new Random(DateTime.Now.Millisecond);
for (int i = 0; i < 100000; i++)
{
int id = 0;
do { id = rnd.Next(0, 1000000); } while (dic1.ContainsKey(id));
dic1.Add(id, "hi");
do { id = rnd.Next(0, 1000000); } while (dic2.ContainsKey(id));
dic2.Add(id, "hello");
}
List<List<string>> results = new List<List<string>>();
using (new AutoStopwatch(true)) { results.Add(Intersect1(dic1, dic2)); }
Console.WriteLine("Intersect1 elapsed in {0}ms (HashSet)", AutoStopwatch.Stopwatch.ElapsedMilliseconds);
using (new AutoStopwatch(true)) { results.Add(Intersect2(dic1, dic2)); }
Console.WriteLine("Intersect2 elapsed in {0}ms (GroupBy)", AutoStopwatch.Stopwatch.ElapsedMilliseconds);
using (new AutoStopwatch(true)) { results.Add(Intersect3(dic1, dic2)); }
Console.WriteLine("Intersect3 elapsed in {0}ms (Keys.Intersect)", AutoStopwatch.Stopwatch.ElapsedMilliseconds);
using (new AutoStopwatch(true)) { results.Add(Intersect4(dic1, dic2)); }
Console.WriteLine("Intersect4 elapsed in {0}ms (Where1)", AutoStopwatch.Stopwatch.ElapsedMilliseconds);
using (new AutoStopwatch(true)) { results.Add(Intersect5(dic1, dic2)); }
Console.WriteLine("Intersect5 elapsed in {0}ms (Where2)", AutoStopwatch.Stopwatch.ElapsedMilliseconds);
using (new AutoStopwatch(true)) { results.Add(Intersect7(dic1, dic2)); }
Console.WriteLine("Intersect7 elapsed in {0}ms (Old style :-)", AutoStopwatch.Stopwatch.ElapsedMilliseconds);
Console.ReadKey();
}
static List<string> Intersect1(Dictionary<int, string> dic1, Dictionary<int, string> dic2)
{
HashSet<int> commonKeys = new HashSet<int>(dic1.Keys);
commonKeys.IntersectWith(dic2.Keys);
var result =
dic1
.Where(x => commonKeys.Contains(x.Key))
.Concat(dic2.Where(x => commonKeys.Contains(x.Key)))
.Select(x => x.Value)
.ToList();
return result;
}
static List<string> Intersect2(Dictionary<int, string> dic1, Dictionary<int, string> dic2)
{
var result = dic1.Concat(dic2)
.GroupBy(x => x.Key)
.Where(g => g.Count() > 1)
.SelectMany(g => g.Select(x => x.Value))
.ToList();
return result;
}
static List<string> Intersect3(Dictionary<int, string> dic1, Dictionary<int, string> dic2)
{
var result =
dic1
.Keys
.Intersect(dic2.Keys)
.SelectMany(key => new[] { dic1[key], dic2[key] })
.ToList();
return result;
}
static List<string> Intersect4(Dictionary<int, string> dic1, Dictionary<int, string> dic2)
{
var result =
dic1.
Where(pair => dic2.ContainsKey(pair.Key))
.SelectMany(pair => new[] { dic2[pair.Key], pair.Value }).ToList();
return result;
}
static List<string> Intersect5(Dictionary<int, string> dic1, Dictionary<int, string> dic2)
{
var result =
dic1
.Keys
.Where(dic2.ContainsKey).SelectMany(k => new[] { dic1[k], dic2[k] })
.ToList();
return result;
}
static List<string> Intersect7(Dictionary<int, string> dic1, Dictionary<int, string> dic2)
{
var list = new List<string>();
foreach (var key in dic1.Keys)
{
if (dic2.ContainsKey(key))
{
list.Add(dic1[key]);
list.Add(dic2[key]);
}
}
return list;
}
}
class AutoStopwatch : IDisposable
{
public static readonly Stopwatch Stopwatch = new Stopwatch();
public AutoStopwatch(bool start)
{
Stopwatch.Reset();
if (start) Stopwatch.Start();
}
public void Dispose()
{
Stopwatch.Stop();
}
}
source to share
You can filter keys in Dic1 with Where
and then convert them to values like so:
var values = Dic1.Keys.Where(Dic2.ContainsKey).SelectMany(k => new[] { Dic1[k], Dic2[k] })
.ToList();
This should be as efficient as lookups on Dic1
and Dic2
, which is usually log (n) or better, and does not require creating any temporary hash sets or lookup tables.
Here's a version that avoids one of the dictionary lookups at the expense of being a little less pretty:
var values = Dic1.Where(pair => Dic2.ContainsKey(pair.Key)).SelectMany(pair => new[] { pair.Value, Dic2[pair.Key] })
.ToList();
Update
My timing tests (using the handy t3chb0t test harness) show the first version is faster. It's easier, so of the two, prefer this. But the fastest I've found so far doesn't use Linq at all, at 7ms for 1,000,000 intersections versus 13 for the Linq version:
static List<string> Intersect7(Dictionary<int, string> dic1, Dictionary<int, string> dic2)
{
var list = new List<string>();
foreach (var key in dic1.Keys)
{
if (dic2.ContainsKey(key))
{
list.Add(dic1[key]);
list.Add(dic2[key]);
}
}
return list;
}
This is old style, although you probably don't want it.
source to share