As promised in my previous post, today I am showing a short sample of Jaro-Winkler string distance algorithm to suggest words on Windows Phone 7.1 – code named “Mango”.
Here is what we are trying to achieve in this project:
One of the new features of this release for Windows Phone is ability to work with local database using SQL Compact Edition (SQL CE). Alex Golesh posted several articles about new features and one in particular about working with databases. I suggest to read this if you plan to experiment with local database on Windows Phone. He gives several tips on how to generate/map existing tables into .NET plain old CLR classes (e.g. POCO) using sqlMetal tool. And then how to copy existing database from application’s deployment path into isolated storage. It looks like for now *.sdf (SQL CE data files) can only be accessed from isolated storage.
Here is are the steps to prepare your database.
1. Download a dictionary csv file. I used this free dictionary under GNU license. There is also Linux version available.
2. Import file into regular SQL table using SSIS Import/export wizard. Also free in here.
3. Clean data. Keep only words, remove phonetics, word definitions and other symbols. Remove duplicates, as some words have multiple meanings, thus appearing multiple times.
4. Add words’ lengths into a separate column. I called it Length.
5. Sort the whole table by Length and then by Word.
6. Add indexes on Length, Word and on Primary Key field of your choice.
7. Generate data insert script for sdf database. This is much simpler than doing replication, besides if you only have SQL Express then you may not publish your database for replication. SQL Express and SQL CE may only act as subscribers. Here a simple example:
8. If you haven’t already downloaded and installed SQL CE please do so.
9. Once SQL CE is installed you may create a new sdf database from SQL Server Manager Studio:
10. Create a table structure inside sdf similar to this:
11. Now you may use sqlMetal mentioned in Alex’s article to generate .NET classes. I have actually typed it myself and didn’t include compiler directive for indexes, but it is described in the article if you need to do it.
using System;
using System.Net;
using System.Data.Linq;
using System.Data.Linq.Mapping;
using System.ComponentModel;
using System.Collections.ObjectModel;
using Microsoft.Phone.Data.Linq;
using Microsoft.Phone.Data.Linq.Mapping;
namespace JaroWinklerWP7.DataModels
{
[Table(Name = "Word")]
public class Words : INotifyPropertyChanged, INotifyPropertyChanging
{
// Define ID: private field, public property and database column.
private int _id;
/// <summary>
/// Save Jaro Distance in here once it is calculated
/// </summary>
public decimal JaroDistance { get; set; }
[Column(IsPrimaryKey = true, IsDbGenerated = true, DbType = "INT NOT NULL Identity", CanBeNull = false, AutoSync = AutoSync.Default, UpdateCheck = UpdateCheck.Never)]
public int Id
{
get
{
return _id;
}
private set
{
if (_id != value)
{
NotifyPropertyChanging("Id");
_id = value;
NotifyPropertyChanged("Id");
}
}
}
// Define item name: private field, public property and database column.
private string _word;
[Column]
public string Word
{
get
{
return _word;
}
private set
{
if (_word != value)
{
NotifyPropertyChanging("Word");
_word = value;
NotifyPropertyChanged("Word");
}
}
}
// Define completion value: private field, public property and database column.
private int _length;
[Column]
public int Length
{
get
{
return _length;
}
private set
{
if (_length != value)
{
NotifyPropertyChanging("Length");
_length = value;
NotifyPropertyChanged("Length");
}
}
}
#region INotifyPropertyChanged Members
public event PropertyChangedEventHandler PropertyChanged;
// Used to notify the page that a data context property changed
private void NotifyPropertyChanged(string propertyName)
{
if (PropertyChanged != null)
{
PropertyChanged(this, new PropertyChangedEventArgs(propertyName));
}
}
#endregion
#region INotifyPropertyChanging Members
public event PropertyChangingEventHandler PropertyChanging;
// Used to notify the data context that a data context property is about to change
private void NotifyPropertyChanging(string propertyName)
{
if (PropertyChanging != null)
{
PropertyChanging(this, new PropertyChangingEventArgs(propertyName));
}
}
#endregion
}
}
12. Create Windows Phone 7.1 project and add the above model.
13. Add data context class, which has methods for copying database to Isolated Storage:
using System;
using System.Net;
using System.Data.Linq;
using System.Data.Linq.Mapping;
using System.ComponentModel;
using System.Collections.ObjectModel;
using System.IO;
using System.Windows;
using System.IO.IsolatedStorage;
namespace JaroWinklerWP7.DataModels
{
public class DictionaryDataContext : DataContext
{
// Specify the connection string as a static, used in main page and app.xaml.
public static string DBConnectionString = "Data Source='isostore:/Words.sdf'";
// Pass the connection string to the base class.
public DictionaryDataContext(string connectionString)
: base(connectionString)
{ }
// Specify a single table for the to-do items.
public Table<Words> Words;
public static void CreateExternalDatabase(string DBName)
{
Stream str = Application.GetResourceStream(new Uri("DataModels/" + DBName, UriKind.Relative)).Stream;
using (IsolatedStorageFile isoStore = IsolatedStorageFile.GetUserStoreForApplication())
{
IsolatedStorageFileStream outFile = isoStore.CreateFile(DBName);
outFile.Write(ReadToEnd(str), 0, (int)str.Length);
str.Close();
outFile.Close();
}
}
public static byte[] ReadToEnd(Stream stream)
{
long originalPosition = stream.Position;
stream.Position = 0;
try
{
byte[] readBuffer = new byte[4096];
int totalBytesRead = 0;
int bytesRead = 0;
while ((bytesRead = stream.Read(readBuffer, totalBytesRead, readBuffer.Length - totalBytesRead)) > 0)
{
totalBytesRead += bytesRead;
if (totalBytesRead == readBuffer.Length)
{
int nextByte = stream.ReadByte();
if (nextByte != -1)
{
byte[] temp = new byte[readBuffer.Length * 2];
Buffer.BlockCopy(readBuffer, 0, temp, 0, readBuffer.Length);
Buffer.SetByte(temp, totalBytesRead, (byte)nextByte);
readBuffer = temp; totalBytesRead++;
}
}
}
byte[] buffer = readBuffer;
if (readBuffer.Length != totalBytesRead)
{
buffer = new byte[totalBytesRead];
Buffer.BlockCopy(readBuffer, 0, buffer, 0, totalBytesRead);
}
return buffer;
}
finally
{
stream.Position = originalPosition;
}
}
}
}
14. Add logic into main view model. This model is going to be data bound to UI.
using System;
using System.ComponentModel;
using System.Collections.Generic;
using System.Diagnostics;
using System.Text;
using System.Windows;
using System.Windows.Controls;
using System.Windows.Data;
using System.Windows.Documents;
using System.Windows.Input;
using System.Windows.Media;
using System.Windows.Media.Imaging;
using System.Windows.Shapes;
using System.Collections.ObjectModel;
using JaroWinklerWP7.DataModels;
using JaroWinklerWP7.ViewModels;
using System.Linq;
using Microsoft.Phone.Data.Linq;
using Microsoft.Phone.Data.Linq.Mapping;
namespace JaroWinklerWP7
{
public class MainViewModel : INotifyPropertyChanged
{
#region Fields
/// <summary>
/// A collection of suggestions.
/// </summary>
public ObservableCollection<Words> _suggestions = new ObservableCollection<Words>();
private DictionaryDataContext _db = null;
/// <summary>
/// word typed from the UI
/// </summary>
private string _typedWord = String.Empty;
private string _firstWord = String.Empty;
#endregion //Fields
#region Properties
/// <summary>
/// This is a list of suggestions from Jaro-Winkler search
/// </summary>
/// <returns></returns>
public ObservableCollection<Words> Suggestions
{
get { return _suggestions; }
private set { _suggestions = value; }
}
/// <summary>
/// Word Typed from the UI
/// </summary>
public string TypedWord
{
get
{
return _typedWord;
}
set
{
if (value != _typedWord)
{
_typedWord = value;
NotifyPropertyChanged("TypedWord");
}
}
}
public string FirstWord
{
get
{
return _firstWord;
}
set
{
if (value != _firstWord)
{
_firstWord = value;
NotifyPropertyChanged("FirstWord");
}
}
}
public bool IsDataLoaded
{
get;
private set;
}
#endregion //Properties
#region Constructor
public MainViewModel()
{
this.PropertyChanged += new PropertyChangedEventHandler(TypedWord_PropertyChanged);
// Create the database if it does not exist.
DictionaryDataContext.CreateExternalDatabase("Words.sdf");
DictionaryDataContext db = new DictionaryDataContext(DictionaryDataContext.DBConnectionString);
if (db.DatabaseExists() == false)
{
//Create the database
db.CreateDatabase();
}
_db = db;
_db.ObjectTrackingEnabled = false;
}
#endregion //Constructor
#region Methods
/// <summary>
/// Creates and adds a few ItemViewModel objects into the Items collection.
/// </summary>
public void LoadData()
{
// Sample data; replace with real data
// run search off of the data query and Jaro Winkler
_suggestions.Clear();
// when property changes we would like to refresh suggestions.
// but only if the length is 2 or more
if (_typedWord.Length < 2)
{
this.IsDataLoaded = true;
return;
}
var words = (from w in _db.Words
where w.Length >= _typedWord.Length
select w);
List<Words> temp = new List<Words>();
foreach (Words w in words)
{
decimal distance = StringFunctions.StringDistance(_typedWord, w.Word);
if (distance > (decimal)0.85)
{
w.JaroDistance = distance;
temp.Add(w);
}
}
var sortedSuggestions = (from Words w in temp
orderby w.JaroDistance descending
select w).Take(10);
foreach (Words w in sortedSuggestions)
{
_suggestions.Add(w);
}
}
public event PropertyChangedEventHandler PropertyChanged;
private void NotifyPropertyChanged(String propertyName)
{
PropertyChangedEventHandler handler = PropertyChanged;
if (null != handler)
{
handler(this, new PropertyChangedEventArgs(propertyName));
}
}
private void TypedWord_PropertyChanged(object sender, PropertyChangedEventArgs e)
{
LoadData();
}
#endregion //Methods
}
}
15. Now add binding inside MainPage code behind file.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Windows;
using System.Windows.Controls;
using System.Windows.Documents;
using System.Windows.Input;
using System.Windows.Media;
using System.Windows.Media.Animation;
using System.Windows.Shapes;
using Microsoft.Phone.Controls;
using System.Windows.Data;
namespace JaroWinklerWP7
{
public partial class MainPage : PhoneApplicationPage
{
// Constructor
public MainPage()
{
InitializeComponent();
// Set the data context of the listbox control to the sample data
DataContext = App.ViewModel;
this.Loaded += new RoutedEventHandler(MainPage_Loaded);
}
// Handle selection changed on ListBox
private void MainListBox_SelectionChanged(object sender, SelectionChangedEventArgs e)
{
// If selected index is -1 (no selection) do nothing
if (MainListBox.SelectedIndex == -1)
return;
// Navigate to the new page
NavigationService.Navigate(new Uri("/DetailsPage.xaml?selectedItem=" + MainListBox.SelectedIndex, UriKind.Relative));
// Reset selected index to -1 (no selection)
MainListBox.SelectedIndex = -1;
}
// Load data for the ViewModel Items
private void MainPage_Loaded(object sender, RoutedEventArgs e)
{
if (!App.ViewModel.IsDataLoaded)
{
App.ViewModel.LoadData();
}
}
private void WordTextBox_TextChanged(object sender, TextChangedEventArgs e)
{
// itemNameTextBox is an instance of a TextBox
BindingExpression be = WordTextBox.GetBindingExpression(TextBox.TextProperty);
be.UpdateSource();
}
}
}
16. Add XAML file.
<phone:PhoneApplicationPage
x:Class="JaroWinklerWP7.MainPage"
xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
xmlns:phone="clr-namespace:Microsoft.Phone.Controls;assembly=Microsoft.Phone"
xmlns:shell="clr-namespace:Microsoft.Phone.Shell;assembly=Microsoft.Phone"
xmlns:d="http://schemas.microsoft.com/expression/blend/2008"
xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"
mc:Ignorable="d" d:DesignWidth="480" d:DesignHeight="768"
d:DataContext="{d:DesignData SampleData/MainViewModelSampleData.xaml}"
FontFamily="{StaticResource PhoneFontFamilyNormal}"
FontSize="{StaticResource PhoneFontSizeNormal}"
Foreground="{StaticResource PhoneForegroundBrush}"
SupportedOrientations="Portrait" Orientation="Portrait"
shell:SystemTray.IsVisible="True">
<!--Data context is set to sample data above and LayoutRoot contains the root grid where all other page content is placed-->
<Grid x:Name="LayoutRoot" Background="Transparent" DataContext="{Binding}">
<Grid.RowDefinitions>
<RowDefinition Height="Auto"/>
<RowDefinition Height="*"/>
</Grid.RowDefinitions>
<!--TitlePanel contains the name of the application and page title-->
<StackPanel x:Name="TitlePanel" Grid.Row="0" Margin="12,17,0,28">
<TextBlock x:Name="ApplicationTitle" Text="Jaro Winkler Word Suggestions" Style="{StaticResource PhoneTextNormalStyle}"/>
<TextBlock x:Name="PageTitle" Text="TYPE A WORD" Margin="9,-7,0,0" Style="{StaticResource PhoneTextTitle1Style}"/>
<TextBox x:Name="WordTextBox" Text="{Binding TypedWord, Mode=TwoWay, UpdateSourceTrigger=Explicit}" Background="Transparent" FontSize="24" TextChanged="WordTextBox_TextChanged"/>
</StackPanel>
<!--ContentPanel contains ListBox and ListBox ItemTemplate. Place additional content here-->
<Grid x:Name="ContentPanel" Grid.Row="1" Margin="12,0,12,0">
<ListBox
x:Name="MainListBox"
Margin="0,0,-12,0"
ItemsSource="{Binding Suggestions, Mode=OneWay,UpdateSourceTrigger=Default}"
HorizontalAlignment="Center"
HorizontalContentAlignment="Center"
>
<ListBox.ItemTemplate>
<DataTemplate>
<StackPanel Orientation="Horizontal" HorizontalAlignment="Stretch" Margin="10">
<StackPanel Orientation="Vertical">
<TextBlock Text="{Binding Path=Word}"
Width="Auto"
FontSize="40"
TextAlignment="Center"
HorizontalAlignment="Stretch"
VerticalAlignment="Center"/>
<TextBlock Text="{Binding Path=JaroDistance, StringFormat=Jaro Distance:\{0:N2\}}"
Margin="20,0,0,0"
Width="Auto"
FontSize="16"
TextAlignment="Center"
HorizontalAlignment="Stretch"
VerticalAlignment="Bottom"/>
</StackPanel>
</StackPanel>
</DataTemplate>
</ListBox.ItemTemplate>
</ListBox>
</Grid>
</Grid>
</phone:PhoneApplicationPage>
Compile and start typing . Some words are missing from my dictionary, but it still works.
Please let me know if you will run into some problems.
… and as usual this was ISolvable<Problem>.
Happy coding!