I have implemented my own Lucene.Net.Analysis.TokenFilter:
/// <summary>
/// Token filter that rewrites each umlaut in a term as a two-letter sequence:
/// Ä→AE, Ö→OE, Ü→UE, ä→ae, ö→oe, ü→ue. All other characters pass through unchanged.
/// </summary>
public sealed class UmlautsFoldingFilter : Lucene.Net.Analysis.TokenFilter
{
    /// <summary>
    /// Largest number of output characters a single input character can produce.
    /// Every replacement in <see cref="TryGetReplacement"/> is exactly two characters long.
    /// </summary>
    private const int MaxExpansionPerChar = 2;

    private readonly Lucene.Net.Analysis.Tokenattributes.ITermAttribute _termAttribute;

    // Scratch buffer reused across tokens; FoldUmlaut grows it on demand.
    private char[] _output = new char[512];

    // Number of valid characters in _output after the most recent FoldUmlaut call.
    private int _outputPosition;

    /// <summary>
    /// Creates a filter that folds umlauts in the terms produced by <paramref name="input"/>.
    /// </summary>
    /// <param name="input">The upstream token stream to read tokens from.</param>
    public UmlautsFoldingFilter(Lucene.Net.Analysis.TokenStream input)
        : base(input)
    {
        this._termAttribute = this.AddAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();
    }

    /// <summary>
    /// Advances the upstream stream by one token and folds any umlauts in its term.
    /// </summary>
    /// <returns>
    /// <c>false</c> when the upstream stream has no more tokens; otherwise <c>true</c>.
    /// </returns>
    public override bool IncrementToken()
    {
        if (!this.input.IncrementToken())
        {
            return false;
        }

        var termBuffer = this._termAttribute.TermBuffer();
        var termLength = this._termAttribute.TermLength();

        // Fast path: a term without umlauts is already in its final form, so skip
        // the copy into the scratch buffer and back into the term attribute.
        if (!ContainsUmlaut(termBuffer, termLength))
        {
            return true;
        }

        this.FoldUmlaut(termBuffer, termLength);
        this._termAttribute.SetTermBuffer(this._output, 0, this._outputPosition);
        return true;
    }

    /// <summary>
    /// Maps an umlaut to its two-letter replacement.
    /// </summary>
    /// <param name="ch">The character to look up.</param>
    /// <param name="first">First replacement character; <paramref name="ch"/> itself when not an umlaut.</param>
    /// <param name="second">Second replacement character; <c>'\0'</c> when not an umlaut.</param>
    /// <returns><c>true</c> when <paramref name="ch"/> is one of the six handled umlauts.</returns>
    private static bool TryGetReplacement(char ch, out char first, out char second)
    {
        switch (ch)
        {
            case 'Ä':
                first = 'A';
                second = 'E';
                return true;
            case 'Ö':
                first = 'O';
                second = 'E';
                return true;
            case 'Ü':
                first = 'U';
                second = 'E';
                return true;
            case 'ä':
                first = 'a';
                second = 'e';
                return true;
            case 'ö':
                first = 'o';
                second = 'e';
                return true;
            case 'ü':
                first = 'u';
                second = 'e';
                return true;
            default:
                first = ch;
                second = '\0';
                return false;
        }
    }

    /// <summary>
    /// Reports whether the first <paramref name="termLength"/> characters of
    /// <paramref name="termBuffer"/> contain at least one handled umlaut.
    /// </summary>
    private static bool ContainsUmlaut(char[] termBuffer, int termLength)
    {
        char first;
        char second;
        for (var index = 0; index < termLength; ++index)
        {
            if (TryGetReplacement(termBuffer[index], out first, out second))
            {
                return true;
            }
        }

        return false;
    }

    /// <summary>
    /// Writes the folded form of the term into <c>_output</c> and records its length
    /// in <c>_outputPosition</c>.
    /// </summary>
    /// <param name="termBuffer">Buffer holding the term's characters.</param>
    /// <param name="termLength">Number of valid characters in <paramref name="termBuffer"/>.</param>
    private void FoldUmlaut(char[] termBuffer, int termLength)
    {
        // Worst case: every character is an umlaut and doubles in length.
        var targetSize = MaxExpansionPerChar * termLength;
        if (this._output.Length < targetSize)
        {
            this._output = new char[Lucene.Net.Util.ArrayUtil.GetNextSize(targetSize)];
        }

        this._outputPosition = 0;
        for (var index = 0; index < termLength; ++index)
        {
            var ch = termBuffer[index];
            char first;
            char second;
            if (TryGetReplacement(ch, out first, out second))
            {
                this._output[this._outputPosition++] = first;
                this._output[this._outputPosition++] = second;
            }
            else
            {
                this._output[this._outputPosition++] = ch;
            }
        }
    }
}