主题:Regex 问题,高手进!
using System;
using System.IO;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
namespace tokenizer
{
/// <summary>
/// Command-line tool that reads a text file line by line, cuts each line
/// after every tag matched by <see cref="TagPattern"/>, and writes each
/// resulting piece on its own line of the output file.
/// </summary>
class Program
{
    // Alternation of short lowercase codes (presumably part-of-speech tags of a
    // tagged corpus -- confirm against the input data). Longer codes are listed
    // before their one-letter prefixes so that e.g. "ad" wins over "a".
    // The literal must stay on a single line: a regular C# string literal
    // cannot contain a raw line break.
    private static readonly Regex TagPattern = new Regex(
        "ad|an|av|a|c|dd|dh|do|dv|dw|d|e|h|i|l|k|md|mk|mm|mo|mr|m|nd|nk|nm|nr|nt|nv|nz|n|o|q|ra|rb|re|rk|ro|rs|ry|r|tt|t|v|w|y");

    /// <summary>
    /// Entry point. Expects exactly two arguments: the input file path and the
    /// output file path. Any exception raised while processing is reported by
    /// printing its message to the console.
    /// </summary>
    /// <param name="args">args[0] = input file, args[1] = output file.</param>
    static void Main(string[] args)
    {
        if (args.Length != 2)
        {
            Console.WriteLine("Usage: tokenizer Inputfile Outputfile");
            return;
        }

        try
        {
            // 'using' guarantees both streams are flushed and closed even when
            // an exception interrupts processing.
            using (StreamReader reader = new StreamReader(args[0]))
            using (StreamWriter writer = new StreamWriter(args[1]))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    foreach (string piece in SplitTokens(line))
                    {
                        writer.WriteLine(piece);
                    }
                }
            }
        }
        catch (Exception Err)
        {
            Console.WriteLine(Err.Message);
            return;
        }

        // Keep the console window open until a key is pressed.
        Console.ReadKey();
    }

    /// <summary>
    /// Normalizes one input line and cuts it after every tag match.
    /// </summary>
    /// <remarks>
    /// Steps: trim surrounding whitespace; drop everything up to and including
    /// the first '.' (nothing is dropped when the line has no '.'); replace
    /// every '/' with a space; then cut the text immediately after each match
    /// of <see cref="TagPattern"/>. The text following the last match is always
    /// returned as the final element, so it is an empty string when the line
    /// ends with a tag. Cutting is done by position rather than by inserting a
    /// '*' marker, so a literal '*' in the input is preserved.
    /// </remarks>
    /// <param name="line">A raw line read from the input file.</param>
    /// <returns>The pieces of the line, in order; never empty.</returns>
    internal static string[] SplitTokens(string line)
    {
        string text = line.Trim();
        // IndexOf returns -1 when there is no '.', making this Remove(0, 0).
        text = text.Remove(0, text.IndexOf('.') + 1);
        text = text.Replace("/", " ");

        List<string> pieces = new List<string>();
        int start = 0;
        foreach (Match match in TagPattern.Matches(text))
        {
            int end = match.Index + match.Length;
            pieces.Add(text.Substring(start, end - start));
            start = end;
        }

        // Remainder after the last tag (may be empty).
        pieces.Add(text.Substring(start));
        return pieces.ToArray();
    }
}
}这段代码我不太看得懂,大家能帮我解释一下吗?朋友说这是一段读取文件并进行分词的代码,还会把文本里面的数字替换成空格,可是我看不出哪儿有这个部分。
using System.IO;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
namespace tokenizer
{
/// <summary>
/// Command-line tool that reads a text file line by line, cuts each line
/// after every tag matched by <see cref="TagPattern"/>, and writes each
/// resulting piece on its own line of the output file.
/// </summary>
class Program
{
    // Alternation of short lowercase codes (presumably part-of-speech tags of a
    // tagged corpus -- confirm against the input data). Longer codes are listed
    // before their one-letter prefixes so that e.g. "ad" wins over "a".
    // The literal must stay on a single line: a regular C# string literal
    // cannot contain a raw line break.
    private static readonly Regex TagPattern = new Regex(
        "ad|an|av|a|c|dd|dh|do|dv|dw|d|e|h|i|l|k|md|mk|mm|mo|mr|m|nd|nk|nm|nr|nt|nv|nz|n|o|q|ra|rb|re|rk|ro|rs|ry|r|tt|t|v|w|y");

    /// <summary>
    /// Entry point. Expects exactly two arguments: the input file path and the
    /// output file path. Any exception raised while processing is reported by
    /// printing its message to the console.
    /// </summary>
    /// <param name="args">args[0] = input file, args[1] = output file.</param>
    static void Main(string[] args)
    {
        if (args.Length != 2)
        {
            Console.WriteLine("Usage: tokenizer Inputfile Outputfile");
            return;
        }

        try
        {
            // 'using' guarantees both streams are flushed and closed even when
            // an exception interrupts processing.
            using (StreamReader reader = new StreamReader(args[0]))
            using (StreamWriter writer = new StreamWriter(args[1]))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    foreach (string piece in SplitTokens(line))
                    {
                        writer.WriteLine(piece);
                    }
                }
            }
        }
        catch (Exception Err)
        {
            Console.WriteLine(Err.Message);
            return;
        }

        // Keep the console window open until a key is pressed.
        Console.ReadKey();
    }

    /// <summary>
    /// Normalizes one input line and cuts it after every tag match.
    /// </summary>
    /// <remarks>
    /// Steps: trim surrounding whitespace; drop everything up to and including
    /// the first '.' (nothing is dropped when the line has no '.'); replace
    /// every '/' with a space; then cut the text immediately after each match
    /// of <see cref="TagPattern"/>. The text following the last match is always
    /// returned as the final element, so it is an empty string when the line
    /// ends with a tag. Cutting is done by position rather than by inserting a
    /// '*' marker, so a literal '*' in the input is preserved.
    /// </remarks>
    /// <param name="line">A raw line read from the input file.</param>
    /// <returns>The pieces of the line, in order; never empty.</returns>
    internal static string[] SplitTokens(string line)
    {
        string text = line.Trim();
        // IndexOf returns -1 when there is no '.', making this Remove(0, 0).
        text = text.Remove(0, text.IndexOf('.') + 1);
        text = text.Replace("/", " ");

        List<string> pieces = new List<string>();
        int start = 0;
        foreach (Match match in TagPattern.Matches(text))
        {
            int end = match.Index + match.Length;
            pieces.Add(text.Substring(start, end - start));
            start = end;
        }

        // Remainder after the last tag (may be empty).
        pieces.Add(text.Substring(start));
        return pieces.ToArray();
    }
}
}这段代码我不太看得懂,大家能帮我解释一下吗?朋友说这是一段读取文件并进行分词的代码,还会把文本里面的数字替换成空格,可是我看不出哪儿有这个部分。