How to parse a string for capitalized words
I have this line: " Mimi loves Toto and Tata hate Mimi so Toto killed Tata"
I want to write code that only prints words starting with capital letters, avoiding repetition
The output should look like
Mimi
Toto
Tata
I tried to do this, but I'm pretty sure it is wrong, even if no errors are displayed.
The code I wrote:
// Entry point: runs spliter over a sample sentence and prints whatever it returns.
static void Main(string[] args)
{
    string sentence = "Memi ate Toto and she killed Tata Memi also hate Biso";
    string result = spliter(sentence);
    Console.WriteLine(result);
}
/// <summary>
/// Collects the distinct words of <paramref name="s"/> that start with an
/// upper-case ASCII letter (A-Z), in order of first appearance.
/// </summary>
/// <param name="s">The sentence to scan.</param>
/// <returns>
/// The matching words joined with <see cref="Environment.NewLine"/>, or an
/// empty string when no word starts with a capital letter.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
public static string spliter(string s)
{
    if (s == null)
    {
        throw new ArgumentNullException("s");
    }

    // \b anchors the match at the start of a word, so a capital in the middle
    // of a word ("oveR") is ignored; \w* consumes the rest of the word.
    Regex exp = new Regex(@"\b[A-Z]\w*");

    // 'seen' gives O(1) duplicate detection, 'ordered' keeps first-appearance order.
    System.Collections.Generic.HashSet<string> seen =
        new System.Collections.Generic.HashSet<string>();
    System.Collections.Generic.List<string> ordered =
        new System.Collections.Generic.List<string>();

    foreach (Match t in exp.Matches(s))
    {
        // Add returns false when the word was already recorded.
        if (seen.Add(t.Value))
        {
            ordered.Add(t.Value);
        }
    }

    return string.Join(Environment.NewLine, ordered.ToArray());
}
}
}
Idea:
What if I split the string into an array and then apply a regex to check them word by word and then print the results? I don't know if anyone can help me in generating this code?
I don't know the C#/.NET regex library at all, but this regex pattern will do it:
\b[A-Z][a-z]+
The \b means that the match can only start at the beginning of a word. Change + to * if you want to allow single capital letters.
Edit: do you want to match "McDonald's"?
\b[A-Z][A-Za-z']+
If you don't want to match the apostrophe (') when it only appears at the end of the word, just do this:
\b[A-Z][A-Za-z']+(?<!')
source to share
I'm not sure why I am posting this ...
// Split on spaces, dropping the empty tokens that leading, trailing or doubled
// spaces would otherwise produce (indexing word[0] on "" throws).
string[] foo = "Mimi loves Toto and Tata hate Mimi so Toto killed Tata"
    .Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
// Tracks the capitalized words that have already been printed.
HashSet<string> words = new HashSet<string>();
foreach (string word in foo)
{
    // Add returns false for a word already in the set, so each capitalized
    // word is printed exactly once, in order of first appearance, without
    // relying on the enumeration order of the HashSet.
    if (char.IsUpper(word[0]) && words.Add(word))
    {
        Console.WriteLine(word);
    }
}
source to share
C# 3
string z = "Mimi loves Toto and Tata hate Mimi so Toto killed Tata";
// RemoveEmptyEntries drops the "" tokens produced by leading, trailing or
// doubled spaces; without it word[0] throws on an empty token.
var wordsWithCapital = z.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries)
    .Where(word => char.IsUpper(word[0]))
    .Distinct();
// Show the distinct capitalized words as one comma-separated line.
MessageBox.Show(string.Join(", ", wordsWithCapital.ToArray()));
C# 2
// Counts how often each capitalized word occurs in z (the sentence declared
// in the C# 3 snippet above) and shows one "word: count" box per distinct word.
Dictionary<string,int> distinctWords = new Dictionary<string,int>();
// Holds every non-empty token of z; the capital-letter filter is applied in the loop.
// RemoveEmptyEntries avoids indexing wordX[0] on an empty token.
string[] wordsWithInitCaps = z.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
foreach (string wordX in wordsWithInitCaps)
{
    if (char.IsUpper(wordX[0]))
    {
        // TryGetValue leaves count at 0 for a word not seen before, so a single
        // lookup covers both the "first occurrence" and the "increment" case.
        int count;
        distinctWords.TryGetValue(wordX, out count);
        distinctWords[wordX] = count + 1;
    }
}
foreach (KeyValuePair<string,int> entry in distinctWords)
{
    MessageBox.Show(entry.Key + ": " + entry.Value.ToString());
}
source to share
Solution. Note the use of the built-in string splitter (Split). The upper-case check on the first character could also be written as a test for whether it lies between 'A' and 'Z'. Removing duplicates is left to you (use a HashSet if you like).
/// <summary>
/// Prints every space-separated word of the sample sentence whose first
/// character is an upper-case letter, then waits for a key press.
/// Duplicates are not removed.
/// </summary>
static void Main(string[] args)
{
    string test = " Mimi loves Toto and Tata hate Mimi so Toto killed Tata";
    // RemoveEmptyEntries skips the empty tokens caused by the leading space
    // (or by doubled spaces), so j[0] is always valid.
    foreach (string j in test.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries))
    {
        // char.IsUpper rather than comparing against ToUpper(): the latter is
        // also true for digits and punctuation, and upper-cases the whole word
        // just to inspect one character.
        if (char.IsUpper(j[0]))
        {
            Console.WriteLine(j);
        }
    }
    Console.ReadKey(); // Keep the console window open until a key is pressed.
}
source to share
Since others have posted so many answers already, I don't feel like I'm breaking the homework rules to show this:
// Sample sentence with repeated words and capitals in several positions.
string source = "First The The Quick Red fox jumped oveR A Red Lazy BRown DOg";

// Pattern: a word boundary, one upper-case A-Z letter, then any further word characters.
Regex myReg = new Regex(@"(\b[A-Z]\w*)");

// Project each match to its text and keep only the first occurrence of each value.
MatchCollection hits = myReg.Matches(source);
IEnumerable<string> results = hits
    .Cast<Match>()
    .Select(hit => hit.Value)
    .Distinct();

// Emit one capitalized word per line.
foreach (string capitalized in results)
{
    Console.WriteLine(capitalized);
}
source to share
string foo = "Mimi loves Toto and Tata hate Mimi so Toto killed Tata";
char[] separators = {' '};
// Capitalized words in order of first appearance, without repeats.
IList<string> capitalizedWords = new List<string>();
// RemoveEmptyEntries drops the "" tokens produced by leading, trailing or
// doubled spaces, so word[0] below is always valid.
string[] words = foo.Split(separators, StringSplitOptions.RemoveEmptyEntries);
foreach (string word in words)
{
    // Index the first character directly instead of char.Parse(Substring(0, 1)),
    // and skip words that were already collected so each is printed once.
    if (char.IsUpper(word[0]) && !capitalizedWords.Contains(word))
    {
        capitalizedWords.Add(word);
    }
}
foreach (string s in capitalizedWords)
{
    Console.WriteLine(s);
}
source to share
David B's answer is the best; it takes word boundaries into account. One upvote from me.
To add something to his answer:
// Returns the distinct capitalized words of 'source' joined by single spaces.
// 'caseInsensitive' is handed to KeisInsensitiveComparer, which decides
// whether words differing only in letter case count as duplicates.
Func<string,bool,string> CaptureCaps = (source, caseInsensitive) =>
{
    Regex capitalizedWord = new Regex(@"\b[A-Z]\w*");
    KeisInsensitiveComparer comparer = new KeisInsensitiveComparer(caseInsensitive);
    string[] distinctCaps = capitalizedWord.Matches(source)
        .OfType<Match>()
        .Select(match => match.Value)
        .Distinct(comparer)
        .ToArray();
    return string.Join(" ", distinctCaps);
};

// Case-sensitive de-duplication.
MessageBox.Show(CaptureCaps("First The The Quick Red fox jumped oveR A Red Lazy BRown DOg", false));
MessageBox.Show(CaptureCaps("Mimi loves Toto. Tata hate Mimi, so Toto killed TaTa. A bad one!", false));
// Case-insensitive de-duplication.
MessageBox.Show(CaptureCaps("First The The Quick Red fox jumped oveR A Red Lazy BRown DOg", true));
MessageBox.Show(CaptureCaps("Mimi loves Toto. Tata hate Mimi, so Toto killed TaTa. A bad one!", true));
/// <summary>
/// String equality comparer whose case sensitivity is chosen at construction
/// time. Comparison is ordinal (culture-independent) in both modes.
/// </summary>
class KeisInsensitiveComparer : IEqualityComparer<string>
{
    // The framework comparer that implements the selected mode. Delegating to
    // it keeps Equals and GetHashCode consistent with each other and avoids
    // allocating upper-cased copies of the strings.
    private readonly StringComparer _comparer;

    /// <summary>Creates a case-sensitive comparer.</summary>
    public KeisInsensitiveComparer() : this(false) { }

    /// <summary>Creates a comparer with the requested case sensitivity.</summary>
    /// <param name="caseInsensitive">
    /// true to treat strings differing only in letter case as equal; false for exact matching.
    /// </param>
    public KeisInsensitiveComparer(bool caseInsensitive)
    {
        _comparer = caseInsensitive ? StringComparer.OrdinalIgnoreCase : StringComparer.Ordinal;
    }

    /// <summary>Determines whether two strings are equal under the selected mode.</summary>
    /// <returns>true when both are null or both hold equal text; otherwise false.</returns>
    public bool Equals(string x, string y)
    {
        return _comparer.Equals(x, y);
    }

    /// <summary>
    /// Returns a hash code consistent with <see cref="Equals(string, string)"/>:
    /// strings that compare equal produce the same value.
    /// </summary>
    /// <returns>0 for null; otherwise the hash code from the selected comparer.</returns>
    public int GetHashCode(string s)
    {
        // StringComparer.GetHashCode rejects null, so map null to 0 here.
        if (s == null)
        {
            return 0;
        }
        return _comparer.GetHashCode(s);
    }
}
source to share
// Matches a whole word made of one upper-case A-Z letter followed only by
// lower-case a-z letters (so "Tata" and "A" match, "TaTa" does not).
static Regex _capitalizedWordPattern = new Regex(
    @"\b[A-Z][a-z]*\b",
    RegexOptions.Compiled | RegexOptions.Multiline);

/// <summary>
/// Returns the distinct capitalized words found in <paramref name="text"/>,
/// in order of first appearance. The result is evaluated lazily.
/// </summary>
/// <param name="text">The text to scan.</param>
/// <returns>Each distinct matched word, once.</returns>
public static IEnumerable<string> GetDistinctOnlyCapitalizedWords(string text)
{
    MatchCollection found = _capitalizedWordPattern.Matches(text);
    IEnumerable<string> values = from Match hit in found
                                 select hit.Value;
    return values.Distinct();
}
source to share