How to get bold, italic and underlined words from plain text and surround them with HTML tags

What I want to achieve:

Input: (Input text comes from Excel cell)

This is the line includes bold , italic, and underlined words.

Expected Result:

This is a <b>string</b> includes <b>bold</b>, <i>italic</i> and <u>underlined</u> words.

      

What I've tried: (This method iterates over plain text by characters, not words.)

        StringBuilder html = new StringBuilder();
        StringBuilder fontText = new StringBuilder();
        string path = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "Test.xls");
        Application excel = new Application();
        Workbook wb = excel.Workbooks.Open(path);
        Worksheet excelSheet = wb.ActiveSheet;
        //Read the first cell
        Range cell = excelSheet.Cells[1, 1];
          for (int index = 1; index <= cell.Text.ToString().Length; index++)
            {
              //cell here is a Range object
              Characters ch = cell.get_Characters(index, 1);
              bool bold = (bool) ch.Font.Bold;
              if(bold){
                 if (html.Length == 0)
                      html.Append("<b>");
                 html.Append(ch.Text);
           }
      }
      if (html.Length !=0) html.Append("</b>")

      

But this method returns all bold texts surrounded by HTML tags like <b>stringbold</b>

Expected result: <b>string</b>

and<b>bold</b>

Any great thoughts on this?

Thanks in advance.

+3


source to share


2 answers


It took half my day to figure out this solution.

1. Code works with Bold , Italic and Underline characters.

2. The algorithm is a little more complicated. If any optimization is available or anyone comes up with a better solution, please post Reply.

ExcelReader

:

public string ExcelReader(string excelFilePath)
    {
        StringBuilder resultText = new StringBuilder();
        //string excelFilePath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "Test.xls");
        Application excel = new Application();
        Workbook wb = excel.Workbooks.Open(excelFilePath);
        Worksheet excelSheet = wb.ActiveSheet;
        //Read the first cell
        Range cell = excelSheet.Cells[1, 1];

        //Check if one bold or italic WORD.
        bool IfStop = false;
        //Check if character is the start of bold or italic character.
        bool ifFirstSpecialCharacter = true;
        //Initialize a empty tag
        string tag = "";
        //Check if it is the last index
        bool isLastIndex = false;
        for (int index = 1; index <= cell.Text.ToString().Length; index++)
        {
            //Check if the current character is bold or italic
            bool IfSpecialType = false;
            //cell here is a Range object
            Characters ch = cell.get_Characters(index, 1);
            XlUnderlineStyle temp = (XlUnderlineStyle)ch.Font.Underline;
            bool underline = false;

            if (temp == XlUnderlineStyle.xlUnderlineStyleSingle)
                underline = true;

            bool bold = (bool)ch.Font.Bold;
            bool italic = (bool)ch.Font.Italic;

            if (underline)
            {
                if (tag != "" && tag != "<u>")
                {
                    resultText.Append(tag.Insert(1, "/"));
                    ifFirstSpecialCharacter = true;
                    IfStop = true;
                }
                tag = "<u>";
                IfSpecialType = true;
            }
            if (bold)
            {
                if (tag != "" && tag != "<b>")
                {
                    resultText.Append(tag.Insert(1, "/"));
                    ifFirstSpecialCharacter = true;
                    IfStop = true;
                }
                tag = "<b>";
                IfSpecialType = true;
            }
            if (italic)
            {
                if (tag != "" && tag != "<i>")
                {
                    resultText.Append(tag.Insert(1, "/"));
                    ifFirstSpecialCharacter = true;
                    IfStop = true;
                }
                tag = "<i>";
                IfSpecialType = true;
            }
            if (index == cell.Text.ToString().Length)
                isLastIndex = true;
            DetectSpecialCharracterByType(isLastIndex, resultText, ref tag, IfSpecialType, ref IfStop, ref ifFirstSpecialCharacter, ch);
        }
        wb.Close();
        return resultText.ToString();
    }

      



DetectSpecialCharacterByType

:

private static void DetectSpecialCharacterByType(bool isLastIndex, StringBuilder fontText, ref string tag, bool ifSpecialType, ref bool IfStop, ref bool ifFirstSpecialCharacter, Characters ch)
    {
        if (ifSpecialType)
        {
            //If it is the first character of the word, put the <b> or <i> at the beginning.
            if (ifFirstSpecialCharacter)
            {
                fontText.Append(tag);
                ifFirstSpecialCharacter = false;
                IfStop = false;
            }
            //This is a edge case.If the last word of the text is bold or italic, put the </b> or </i>
            if (isLastIndex)
            {
                fontText.Append(ch.Text);
                fontText.Append(tag.Insert(1, "/"));
            }
            else
                fontText.Append(ch.Text);
        }
        else
        {
            //If it is the last character of one word, add </b> or </i> at the end.
            if (!IfStop && tag != "")
            {
                fontText.Append(tag.Insert(1, "/"));
                IfStop = true;
                ifFirstSpecialCharacter = true;
                tag = "";
            }
            fontText.Append(ch.Text);
        }
    }

      

Code works great, just copy paste and add new link Microsoft.Office.Interop.Excel

+2


source


Here's what I will do:

  • Create a helper class that knows about Font styles and their opening and closing tags and that can keep track of the "current" font style

  • Run the class with normal style, and then in a loop, ask the helper class to insert opening and closing tags if the font style has changed before writing the current character

  • At the end of the loop, ask the helper to insert the correct closing tag

I don't have an Excel interop project, so here's an example that you might need to adapt to specific Excel font types.

First the helper class:



static class TextHelper
{
    // You may have to use a different type than `FontStyle` 
    // Hopefully ch.Font has some type of `Style` property you can use
    public static FontStyle CurrentStyle { get; set; }
    public static string OpenTag { get { return GetOpenTag(); } }
    public static string CloseTag { get { return GetCloseTag(); } }

    // This will return the closing tag for the current font style, 
    // followed by the opening tag for the new font style
    public static string ChangeStyleIfNeeded(FontStyle newStyle)
    {
        if (newStyle == CurrentStyle) return string.Empty;

        var transitionStyleTags = GetCloseTag();
        CurrentStyle = newStyle;
        transitionStyleTags += GetOpenTag();

        return transitionStyleTags;
    }

    private static string GetOpenTag()
    {
        switch (CurrentStyle)
        {
            case FontStyle.Bold:
                return "<b>";
            case FontStyle.Italic:
                return "<i>";
            case FontStyle.Underline:
                return "<u>";
            default:
                return "";
        }
    }

    private static string GetCloseTag()
    {
        switch (CurrentStyle)
        {
            case FontStyle.Bold:
                return "</b>";
            case FontStyle.Italic:
                return "</i>";
            case FontStyle.Underline:
                return "</u>";
            default:
                return "";
        }
    }
}

      

Further, the implementation will look something like this:

// Start our helper class with 'Regular' font
TextHelper.CurrentStyle = FontStyle.Regular;
var html = new StringBuilder();

for (int index = 1; index <= cell.Text.ToString().Length; index++)
{
    char ch = cell.get_Characters(index, 1);

    // If the Font of this character is different than the current font, 
    // this will close the old style and open our new style.
    html.Append(TextHelper.ChangeStyleIfNeeded(ch.Font));

    // Append this character
    html.Append(ch.Text);
}

// Close the style at the very end
html.Append(TextHelper.CloseTag);

      

+3


source







All Articles