Tag Cloud

CRM 2011 (161) CRM 4.0 (144) C# (116) JScript (109) Plugin (92) Registry (90) Techpedia (77) PyS60 (68) WScript (43) Plugin Message (31) Exploit (27) ShellCode (26) FAQ (22) JavaScript (21) Killer Codes (21) Hax (18) VB 6.0 (17) Commands (16) VBScript (16) Quotes (15) Turbo C++ (13) WMI (13) Security (11) 1337 (10) Tutorials (10) Asp.Net (9) Safe Boot (9) Python (8) Interview Questions (6) video (6) Ajax (5) VC++ (5) WebService (5) Workflow (5) Bat (4) Dorks (4) Sql Server (4) Aptitude (3) Picklist (3) Tweak (3) WCF (3) regex (3) Config (2) LINQ (2) PHP (2) Shell (2) Silverlight (2) TSql (2) flowchart (2) serialize (2) ASHX (1) CRM 4.0 Videos (1) Debug (1) FetchXml (1) GAC (1) General (1) Generics (1) HttpWebRequest (1) InputParameters (1) Lookup (1) Offline Plug-ins (1) OutputParameters (1) Plug-in Constructor (1) Protocol (1) RIA (1) Sharepoint (1) Walkthrough (1) Web.config (1) design patterns (1) generic (1) iframe (1) secure config (1) unsecure config (1) url (1)

Pages

Wednesday, April 17, 2013

RTF to HTML Converter

// Very primitive RTF 2 HTML reader
// Converts tiny subset of RTF (from VS IDE) into html.
// Author: Mike Stall (http://blogs.msdn.com/jmstall)
// Gets input RTF from clipboard.
using System;
using System.Collections.Generic;
using System.Text;
using System.Windows.Forms;
using System.Text.RegularExpressions;
using System.IO;

namespace ClipBoard1
{
class Program
{
// Entry point. Reads RTF from the clipboard, skips the RTF header (font and
// color tables) and writes the converted HTML to "out.html" in the current
// directory.
// Throws ArgumentException when the clipboard holds no RTF or the RTF does not
// contain a color table.
[STAThread()]
static void Main(string[] args)
{
    Console.WriteLine("Get RTF from the clipboard.");

    // GetDataObject returns null when the clipboard is empty.
    IDataObject iData = Clipboard.GetDataObject();
    if (iData == null)
    {
        throw new ArgumentException("Clipboard is empty.");
    }

    // GetData returns null when the clipboard has no RTF representation.
    string rtf = iData.GetData(DataFormats.Rtf) as string;

    Console.WriteLine(iData.GetData(DataFormats.Text));

    if (string.IsNullOrEmpty(rtf))
    {
        throw new ArgumentException("Clipboard does not contain RTF data.");
    }

    // We assume the colortable and fonttable are a standard preset used by VS.
    // Avoids hassle of parsing them.
    // Skip past {\colortbl.*;} and to the start of the real data
    // @todo - regular expression would be good here.
    // IndexOf signals "not found" with -1; ordinal comparison keeps the search
    // independent of the current culture.
    int i1 = rtf.IndexOf(@"{\colortbl", StringComparison.Ordinal);
    if (i1 < 0)
    {
        throw new ArgumentException("Bad input RTF.");
    }

    int i2 = rtf.IndexOf(";}", i1, StringComparison.Ordinal);
    if (i2 < 0)
    {
        throw new ArgumentException("Bad input RTF.");
    }

    // Everything after the color table, minus the final character of the input
    // (presumably the '}' closing the outermost RTF group - confirm against real
    // clipboard data). Clamp so a truncated input cannot yield a negative length.
    int start = i2 + 2;
    int length = Math.Max(0, rtf.Length - start - 1);
    string data = rtf.Substring(start, length);

    // 'using' closes the file even when Format throws.
    using (TextWriter tw = new StreamWriter("out.html"))
    {
        Format(tw, data);
    }
}

// Default color table used by VS's IDE.
// Each entry is an HTML "#rrggbb" color string. Slot 0 is the default color;
// the real color table starts at slot 1.
static string[] m_colorTable =
{
    "#000000", //  0: default (black)
    "#000000", //  1: black - first entry of the real table
    "#0000FF", //  2: blue
    "#00ffFF", //  3: cyan
    "#00FF00", //  4: green
    "#FF00FF", //  5: magenta
    "#FF0000", //  6: red
    "#FFFF00", //  7: yellow
    "#FFffFF", //  8: white
    "#000080", //  9: navy
    "#008080", // 10: teal
    "#008000", // 11: dark green
    "#800080", // 12: purple
    "#800000", // 13: maroon
    "#808000", // 14: olive
    "#808080", // 15: gray
    "#c0c0c0"  // 16: silver
};


// Escape HTML chars.
// Replaces '&', '<' and '>' with their HTML entities so the text can be placed
// inside an HTML element without being interpreted as markup.
// Returns the input unchanged when it is null or empty.
static string Escape(string st)
{
    if (string.IsNullOrEmpty(st))
    {
        return st;
    }

    // '&' must be handled first; otherwise the ampersands introduced by the
    // other two entities would themselves be escaped again.
    st = st.Replace("&", "&amp;");
    st = st.Replace("<", "&lt;");
    st = st.Replace(">", "&gt;");
    return st;
}
// Literal text up to and including the next '\'. Group 1 is the text itself.
static readonly Regex m_textRegex =
    new Regex(@"(.*?)\\", RegexOptions.Singleline | RegexOptions.IgnoreCase);

// A control word located exactly at the scan position (\G): group 1 is the
// word, group 2 the optional numeric parameter. A single delimiting space is
// consumed when present.
static readonly Regex m_ctrlWordRegex =
    new Regex(@"\G([\{a-z]+)([0-9]*) ?", RegexOptions.Singleline | RegexOptions.IgnoreCase);

// Convert the RTF data into an HTML stream.
// This rtf snippet is past the font + color tables, so we're just transferring control words now.
// Write out HTML to the text writer.
//
// Example: \fs20 \cf2 using\cf0 System;
// root --> ('text' '\' ('control word' | 'escaped char'))+
// 'control word' --> (alpha)+ (numeric*) space?
// 'escaped char' = 'x'. Some characters \, {, } are escaped: '\x' --> 'x'
// @todo - handle embedded groups (begin with '{')
//
// Parameters:
//   tw  - receives the generated HTML; the caller owns and closes it.
//   rtf - RTF body text, starting after the color table.
// Throws ArgumentNullException when either argument is null, and
// ArgumentException for an unrecognized control word or a \cf color index that
// is missing or outside m_colorTable.
static void Format(TextWriter tw, string rtf)
{
    if (tw == null)
    {
        throw new ArgumentNullException("tw");
    }
    if (rtf == null)
    {
        throw new ArgumentNullException("rtf");
    }

    // Everything is emitted inside one <pre>; the text is wrapped in <span>s,
    // and every \cf closes the current span and opens a new colored one.
    tw.Write("<pre>");
    tw.Write("<span>");

    int idx = 0;
    while (idx < rtf.Length)
    {
        // Get any text up to a '\'.
        Match m = m_textRegex.Match(rtf, idx);
        if (!m.Success)
        {
            break; // no more backslashes: the remainder is plain text
        }

        // text will be empty if we have adjacent control words
        string stText = m.Groups[1].Value;
        tw.Write(Escape(stText));
        idx = m.Index + m.Length;

        if (idx >= rtf.Length)
        {
            // The input ends with a lone '\': nothing follows it, so stop
            // instead of indexing past the end of the string.
            break;
        }

        // check for RTF escape characters. According to the spec, these are the only escaped chars.
        char chNext = rtf[idx];
        if (chNext == '{' || chNext == '}' || chNext == '\\')
        {
            // Escaped char; none of these needs HTML escaping.
            tw.Write(chNext);
            idx++;
            continue;
        }

        // Must be a control word. The match is anchored at idx so that a
        // control word further along in the input can never be picked up by
        // mistake. A failed match yields empty groups and falls through to the
        // "unrecognized" error below.
        m = m_ctrlWordRegex.Match(rtf, idx);
        string stCtrlWord = m.Groups[1].Value;
        string stCtrlParam = m.Groups[2].Value;

        if (stCtrlWord == "cf")
        {
            // Set font color.
            int iColor;
            if (!Int32.TryParse(stCtrlParam, out iColor) || iColor < 0 || iColor >= m_colorTable.Length)
            {
                throw new ArgumentException("Bad color index in control word '" + stCtrlWord + stCtrlParam + "'after:" + stText);
            }

            tw.Write("</span>"); // close previous span, and start a new one for the given color.
            tw.Write("<span style=\"color: " + m_colorTable[iColor] + "\">");
        }
        else if (stCtrlWord == "fs")
        {
            // Sets font size. ignore
        }
        else if (stCtrlWord == "par")
        {
            // This is a newline. ignore
            // @todo- I think the only reason we can ignore this is because the \par in our input are always followed by
            // a '\r\n' and we're accidentally writing that.
        }
        else
        {
            throw new ArgumentException("Unrecognized control word '" + stCtrlWord + stCtrlParam + "'after:" + stText);
        }
        idx += m.Length;
    }
    tw.Write(Escape(rtf.Substring(idx))); // rest of string

    tw.Write("</span>");
    tw.Write("</pre>");
} // end Format()
} // end class Program
} // end namespace ClipBoard1

No comments: