To work with Word and Excel documents I use DocumentFormat.OpenXml, and to work with PDF files I use TallComponents.PDFKit.
Why? Because MS SharePoint Server keeps the source files (documents) on the server, while on the client side we work with a copy of the document. Below are four code examples: for MS Word (doc, docx), Excel (xls, xlsx), PDF and TXT files. I hope these examples will help you in your future projects.
1. Word (DocumentFormat.OpenXml):
// Get the file behind the library item.
SPFile file = item.File;
// SPFile.Name is the file name including its extension, so no URL parsing is needed.
string fileName = file.Name;
// A name without a dot has no extension; report it as empty instead of
// treating the whole name as the extension (e.g. a file called "pdf").
int extensionStart = fileName.LastIndexOf('.');
// Lower-case with the invariant culture so the later comparisons against
// "docx", "xlsx", "pdf" and "txt" do not depend on the server's locale.
string FileExtension = extensionStart >= 0
    ? fileName.Substring(extensionStart + 1).ToLowerInvariant()
    : string.Empty;
// Search a Word document for the text "test" (case-insensitive) and print the file once if found.
// Only the Open XML format (.docx) is accepted: the legacy binary .doc format is not an
// Open XML package, so WordprocessingDocument.Open cannot read it.
if (FileExtension == "docx" && file.Exists)
{
    // OpenBinary returns the whole file as a byte array; wrap it in a read-only
    // stream (positioned at 0) instead of copying it into a second buffer.
    byte[] byteArray = file.OpenBinary();
    using (MemoryStream memStr = new MemoryStream(byteArray, false))
    using (WordprocessingDocument wordDoc = WordprocessingDocument.Open(memStr, false))
    {
        var mainPart = wordDoc.MainDocumentPart;
        // A package without a main part or body has nothing to search.
        var body = (mainPart != null && mainPart.Document != null) ? mainPart.Document.Body : null;
        if (body != null)
        {
            // Look at every text run in the body.
            foreach (var text in body.Descendants<Text>())
            {
                // Case-insensitive comparison using the current culture.
                if (text.Text.IndexOf("test", StringComparison.CurrentCultureIgnoreCase) != -1)
                {
                    Console.WriteLine(file.ToString());
                    // One hit is enough to report the file; stop scanning.
                    break;
                }
            }
        }
    }
}
2. Excel (DocumentFormat.OpenXml):
//condition
// Search an Excel workbook for the text "test" (case-insensitive) and print the file once if found.
// Only the Open XML format (.xlsx) is accepted: the legacy binary .xls format is not an
// Open XML package, so SpreadsheetDocument.Open cannot read it.
if (FileExtension == "xlsx" && file.Exists)
{
    // OpenBinary returns the whole file as a byte array; wrap it in a read-only
    // stream (positioned at 0) instead of copying it into a second buffer.
    byte[] byteArray = file.OpenBinary();
    using (MemoryStream memStr = new MemoryStream(byteArray, false))
    using (SpreadsheetDocument document = SpreadsheetDocument.Open(memStr, false))
    {
        WorkbookPart workbookPart = document.WorkbookPart;
        if (workbookPart != null)
        {
            // The shared string table is optional (a workbook with only numbers has none).
            // Materialize it once so each cell lookup is an array index, not a list walk.
            SharedStringTablePart sharedStringPart = workbookPart.GetPartsOfType<SharedStringTablePart>().FirstOrDefault();
            SharedStringItem[] sharedStrings = (sharedStringPart != null && sharedStringPart.SharedStringTable != null)
                ? sharedStringPart.SharedStringTable.Elements<SharedStringItem>().ToArray()
                : new SharedStringItem[0];

            bool found = false;
            foreach (WorksheetPart worksheetPart in workbookPart.WorksheetParts)
            {
                // Visit every cell of the sheet (cells live under sheetData/row).
                foreach (Cell cell in worksheetPart.Worksheet.Descendants<Cell>())
                {
                    // Default: the text stored directly in the cell (numbers, inline strings).
                    string cellValue = cell.InnerText;

                    // DataType is null when the cell has no type attribute, so check before reading it.
                    if (cell.DataType != null && cell.DataType.Value == CellValues.SharedString)
                    {
                        // For shared strings the cell value is an index into the shared string table.
                        int sharedIndex;
                        if (cell.CellValue == null
                            || !int.TryParse(cell.CellValue.Text, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out sharedIndex)
                            || sharedIndex < 0
                            || sharedIndex >= sharedStrings.Length)
                        {
                            // Dangling or malformed reference: nothing searchable in this cell.
                            continue;
                        }
                        cellValue = sharedStrings[sharedIndex].InnerText;
                    }

                    // Case-insensitive comparison using the current culture.
                    if (cellValue.IndexOf("test", StringComparison.CurrentCultureIgnoreCase) != -1)
                    {
                        Console.WriteLine(file.ToString());
                        found = true;
                        break;
                    }
                }

                // One hit is enough to report the file; skip the remaining sheets.
                if (found)
                {
                    break;
                }
            }
        }
    }
}
3. Pdf (TallComponents.PDFKit):
// Search a PDF document for the text "test" and print the file once if found.
if (FileExtension == "pdf" && file.Exists)
{
    // OpenBinary returns the whole file as a byte array; wrapping it directly gives a
    // read-only stream positioned at 0, rather than a copied buffer left at its end.
    byte[] byteArray = file.OpenBinary();
    using (MemoryStream memStr = new MemoryStream(byteArray, false))
    {
        TallComponents.PDF.Document document = new TallComponents.PDF.Document(memStr);
        // Same search criteria as before: the text "test" with both boolean options off.
        TextFindCriteria criteria = new TextFindCriteria("test", false, false);
        TextMatchEnumerator enumerator = document.Find(criteria);
        foreach (TextMatch match in enumerator)
        {
            Console.WriteLine(file.ToString());
            // One hit is enough to report the file; stop enumerating matches.
            break;
        }
    }
}
4. Txt (System.IO namespace):
// Search a plain-text file for the text "test" (case-insensitive) and print the file if found.
if (FileExtension == "txt" && file.Exists)
{
    // Read straight from the file's stream; fetching the content a second time
    // with OpenBinary() into an unused byte array is unnecessary.
    using (StreamReader reader = new StreamReader(file.OpenBinaryStream()))
    {
        string content = reader.ReadToEnd();
        // Case-insensitive comparison using the current culture.
        if (content.IndexOf("test", StringComparison.CurrentCultureIgnoreCase) != -1)
        {
            Console.WriteLine(file.ToString());
        }
    }
}
Happy Coding!
No comments:
Post a Comment