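To convert a PDF file to a DOCX file using C#, you can combine two libraries: iTextSharp to extract the text from the PDF, and Microsoft Word Interop to write that text into a DOCX file. Note that this approach transfers plain text only; layout, fonts, images, and tables are not preserved. Here is an example: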
using iTextSharp.text.pdf;
using iTextSharp.text.pdf.parser;
using System.IO;
using System.Text;
namespace PDFToDocxConverter
{
    // Namespace-local alias to keep the Word interop type names readable.
    using Word = Microsoft.Office.Interop.Word;

    /// <summary>
    /// Console program that extracts the plain text of a PDF with iTextSharp
    /// and writes it into a new DOCX document through Microsoft Word Interop.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point. Converts the input PDF to a DOCX file.
        /// </summary>
        /// <param name="args">
        /// Optional: <c>args[0]</c> is the input PDF path and <c>args[1]</c> is the
        /// output DOCX path. When omitted, the built-in default paths are used.
        /// </param>
        static void Main(string[] args)
        {
            // Verbatim string literals: '@' must be immediately followed by the quote.
            string inputFilePath = args != null && args.Length > 0
                ? args[0]
                : @"C:\Documents\input.pdf";
            string outputFilePath = args != null && args.Length > 1
                ? args[1]
                : @"C:\Documents\output.docx";

            string pdfText = ExtractTextFromPDF(inputFilePath);
            WriteTextToDOCX(pdfText, outputFilePath);
        }

        /// <summary>
        /// Extracts the text of every page of a PDF file.
        /// </summary>
        /// <param name="filePath">Path of the PDF file to read.</param>
        /// <returns>The text of all pages, in page order, separated by line breaks.</returns>
        /// <exception cref="System.ArgumentException">
        /// <paramref name="filePath"/> is null or empty.
        /// </exception>
        static string ExtractTextFromPDF(string filePath)
        {
            if (string.IsNullOrEmpty(filePath))
            {
                throw new System.ArgumentException("A PDF file path is required.", "filePath");
            }

            StringBuilder text = new StringBuilder();
            using (PdfReader reader = new PdfReader(filePath))
            {
                // PDF page numbers are 1-based.
                for (int page = 1; page <= reader.NumberOfPages; page++)
                {
                    if (page > 1)
                    {
                        // Separate pages so the last word of one page does not
                        // run into the first word of the next.
                        text.Append('\n');
                    }

                    text.Append(PdfTextExtractor.GetTextFromPage(reader, page));
                }
            }

            return text.ToString();
        }

        /// <summary>
        /// Writes the given text into a new Word document and saves it as DOCX.
        /// </summary>
        /// <param name="text">Text to place in the document body.</param>
        /// <param name="filePath">Destination path of the DOCX file.</param>
        /// <exception cref="System.ArgumentException">
        /// <paramref name="filePath"/> is null or empty.
        /// </exception>
        static void WriteTextToDOCX(string text, string filePath)
        {
            if (string.IsNullOrEmpty(filePath))
            {
                throw new System.ArgumentException("An output file path is required.", "filePath");
            }

            // Word runs as a separate process; hand it an absolute path so a
            // relative path is resolved against this program's working directory.
            string fullPath = Path.GetFullPath(filePath);

            Word.Application wordApp = new Word.Application();
            try
            {
                Word.Document doc = wordApp.Documents.Add();
                try
                {
                    Word.Range range = doc.Range();
                    range.Text = text;
                    doc.SaveAs2(fullPath, Word.WdSaveFormat.wdFormatXMLDocument);
                }
                finally
                {
                    // The document has either been saved already or the save failed;
                    // in both cases close without prompting.
                    doc.Close(SaveChanges: Word.WdSaveOptions.wdDoNotSaveChanges);
                }
            }
            finally
            {
                // Always shut Word down, even on failure, so no background
                // Word instance is left running.
                wordApp.Quit(SaveChanges: Word.WdSaveOptions.wdDoNotSaveChanges);
            }
        }
    }
}
Note that you need to add references to both the iTextSharp and Microsoft Word Interop (Microsoft.Office.Interop.Word) libraries in your project. Also, Microsoft Word Interop requires Microsoft Word to be installed on the machine where the conversion runs.