|
@@ -9,7 +9,8 @@ namespace OASystem.API.OAMethodLib.FileProcessing
|
|
|
/// <summary>
|
|
|
/// 文件处理服务,使用Aspose.Words处理Word文档
|
|
|
/// </summary>
|
|
|
- public class FileProcessingService : IFileProcessingService
|
|
|
+ public class FileProcessingService
|
|
|
+ //: IFileProcessingService
|
|
|
{
|
|
|
private readonly ILogger<FileProcessingService> _logger;
|
|
|
private readonly IConfiguration _configuration;
|
|
@@ -48,338 +49,608 @@ namespace OASystem.API.OAMethodLib.FileProcessing
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- /// <summary>
|
|
|
- /// 处理Word文档并提取结构化信息
|
|
|
- /// </summary>
|
|
|
- public async Task<ProcessingResult> ProcessWordDocumentAsync(IFormFile file)
|
|
|
- {
|
|
|
- var stopwatch = System.Diagnostics.Stopwatch.StartNew();
|
|
|
-
|
|
|
- try
|
|
|
- {
|
|
|
- _logger.LogInformation("开始处理Word文档: {FileName} ({Size} bytes)",
|
|
|
- file.FileName, file.Length);
|
|
|
-
|
|
|
- if (!IsSupportedWordFormat(file.FileName))
|
|
|
- {
|
|
|
- return new ProcessingResult
|
|
|
- {
|
|
|
- Success = false,
|
|
|
- ErrorMessage = $"不支持的文件格式: {Path.GetExtension(file.FileName)}",
|
|
|
- FileSize = file.Length,
|
|
|
- FileType = file.ContentType
|
|
|
- };
|
|
|
- }
|
|
|
-
|
|
|
- using var stream = new MemoryStream();
|
|
|
- await file.CopyToAsync(stream);
|
|
|
- stream.Position = 0;
|
|
|
-
|
|
|
- var documentInfo = await ExtractDocumentInfoAsync(stream, file.FileName);
|
|
|
-
|
|
|
- stopwatch.Stop();
|
|
|
-
|
|
|
- _logger.LogInformation("Word文档处理完成: {FileName}, 耗时: {ElapsedMs}ms",
|
|
|
- file.FileName, stopwatch.ElapsedMilliseconds);
|
|
|
-
|
|
|
- return new ProcessingResult
|
|
|
- {
|
|
|
- Success = true,
|
|
|
- Data = documentInfo,
|
|
|
- FileSize = file.Length,
|
|
|
- FileType = file.ContentType
|
|
|
- };
|
|
|
- }
|
|
|
- catch (Exception ex)
|
|
|
- {
|
|
|
- stopwatch.Stop();
|
|
|
- _logger.LogError(ex, "Word文档处理失败: {FileName}", file.FileName);
|
|
|
-
|
|
|
- return new ProcessingResult
|
|
|
- {
|
|
|
- Success = false,
|
|
|
- ErrorMessage = $"处理失败: {ex.Message}",
|
|
|
- FileSize = file.Length,
|
|
|
- FileType = file.ContentType
|
|
|
- };
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- /// <summary>
|
|
|
- /// 批量处理Word文档
|
|
|
- /// </summary>
|
|
|
- public async Task<List<ProcessingResult>> ProcessWordDocumentsAsync(List<IFormFile> files)
|
|
|
- {
|
|
|
- var results = new List<ProcessingResult>();
|
|
|
- var tasks = files.Select(ProcessWordDocumentAsync).ToList();
|
|
|
-
|
|
|
- var batchResults = await Task.WhenAll(tasks);
|
|
|
- results.AddRange(batchResults);
|
|
|
-
|
|
|
- return results;
|
|
|
- }
|
|
|
-
|
|
|
- /// <summary>
|
|
|
- /// 从流中提取文档信息
|
|
|
- /// </summary>
|
|
|
- public async Task<WordDocumentInfo> ExtractDocumentInfoAsync(Stream stream, string fileName)
|
|
|
- {
|
|
|
- var stopwatch = System.Diagnostics.Stopwatch.StartNew();
|
|
|
-
|
|
|
- try
|
|
|
- {
|
|
|
- // 加载Word文档
|
|
|
- var doc = new Document(stream);
|
|
|
-
|
|
|
- var documentInfo = new WordDocumentInfo
|
|
|
- {
|
|
|
- Title = ExtractDocumentTitle(doc),
|
|
|
- Content = ExtractTextContent(doc),
|
|
|
- Metadata = ExtractMetadata(doc),
|
|
|
- Tables = ExtractTables(doc),
|
|
|
- FormFields = ExtractFormFields(doc),
|
|
|
- Sections = ExtractSections(doc),
|
|
|
- ImagesCount = CountImages(doc)
|
|
|
- };
|
|
|
-
|
|
|
- stopwatch.Stop();
|
|
|
- documentInfo.ProcessingTimeMs = stopwatch.ElapsedMilliseconds;
|
|
|
-
|
|
|
- return documentInfo;
|
|
|
- }
|
|
|
- catch (Exception ex)
|
|
|
- {
|
|
|
- _logger.LogError(ex, "文档信息提取失败: {FileName}", fileName);
|
|
|
- throw;
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- /// <summary>
|
|
|
- /// 提取文档标题
|
|
|
- /// </summary>
|
|
|
- private string ExtractDocumentTitle(Document doc)
|
|
|
- {
|
|
|
- try
|
|
|
- {
|
|
|
- // 首先尝试从文档属性获取标题
|
|
|
- var title = doc.BuiltInDocumentProperties.Title;
|
|
|
- if (!string.IsNullOrEmpty(title))
|
|
|
- return title;
|
|
|
-
|
|
|
- // 如果没有标题,尝试从第一个段落提取
|
|
|
- foreach (Aspose.Words.Paragraph paragraph in doc.GetChildNodes(NodeType.Paragraph, true))
|
|
|
- {
|
|
|
- var text = paragraph.GetText().Trim();
|
|
|
- if (!string.IsNullOrEmpty(text) && text.Length < 100) // 假设标题不会太长
|
|
|
- return text;
|
|
|
- }
|
|
|
-
|
|
|
- return "未命名文档";
|
|
|
- }
|
|
|
- catch (Exception ex)
|
|
|
- {
|
|
|
- _logger.LogWarning(ex, "提取文档标题失败");
|
|
|
- return "未命名文档";
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- /// <summary>
|
|
|
- /// 提取文本内容
|
|
|
- /// </summary>
|
|
|
- private string ExtractTextContent(Document doc)
|
|
|
- {
|
|
|
- try
|
|
|
- {
|
|
|
- // 使用Aspose.Words的GetText方法提取纯文本
|
|
|
- return doc.GetText();
|
|
|
- }
|
|
|
- catch (Exception ex)
|
|
|
- {
|
|
|
- _logger.LogWarning(ex, "提取文本内容失败");
|
|
|
- return string.Empty;
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- /// <summary>
|
|
|
- /// 提取文档元数据
|
|
|
- /// </summary>
|
|
|
- private DocumentMetadata ExtractMetadata(Document doc)
|
|
|
- {
|
|
|
- try
|
|
|
- {
|
|
|
- var props = doc.BuiltInDocumentProperties;
|
|
|
-
|
|
|
- return new DocumentMetadata
|
|
|
- {
|
|
|
- Author = props.Author ?? string.Empty,
|
|
|
- Company = props.Company ?? string.Empty,
|
|
|
- CreatedTime = props.CreatedTime,
|
|
|
- LastSavedTime = props.LastSavedTime,
|
|
|
- PageCount = doc.PageCount,
|
|
|
- WordCount = props.Words,
|
|
|
- CharacterCount = props.Characters,
|
|
|
- Subject = props.Subject ?? string.Empty,
|
|
|
- Keywords = props.Keywords ?? string.Empty
|
|
|
- };
|
|
|
- }
|
|
|
- catch (Exception ex)
|
|
|
- {
|
|
|
- _logger.LogWarning(ex, "提取元数据失败");
|
|
|
- return new DocumentMetadata();
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- /// <summary>
|
|
|
- /// 提取表格数据
|
|
|
- /// </summary>
|
|
|
- private List<DocumentTable> ExtractTables(Document doc)
|
|
|
- {
|
|
|
- var tables = new List<DocumentTable>();
|
|
|
-
|
|
|
- try
|
|
|
- {
|
|
|
- int tableIndex = 1;
|
|
|
- foreach (Aspose.Words.Tables.Table table in doc.GetChildNodes(NodeType.Table, true))
|
|
|
- {
|
|
|
- var docTable = new DocumentTable
|
|
|
- {
|
|
|
- TableName = $"表格_{tableIndex}"
|
|
|
- };
|
|
|
-
|
|
|
- // 提取表头(假设第一行是表头)
|
|
|
- if (table.Rows.Count > 0)
|
|
|
- {
|
|
|
- var firstRow = table.FirstRow;
|
|
|
- foreach (Cell cell in firstRow.Cells)
|
|
|
- {
|
|
|
- docTable.Headers.Add(cell.GetText().Trim());
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- // 提取所有行数据
|
|
|
- foreach (Row row in table.Rows)
|
|
|
- {
|
|
|
- var rowData = new List<string>();
|
|
|
- foreach (Cell cell in row.Cells)
|
|
|
- {
|
|
|
- rowData.Add(cell.GetText().Trim());
|
|
|
- }
|
|
|
- docTable.Rows.Add(rowData);
|
|
|
- }
|
|
|
-
|
|
|
- tables.Add(docTable);
|
|
|
- tableIndex++;
|
|
|
- }
|
|
|
- }
|
|
|
- catch (Exception ex)
|
|
|
- {
|
|
|
- _logger.LogWarning(ex, "提取表格数据失败");
|
|
|
- }
|
|
|
-
|
|
|
- return tables;
|
|
|
- }
|
|
|
-
|
|
|
- /// <summary>
|
|
|
- /// 提取表单字段
|
|
|
- /// </summary>
|
|
|
- private List<FormField> ExtractFormFields(Document doc)
|
|
|
- {
|
|
|
- var formFields = new List<FormField>();
|
|
|
-
|
|
|
- try
|
|
|
- {
|
|
|
- foreach (FormField formField in doc.Range.FormFields)
|
|
|
- {
|
|
|
- formFields.Add(new FormField
|
|
|
- {
|
|
|
- Name = formField.Name,
|
|
|
- Type = formField.Type.ToString()
|
|
|
- //,
|
|
|
- //Value = formField.Result ?? string.Empty,
|
|
|
- //IsChecked = formField.Checked
|
|
|
- });
|
|
|
- }
|
|
|
- }
|
|
|
- catch (Exception ex)
|
|
|
- {
|
|
|
- _logger.LogWarning(ex, "提取表单字段失败");
|
|
|
- }
|
|
|
-
|
|
|
- return formFields;
|
|
|
- }
|
|
|
-
|
|
|
- /// <summary>
|
|
|
- /// 提取章节信息
|
|
|
- /// </summary>
|
|
|
- private List<DocumentSection> ExtractSections(Document doc)
|
|
|
- {
|
|
|
- var sections = new List<DocumentSection>();
|
|
|
-
|
|
|
- try
|
|
|
- {
|
|
|
- int sectionIndex = 1;
|
|
|
- foreach (Section section in doc.Sections)
|
|
|
- {
|
|
|
- var docSection = new DocumentSection
|
|
|
- {
|
|
|
- SectionNumber = sectionIndex,
|
|
|
- Content = section.GetText(),
|
|
|
- ParagraphsCount = section.Body.Paragraphs.Count,
|
|
|
- TablesCount = section.Body.Tables.Count
|
|
|
- };
|
|
|
-
|
|
|
- sections.Add(docSection);
|
|
|
- sectionIndex++;
|
|
|
- }
|
|
|
- }
|
|
|
- catch (Exception ex)
|
|
|
- {
|
|
|
- _logger.LogWarning(ex, "提取章节信息失败");
|
|
|
- }
|
|
|
-
|
|
|
- return sections;
|
|
|
- }
|
|
|
-
|
|
|
- /// <summary>
|
|
|
- /// 统计图片数量
|
|
|
- /// </summary>
|
|
|
- private int CountImages(Document doc)
|
|
|
- {
|
|
|
- try
|
|
|
- {
|
|
|
- int imageCount = 0;
|
|
|
- foreach (Aspose.Words.Drawing.Shape shape in doc.GetChildNodes(NodeType.Shape, true))
|
|
|
- {
|
|
|
- if (shape.HasImage)
|
|
|
- {
|
|
|
- imageCount++;
|
|
|
- }
|
|
|
- }
|
|
|
- return imageCount;
|
|
|
- }
|
|
|
- catch (Exception ex)
|
|
|
- {
|
|
|
- _logger.LogWarning(ex, "统计图片数量失败");
|
|
|
- return 0;
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- /// <summary>
|
|
|
- /// 检查文件是否为支持的Word格式
|
|
|
- /// </summary>
|
|
|
- public bool IsSupportedWordFormat(string fileName)
|
|
|
- {
|
|
|
- var extension = Path.GetExtension(fileName).ToLower();
|
|
|
- return extension switch
|
|
|
- {
|
|
|
- ".doc" => true,
|
|
|
- ".docx" => true,
|
|
|
- ".dot" => true,
|
|
|
- ".dotx" => true,
|
|
|
- ".docm" => true,
|
|
|
- ".dotm" => true,
|
|
|
- _ => false
|
|
|
- };
|
|
|
- }
|
|
|
+ ///// <summary>
|
|
|
+ ///// 处理Word文档并提取结构化信息
|
|
|
+ ///// </summary>
|
|
|
+ //public async Task<ProcessingResult> ProcessWordDocumentAsync(IFormFile file)
|
|
|
+ //{
|
|
|
+ // var stopwatch = System.Diagnostics.Stopwatch.StartNew();
|
|
|
+
|
|
|
+ // try
|
|
|
+ // {
|
|
|
+ // _logger.LogInformation("开始处理Word文档: {FileName} ({Size} bytes)",
|
|
|
+ // file.FileName, file.Length);
|
|
|
+
|
|
|
+ // if (!IsSupportedWordFormat(file.FileName))
|
|
|
+ // {
|
|
|
+ // return new ProcessingResult
|
|
|
+ // {
|
|
|
+ // Success = false,
|
|
|
+ // ErrorMessage = $"不支持的文件格式: {Path.GetExtension(file.FileName)}",
|
|
|
+ // FileSize = file.Length,
|
|
|
+ // FileType = file.ContentType
|
|
|
+ // };
|
|
|
+ // }
|
|
|
+
|
|
|
+ // using var stream = new MemoryStream();
|
|
|
+ // await file.CopyToAsync(stream);
|
|
|
+ // stream.Position = 0;
|
|
|
+
|
|
|
+ // var documentInfo = await ExtractDocumentInfoAsync(stream, file.FileName);
|
|
|
+
|
|
|
+ // stopwatch.Stop();
|
|
|
+
|
|
|
+ // _logger.LogInformation("Word文档处理完成: {FileName}, 耗时: {ElapsedMs}ms",
|
|
|
+ // file.FileName, stopwatch.ElapsedMilliseconds);
|
|
|
+
|
|
|
+ // return new ProcessingResult
|
|
|
+ // {
|
|
|
+ // Success = true,
|
|
|
+ // Data = documentInfo,
|
|
|
+ // FileSize = file.Length,
|
|
|
+ // FileType = file.ContentType
|
|
|
+ // };
|
|
|
+ // }
|
|
|
+ // catch (Exception ex)
|
|
|
+ // {
|
|
|
+ // stopwatch.Stop();
|
|
|
+ // _logger.LogError(ex, "Word文档处理失败: {FileName}", file.FileName);
|
|
|
+
|
|
|
+ // return new ProcessingResult
|
|
|
+ // {
|
|
|
+ // Success = false,
|
|
|
+ // ErrorMessage = $"处理失败: {ex.Message}",
|
|
|
+ // FileSize = file.Length,
|
|
|
+ // FileType = file.ContentType
|
|
|
+ // };
|
|
|
+ // }
|
|
|
+ //}
|
|
|
+
|
|
|
+ ///// <summary>
|
|
|
+ ///// 批量处理Word文档
|
|
|
+ ///// </summary>
|
|
|
+ //public async Task<List<ProcessingResult>> ProcessWordDocumentsAsync(List<IFormFile> files)
|
|
|
+ //{
|
|
|
+ // var results = new List<ProcessingResult>();
|
|
|
+ // var tasks = files.Select(ProcessWordDocumentAsync).ToList();
|
|
|
+
|
|
|
+ // var batchResults = await Task.WhenAll(tasks);
|
|
|
+ // results.AddRange(batchResults);
|
|
|
+
|
|
|
+ // return results;
|
|
|
+ //}
|
|
|
+
|
|
|
+ ///// <summary>
|
|
|
+ ///// 从流中提取文档信息
|
|
|
+ ///// </summary>
|
|
|
+ //public async Task<WordDocumentInfo> ExtractDocumentInfoAsync(Stream stream, string fileName)
|
|
|
+ //{
|
|
|
+ // var stopwatch = System.Diagnostics.Stopwatch.StartNew();
|
|
|
+
|
|
|
+ // try
|
|
|
+ // {
|
|
|
+ // // 加载Word文档
|
|
|
+ // var doc = new Document(stream);
|
|
|
+
|
|
|
+ // var documentInfo = new WordDocumentInfo
|
|
|
+ // {
|
|
|
+ // Title = ExtractDocumentTitle(doc),
|
|
|
+ // Content = ExtractTextContent(doc),
|
|
|
+ // Metadata = ExtractMetadata(doc),
|
|
|
+ // Tables = ExtractTables(doc),
|
|
|
+ // FormFields = ExtractFormFields(doc),
|
|
|
+ // Sections = ExtractSections(doc),
|
|
|
+ // ImagesCount = CountImages(doc)
|
|
|
+ // };
|
|
|
+
|
|
|
+ // stopwatch.Stop();
|
|
|
+ // documentInfo.ProcessingTimeMs = stopwatch.ElapsedMilliseconds;
|
|
|
+
|
|
|
+ // return documentInfo;
|
|
|
+ // }
|
|
|
+ // catch (Exception ex)
|
|
|
+ // {
|
|
|
+ // _logger.LogError(ex, "文档信息提取失败: {FileName}", fileName);
|
|
|
+ // throw;
|
|
|
+ // }
|
|
|
+ //}
|
|
|
+
|
|
|
+ ///// <summary>
|
|
|
+ ///// 提取文档标题
|
|
|
+ ///// </summary>
|
|
|
+ //private string ExtractDocumentTitle(Document doc)
|
|
|
+ //{
|
|
|
+ // try
|
|
|
+ // {
|
|
|
+ // // 首先尝试从文档属性获取标题
|
|
|
+ // var title = doc.BuiltInDocumentProperties.Title;
|
|
|
+ // if (!string.IsNullOrEmpty(title))
|
|
|
+ // return title;
|
|
|
+
|
|
|
+ // // 如果没有标题,尝试从第一个段落提取
|
|
|
+ // foreach (Aspose.Words.Paragraph paragraph in doc.GetChildNodes(NodeType.Paragraph, true))
|
|
|
+ // {
|
|
|
+ // var text = paragraph.GetText().Trim();
|
|
|
+ // if (!string.IsNullOrEmpty(text) && text.Length < 100) // 假设标题不会太长
|
|
|
+ // return text;
|
|
|
+ // }
|
|
|
+
|
|
|
+ // return "未命名文档";
|
|
|
+ // }
|
|
|
+ // catch (Exception ex)
|
|
|
+ // {
|
|
|
+ // _logger.LogWarning(ex, "提取文档标题失败");
|
|
|
+ // return "未命名文档";
|
|
|
+ // }
|
|
|
+ //}
|
|
|
+
|
|
|
+ ///// <summary>
|
|
|
+ ///// 提取文本内容
|
|
|
+ ///// </summary>
|
|
|
+ //private string ExtractTextContent(Document doc)
|
|
|
+ //{
|
|
|
+ // try
|
|
|
+ // {
|
|
|
+ // // 使用Aspose.Words的GetText方法提取纯文本
|
|
|
+ // return doc.GetText();
|
|
|
+ // }
|
|
|
+ // catch (Exception ex)
|
|
|
+ // {
|
|
|
+ // _logger.LogWarning(ex, "提取文本内容失败");
|
|
|
+ // return string.Empty;
|
|
|
+ // }
|
|
|
+ //}
|
|
|
+
|
|
|
+ ///// <summary>
|
|
|
+ ///// 提取文档元数据
|
|
|
+ ///// </summary>
|
|
|
+ //private DocumentMetadata ExtractMetadata(Document doc)
|
|
|
+ //{
|
|
|
+ // try
|
|
|
+ // {
|
|
|
+ // var props = doc.BuiltInDocumentProperties;
|
|
|
+
|
|
|
+ // return new DocumentMetadata
|
|
|
+ // {
|
|
|
+ // Author = props.Author ?? string.Empty,
|
|
|
+ // Company = props.Company ?? string.Empty,
|
|
|
+ // CreatedTime = props.CreatedTime,
|
|
|
+ // LastSavedTime = props.LastSavedTime,
|
|
|
+ // PageCount = doc.PageCount,
|
|
|
+ // WordCount = props.Words,
|
|
|
+ // CharacterCount = props.Characters,
|
|
|
+ // Subject = props.Subject ?? string.Empty,
|
|
|
+ // Keywords = props.Keywords ?? string.Empty
|
|
|
+ // };
|
|
|
+ // }
|
|
|
+ // catch (Exception ex)
|
|
|
+ // {
|
|
|
+ // _logger.LogWarning(ex, "提取元数据失败");
|
|
|
+ // return new DocumentMetadata();
|
|
|
+ // }
|
|
|
+ //}
|
|
|
+
|
|
|
+ ///// <summary>
|
|
|
+ ///// 提取表格数据
|
|
|
+ ///// </summary>
|
|
|
+ //private List<DocumentTable> ExtractTables(Document doc)
|
|
|
+ //{
|
|
|
+ // var tables = new List<DocumentTable>();
|
|
|
+
|
|
|
+ // try
|
|
|
+ // {
|
|
|
+ // int tableIndex = 1;
|
|
|
+ // foreach (Aspose.Words.Tables.Table table in doc.GetChildNodes(NodeType.Table, true))
|
|
|
+ // {
|
|
|
+ // var docTable = new DocumentTable
|
|
|
+ // {
|
|
|
+ // TableName = $"表格_{tableIndex}"
|
|
|
+ // };
|
|
|
+
|
|
|
+ // // 提取表头(假设第一行是表头)
|
|
|
+ // if (table.Rows.Count > 0)
|
|
|
+ // {
|
|
|
+ // var firstRow = table.FirstRow;
|
|
|
+ // foreach (Cell cell in firstRow.Cells)
|
|
|
+ // {
|
|
|
+ // docTable.Headers.Add(cell.GetText().Trim());
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+
|
|
|
+ // // 提取所有行数据
|
|
|
+ // foreach (Row row in table.Rows)
|
|
|
+ // {
|
|
|
+ // var rowData = new List<string>();
|
|
|
+ // foreach (Cell cell in row.Cells)
|
|
|
+ // {
|
|
|
+ // rowData.Add(cell.GetText().Trim());
|
|
|
+ // }
|
|
|
+ // docTable.Rows.Add(rowData);
|
|
|
+ // }
|
|
|
+
|
|
|
+ // tables.Add(docTable);
|
|
|
+ // tableIndex++;
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+ // catch (Exception ex)
|
|
|
+ // {
|
|
|
+ // _logger.LogWarning(ex, "提取表格数据失败");
|
|
|
+ // }
|
|
|
+
|
|
|
+ // return tables;
|
|
|
+ //}
|
|
|
+
|
|
|
+ ///// <summary>
|
|
|
+ ///// 提取表单字段(兼容Aspose.Words 15.12.0)
|
|
|
+ ///// </summary>
|
|
|
+ //private List<FormField> ExtractFormFields(Document doc)
|
|
|
+ //{
|
|
|
+ // var formFields = new List<FormField>();
|
|
|
+
|
|
|
+ // try
|
|
|
+ // {
|
|
|
+ // // 获取文档中的所有表单字段
|
|
|
+ // var formFieldCollection = doc.Range.FormFields;
|
|
|
+
|
|
|
+ // for (int i = 0; i < formFieldCollection.Count; i++)
|
|
|
+ // {
|
|
|
+ // var formField = formFieldCollection[i];
|
|
|
+ // var extractedField = ExtractFormFieldInfo(formField);
|
|
|
+
|
|
|
+ // if (extractedField != null)
|
|
|
+ // {
|
|
|
+ // formFields.Add(extractedField);
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+ // catch (Exception ex)
|
|
|
+ // {
|
|
|
+ // _logger.LogWarning(ex, "提取表单字段失败");
|
|
|
+ // }
|
|
|
+
|
|
|
+ // return formFields;
|
|
|
+ //}
|
|
|
+
|
|
|
+ ///// <summary>
|
|
|
+ ///// 提取单个表单字段的详细信息
|
|
|
+ ///// </summary>
|
|
|
+ //private FormField ExtractFormFieldInfo(Aspose.Words.Fields.FormField formField)
|
|
|
+ //{
|
|
|
+ // try
|
|
|
+ // {
|
|
|
+ // var field = new FormField
|
|
|
+ // {
|
|
|
+ // Name = GetFormFieldName(formField),
|
|
|
+ // Type = GetFormFieldType(formField),
|
|
|
+ // Value = GetFormFieldValue(formField),
|
|
|
+ // IsChecked = IsFormFieldChecked(formField),
|
|
|
+ // Status = GetFormFieldStatus(formField),
|
|
|
+ // DefaultValue = GetFormFieldDefaultValue(formField),
|
|
|
+ // MaxLength = GetFormFieldMaxLength(formField),
|
|
|
+ // Options = GetFormFieldOptions(formField)
|
|
|
+ // };
|
|
|
+
|
|
|
+ // return field;
|
|
|
+ // }
|
|
|
+ // catch (Exception ex)
|
|
|
+ // {
|
|
|
+ // _logger.LogWarning(ex, "提取表单字段信息失败: {FieldName}", formField.Name);
|
|
|
+ // return null;
|
|
|
+ // }
|
|
|
+ //}
|
|
|
+
|
|
|
+
|
|
|
+ ///// <summary>
|
|
|
+ ///// 获取表单字段名称
|
|
|
+ ///// </summary>
|
|
|
+ //private string GetFormFieldName(Aspose.Words.Fields.FormField formField)
|
|
|
+ //{
|
|
|
+ // try
|
|
|
+ // {
|
|
|
+ // return !string.IsNullOrEmpty(formField.Name) ? formField.Name : "未命名字段";
|
|
|
+ // }
|
|
|
+ // catch
|
|
|
+ // {
|
|
|
+ // return "未知字段";
|
|
|
+ // }
|
|
|
+ //}
|
|
|
+
|
|
|
+
|
|
|
+ ///// <summary>
|
|
|
+ ///// 获取表单字段类型
|
|
|
+ ///// </summary>
|
|
|
+ //private string GetFormFieldType(Aspose.Words.Fields.FormField formField)
|
|
|
+ //{
|
|
|
+ // try
|
|
|
+ // {
|
|
|
+ // return formField.Type switch
|
|
|
+ // {
|
|
|
+ // FormFieldType.Regular => "常规文本",
|
|
|
+ // FormFieldType.CheckBox => "复选框",
|
|
|
+ // FormFieldType.DropDown => "下拉列表",
|
|
|
+ // _ => "未知类型"
|
|
|
+ // };
|
|
|
+ // }
|
|
|
+ // catch
|
|
|
+ // {
|
|
|
+ // return "未知类型";
|
|
|
+ // }
|
|
|
+ //}
|
|
|
+
|
|
|
+ ///// <summary>
|
|
|
+ ///// 获取表单字段值
|
|
|
+ ///// </summary>
|
|
|
+ //private string GetFormFieldValue(Aspose.Words.Fields.FormField formField)
|
|
|
+ //{
|
|
|
+ // try
|
|
|
+ // {
|
|
|
+ // // 对于15.12.0版本,使用GetFieldCode()和其他方法获取值
|
|
|
+ // switch (formField.Type)
|
|
|
+ // {
|
|
|
+ // case FormFieldType.CheckBox:
|
|
|
+ // return IsFormFieldChecked(formField) ? "选中" : "未选中";
|
|
|
+
|
|
|
+ // case FormFieldType.DropDown:
|
|
|
+ // return GetDropDownSelectedValue(formField);
|
|
|
+
|
|
|
+ // case FormFieldType.Regular:
|
|
|
+ // return GetTextFormFieldValue(formField);
|
|
|
+
|
|
|
+ // default:
|
|
|
+ // return string.Empty;
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+ // catch
|
|
|
+ // {
|
|
|
+ // return string.Empty;
|
|
|
+ // }
|
|
|
+ //}
|
|
|
+
|
|
|
+ ///// <summary>
|
|
|
+ ///// 检查复选框是否被选中
|
|
|
+ ///// </summary>
|
|
|
+ //private bool IsFormFieldChecked(Aspose.Words.Fields.FormField formField)
|
|
|
+ //{
|
|
|
+ // try
|
|
|
+ // {
|
|
|
+ // if (formField.Type != FormFieldType.CheckBox)
|
|
|
+ // return false;
|
|
|
+
|
|
|
+ // // 在15.12.0版本中,通过检查字段代码来判断复选框状态
|
|
|
+ // var fieldCode = formField.GetFieldCode() ?? string.Empty;
|
|
|
+
|
|
|
+ // // 检查常见的复选框选中标记
|
|
|
+ // return fieldCode.Contains("\\checked", StringComparison.OrdinalIgnoreCase) ||
|
|
|
+ // fieldCode.Contains("✓", StringComparison.OrdinalIgnoreCase) ||
|
|
|
+ // fieldCode.Contains("☑", StringComparison.OrdinalIgnoreCase);
|
|
|
+ // }
|
|
|
+ // catch
|
|
|
+ // {
|
|
|
+ // return false;
|
|
|
+ // }
|
|
|
+ //}
|
|
|
+
|
|
|
+ ///// <summary>
|
|
|
+ ///// 获取下拉列表选中的值
|
|
|
+ ///// </summary>
|
|
|
+ //private string GetDropDownSelectedValue(Aspose.Words.Fields.FormField formField)
|
|
|
+ //{
|
|
|
+ // try
|
|
|
+ // {
|
|
|
+ // if (formField.Type != FormFieldType.DropDown)
|
|
|
+ // return string.Empty;
|
|
|
+
|
|
|
+ // // 在15.12.0中,可能需要通过解析字段代码来获取选中的值
|
|
|
+ // var fieldCode = formField.GetFieldCode() ?? string.Empty;
|
|
|
+
|
|
|
+ // // 简单的解析逻辑 - 实际应用中可能需要更复杂的解析
|
|
|
+ // if (fieldCode.Contains("\\s", StringComparison.OrdinalIgnoreCase))
|
|
|
+ // {
|
|
|
+ // var match = System.Text.RegularExpressions.Regex.Match(
|
|
|
+ // fieldCode,
|
|
|
+ // @"\\s\s*""([^""]*)""",
|
|
|
+ // System.Text.RegularExpressions.RegexOptions.IgnoreCase);
|
|
|
+
|
|
|
+ // if (match.Success)
|
|
|
+ // {
|
|
|
+ // return match.Groups[1].Value;
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+
|
|
|
+ // return "未选择";
|
|
|
+ // }
|
|
|
+ // catch
|
|
|
+ // {
|
|
|
+ // return string.Empty;
|
|
|
+ // }
|
|
|
+ //}
|
|
|
+
|
|
|
+ ///// <summary>
|
|
|
+ ///// 获取文本表单字段的值
|
|
|
+ ///// </summary>
|
|
|
+ //private string GetTextFormFieldValue(Aspose.Words.Fields.FormField formField)
|
|
|
+ //{
|
|
|
+ // try
|
|
|
+ // {
|
|
|
+ // if (formField.Type != FormFieldType.Regular)
|
|
|
+ // return string.Empty;
|
|
|
+
|
|
|
+ // // 对于文本字段,尝试获取字段结果文本
|
|
|
+ // // 在15.12.0中,可能需要遍历字段的子节点
|
|
|
+ // var result = string.Empty;
|
|
|
+
|
|
|
+ // // 尝试获取字段的文本内容
|
|
|
+ // var fieldNodes = formField.GetChildNodes(NodeType.Any, true);
|
|
|
+ // foreach (Aspose.Words.Node node in fieldNodes)
|
|
|
+ // {
|
|
|
+ // if (node.NodeType == NodeType.Run)
|
|
|
+ // {
|
|
|
+ // result += ((Run)node).Text;
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+
|
|
|
+ // return result.Trim();
|
|
|
+ // }
|
|
|
+ // catch
|
|
|
+ // {
|
|
|
+ // return string.Empty;
|
|
|
+ // }
|
|
|
+ //}
|
|
|
+
|
|
|
+ ///// <summary>
|
|
|
+ ///// 获取表单字段状态
|
|
|
+ ///// </summary>
|
|
|
+ //private string GetFormFieldStatus(Aspose.Words.Fields.FormField formField)
|
|
|
+ //{
|
|
|
+ // try
|
|
|
+ // {
|
|
|
+ // var fieldCode = formField.GetFieldCode() ?? string.Empty;
|
|
|
+
|
|
|
+ // if (fieldCode.Contains("\\locked", StringComparison.OrdinalIgnoreCase))
|
|
|
+ // return "已锁定";
|
|
|
+
|
|
|
+ // if (fieldCode.Contains("\\disabled", StringComparison.OrdinalIgnoreCase))
|
|
|
+ // return "已禁用";
|
|
|
+
|
|
|
+ // return "活动";
|
|
|
+ // }
|
|
|
+ // catch
|
|
|
+ // {
|
|
|
+ // return "未知";
|
|
|
+ // }
|
|
|
+ //}
|
|
|
+
|
|
|
+ ///// <summary>
|
|
|
+ ///// 获取表单字段默认值
|
|
|
+ ///// </summary>
|
|
|
+ //private string GetFormFieldDefaultValue(Aspose.Words.Fields.FormField formField)
|
|
|
+ //{
|
|
|
+ // try
|
|
|
+ // {
|
|
|
+ // var fieldCode = formField.GetFieldCode() ?? string.Empty;
|
|
|
+
|
|
|
+ // // 解析默认值
|
|
|
+ // var match = System.Text.RegularExpressions.Regex.Match(
|
|
|
+ // fieldCode,
|
|
|
+ // @"\\d\s*""([^""]*)""",
|
|
|
+ // System.Text.RegularExpressions.RegexOptions.IgnoreCase);
|
|
|
+
|
|
|
+ // if (match.Success)
|
|
|
+ // {
|
|
|
+ // return match.Groups[1].Value;
|
|
|
+ // }
|
|
|
+
|
|
|
+ // return string.Empty;
|
|
|
+ // }
|
|
|
+ // catch
|
|
|
+ // {
|
|
|
+ // return string.Empty;
|
|
|
+ // }
|
|
|
+ //}
|
|
|
+
|
|
|
+ ///// <summary>
|
|
|
+ ///// 获取表单字段最大长度
|
|
|
+ ///// </summary>
|
|
|
+ //private int GetFormFieldMaxLength(Aspose.Words.Fields.FormField formField)
|
|
|
+ //{
|
|
|
+ // try
|
|
|
+ // {
|
|
|
+ // var fieldCode = formField.GetFieldCode() ?? string.Empty;
|
|
|
+
|
|
|
+ // var match = System.Text.RegularExpressions.Regex.Match(
|
|
|
+ // fieldCode,
|
|
|
+ // @"\\l\s*(\d+)",
|
|
|
+ // System.Text.RegularExpressions.RegexOptions.IgnoreCase);
|
|
|
+
|
|
|
+ // if (match.Success && int.TryParse(match.Groups[1].Value, out int maxLength))
|
|
|
+ // {
|
|
|
+ // return maxLength;
|
|
|
+ // }
|
|
|
+
|
|
|
+ // return 0;
|
|
|
+ // }
|
|
|
+ // catch
|
|
|
+ // {
|
|
|
+ // return 0;
|
|
|
+ // }
|
|
|
+ //}
|
|
|
+
|
|
|
+ ///// <summary>
|
|
|
+ ///// 获取下拉列表选项
|
|
|
+ ///// </summary>
|
|
|
+ //private List<string> GetFormFieldOptions(Aspose.Words.Fields.FormField formField)
|
|
|
+ //{
|
|
|
+ // var options = new List<string>();
|
|
|
+
|
|
|
+ // try
|
|
|
+ // {
|
|
|
+ // if (formField.Type != FormFieldType.DropDown)
|
|
|
+ // return options;
|
|
|
+
|
|
|
+ // var fieldCode = formField.GetFieldCode() ?? string.Empty;
|
|
|
+
|
|
|
+ // // 解析下拉选项
|
|
|
+ // var matches = System.Text.RegularExpressions.Regex.Matches(
|
|
|
+ // fieldCode,
|
|
|
+ // @"""([^""]*)""",
|
|
|
+ // System.Text.RegularExpressions.RegexOptions.IgnoreCase);
|
|
|
+
|
|
|
+ // foreach (System.Text.RegularExpressions.Match match in matches)
|
|
|
+ // {
|
|
|
+ // if (match.Success)
|
|
|
+ // {
|
|
|
+ // options.Add(match.Groups[1].Value);
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+
|
|
|
+ // return options;
|
|
|
+ // }
|
|
|
+ // catch
|
|
|
+ // {
|
|
|
+ // return options;
|
|
|
+ // }
|
|
|
+
|
|
|
+
|
|
|
+ ///// <summary>
|
|
|
+ ///// 统计图片数量
|
|
|
+ ///// </summary>
|
|
|
+ //private int CountImages(Document doc)
|
|
|
+ //{
|
|
|
+ // try
|
|
|
+ // {
|
|
|
+ // int imageCount = 0;
|
|
|
+ // foreach (Aspose.Words.Drawing.Shape shape in doc.GetChildNodes(NodeType.Shape, true))
|
|
|
+ // {
|
|
|
+ // if (shape.HasImage)
|
|
|
+ // {
|
|
|
+ // imageCount++;
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+ // return imageCount;
|
|
|
+ // }
|
|
|
+ // catch (Exception ex)
|
|
|
+ // {
|
|
|
+ // _logger.LogWarning(ex, "统计图片数量失败");
|
|
|
+ // return 0;
|
|
|
+ // }
|
|
|
+ //}
|
|
|
+
|
|
|
+ ///// <summary>
|
|
|
+ ///// 检查文件是否为支持的Word格式
|
|
|
+ ///// </summary>
|
|
|
+ //public bool IsSupportedWordFormat(string fileName)
|
|
|
+ //{
|
|
|
+ // var extension = Path.GetExtension(fileName).ToLower();
|
|
|
+ // return extension switch
|
|
|
+ // {
|
|
|
+ // ".doc" => true,
|
|
|
+ // ".docx" => true,
|
|
|
+ // ".dot" => true,
|
|
|
+ // ".dotx" => true,
|
|
|
+ // ".docm" => true,
|
|
|
+ // ".dotm" => true,
|
|
|
+ // _ => false
|
|
|
+ // };
|
|
|
+ //}
|
|
|
}
|
|
|
}
|