// WordExporter.cs

using System;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
using Aspose.Words;
using Aspose.Words.Loading;
using Markdig;
  6. public static class WordExporter
  7. {
  8. private static readonly MarkdownPipeline MarkdownPipeline = new MarkdownPipelineBuilder()
  9. .UseAdvancedExtensions()
  10. .Build();
  11. public static void MarkdownToWord(string markdown, string outputPath)
  12. {
  13. if (string.IsNullOrWhiteSpace(markdown))
  14. throw new ArgumentException("markdown 不能为空", nameof(markdown));
  15. // 清理 AI 常带的 ```markdown 和 ```
  16. markdown = CleanMarkdown(markdown);
  17. // AI 返回内容有时会把表格压成一行,这里做一次轻量修复
  18. markdown = NormalizeSingleLineTableMarkdown(markdown);
  19. // Markdown -> HTML,再由 Aspose 读取 HTML 保留格式
  20. var htmlBody = Markdown.ToHtml(markdown, MarkdownPipeline);
  21. var htmlContent = BuildHtmlDocument(htmlBody);
  22. var htmlBytes = Encoding.UTF8.GetBytes(htmlContent);
  23. using var stream = new MemoryStream(htmlBytes);
  24. stream.Position = 0;
  25. var loadOptions = new LoadOptions
  26. {
  27. LoadFormat = LoadFormat.Html,
  28. Encoding = Encoding.UTF8
  29. };
  30. //判断路径是否存在
  31. var dir = Path.GetDirectoryName(outputPath);
  32. if (!string.IsNullOrWhiteSpace(dir) && !Directory.Exists(dir))
  33. {
  34. Directory.CreateDirectory(dir);
  35. }
  36. var doc = new Document(stream, loadOptions);
  37. doc.Save(outputPath, SaveFormat.Docx);
  38. }
  39. private static string CleanMarkdown(string input)
  40. {
  41. input = input.Trim();
  42. if (input.StartsWith("```markdown", StringComparison.OrdinalIgnoreCase))
  43. input = input.Substring("```markdown".Length);
  44. if (input.StartsWith("```"))
  45. input = input.Substring(3);
  46. if (input.EndsWith("```"))
  47. input = input.Substring(0, input.Length - 3);
  48. return input.Trim();
  49. }
  50. private static string NormalizeSingleLineTableMarkdown(string markdown)
  51. {
  52. if (string.IsNullOrWhiteSpace(markdown))
  53. return markdown;
  54. // 已经是多行内容则不处理
  55. if (markdown.Contains('\n') || markdown.Contains('\r'))
  56. return markdown;
  57. // 仅在明显是表格时处理,避免误伤普通文本
  58. if (!markdown.Contains("|") || !Regex.IsMatch(markdown, @"\|\s*[-:]{3,}"))
  59. return markdown;
  60. // 把单行中的“行结束 + 下一行开始”从 "| |" 规整为换行
  61. var normalized = Regex.Replace(markdown, @"\|\s+\|", "|\n|");
  62. return normalized;
  63. }
  64. private static string BuildHtmlDocument(string body)
  65. {
  66. return $@"<!DOCTYPE html>
  67. <html>
  68. <head>
  69. <meta charset=""utf-8"" />
  70. <style>
  71. body {{ font-family: Calibri, 'Microsoft YaHei', sans-serif; line-height: 1.6; }}
  72. pre, code {{ font-family: Consolas, 'Courier New', monospace; }}
  73. pre {{ background: #f6f8fa; padding: 8px; border-radius: 4px; }}
  74. table {{ border-collapse: collapse; width: 100%; }}
  75. th, td {{ border: 1px solid #d0d7de; padding: 6px 8px; }}
  76. blockquote {{ border-left: 4px solid #d0d7de; margin: 8px 0; padding-left: 12px; color: #57606a; }}
  77. </style>
  78. </head>
  79. <body>
  80. {body}
  81. </body>
  82. </html>";
  83. }
  84. }