Skip to content

Commit

Permalink
Merge pull request #15 from shahedc/WorkerService
Browse files Browse the repository at this point in the history
first working version of DocMaker on Core 3.1
  • Loading branch information
shahedc authored Jun 12, 2020
2 parents c2f8def + 28a9fc9 commit a445d6a
Show file tree
Hide file tree
Showing 9 changed files with 406 additions and 0 deletions.
17 changes: 17 additions & 0 deletions experimental/DocMaker/DocMaker.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
<Project Sdk="Microsoft.NET.Sdk.Worker">
<!-- MSBuild definition of the DocMaker project; it uses the Worker SDK and targets .NET Core 3.1. -->

<PropertyGroup>
<TargetFramework>netcoreapp3.1</TargetFramework>
<!-- NOTE(review): presumably the per-project key for the user-secrets store; confirm it is still needed. -->
<UserSecretsId>dotnet-DocMaker-7AEA4418-802C-4E43-9C09-C93A92A98059</UserSecretsId>
</PropertyGroup>

<ItemGroup>
<!-- HtmlAgilityPack: imported by Utils/DocEngine.cs to load and edit the downloaded HTML. -->
<PackageReference Include="HtmlAgilityPack" Version="1.11.24" />
<!-- MariGold.OpenXHTML: imported by Utils/DocEngine.cs, which writes the .docx output. -->
<PackageReference Include="MariGold.OpenXHTML" Version="2.6.2" />
<!-- Microsoft.Extensions.Hosting: imported by Program.cs and Worker.cs for the host and BackgroundService. -->
<PackageReference Include="Microsoft.Extensions.Hosting" Version="3.1.5" />
</ItemGroup>

<ItemGroup>
<!-- Folder entry for "chapters\"; NOTE(review): its purpose is not visible in this commit; confirm. -->
<Folder Include="chapters\" />
</ItemGroup>
</Project>
25 changes: 25 additions & 0 deletions experimental/DocMaker/DocMaker.sln
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 16
VisualStudioVersion = 16.0.30204.135
MinimumVisualStudioVersion = 10.0.40219.1
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DocMaker", "DocMaker.csproj", "{63EDE8C4-A716-449F-AB00-3334FC38F1C3}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Release|Any CPU = Release|Any CPU
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{63EDE8C4-A716-449F-AB00-3334FC38F1C3}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{63EDE8C4-A716-449F-AB00-3334FC38F1C3}.Debug|Any CPU.Build.0 = Debug|Any CPU
{63EDE8C4-A716-449F-AB00-3334FC38F1C3}.Release|Any CPU.ActiveCfg = Release|Any CPU
{63EDE8C4-A716-449F-AB00-3334FC38F1C3}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {3A9445F6-EB0E-4E48-982F-D212325457D0}
EndGlobalSection
EndGlobal
124 changes: 124 additions & 0 deletions experimental/DocMaker/HtmlContent.txt

Large diffs are not rendered by default.

24 changes: 24 additions & 0 deletions experimental/DocMaker/Program.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting;

namespace DocMaker
{
/// <summary>
/// Application entry point: builds the default host, registers the
/// <see cref="Worker"/> hosted service and runs the host.
/// </summary>
public class Program
{
    /// <summary>
    /// Builds the host from the command-line arguments and runs it.
    /// </summary>
    /// <param name="args">Command-line arguments forwarded to the host builder.</param>
    public static void Main(string[] args)
    {
        IHostBuilder hostBuilder = CreateHostBuilder(args);
        IHost host = hostBuilder.Build();
        host.Run();
    }

    /// <summary>
    /// Creates the default host builder with <see cref="Worker"/> registered as a hosted service.
    /// </summary>
    /// <param name="args">Command-line arguments passed to <c>Host.CreateDefaultBuilder</c>.</param>
    /// <returns>The configured, not yet built, host builder.</returns>
    public static IHostBuilder CreateHostBuilder(string[] args)
    {
        IHostBuilder defaultBuilder = Host.CreateDefaultBuilder(args);
        return defaultBuilder.ConfigureServices(RegisterServices);
    }

    // Adds the application's services to the container; the host context is not needed here.
    private static void RegisterServices(HostBuilderContext context, IServiceCollection serviceCollection)
    {
        serviceCollection.AddHostedService<Worker>();
    }
}
}
10 changes: 10 additions & 0 deletions experimental/DocMaker/Properties/launchSettings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"profiles": {
"DocMaker": {
"commandName": "Project",
"environmentVariables": {
"DOTNET_ENVIRONMENT": "Development"
}
}
}
}
120 changes: 120 additions & 0 deletions experimental/DocMaker/Utils/DocEngine.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
using HtmlAgilityPack;
using MariGold.OpenXHTML;
using System;
using System.Collections.Generic;
using System.IO;
using System.Net;
using System.Text;

namespace DocMaker.Utils
{
/// <summary>
/// Downloads a web page, trims it down to its content element and converts the
/// result into a DOCX file.
/// </summary>
internal class DocEngine
{
    // Stand-in for line breaks inside code snippets; it is swapped for <br /> once
    // the final HTML string has been produced (see FixCodeFormatting).
    private const string NewLinePlaceholder = "NEWLINE";

    // Base file name used when the URL path yields no usable name (e.g. a site root).
    private const string DefaultOutputName = "_output";

    // <img> attributes stripped by FixImageDimensions before a fixed width is applied.
    private static readonly string[] ImageAttributesToStrip =
    {
        "class", "srcset", "sizes", "width", "height"
    };

    /// <summary>
    /// Fetches <paramref name="pageUrl"/>, removes the footer/comments/navigation
    /// sections, rewrites code snippets and saves the content as
    /// "&lt;url path&gt;.docx" in the current directory.
    /// </summary>
    /// <param name="pageUrl">Absolute HTTP(S) URL of the page to convert.</param>
    /// <param name="showHtml">When true, the raw downloaded HTML is echoed to the console.</param>
    /// <remarks>
    /// Also writes the extracted HTML to "HtmlContent.txt"; that file is overwritten on every call.
    /// </remarks>
    public static void MakeDoc(string pageUrl, bool showHtml = false)
    {
        // Get HTML from website
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(pageUrl);

        string htmlContent;
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream stream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(stream))
        {
            htmlContent = reader.ReadToEnd();
        }

        string outputFileName = GetOutputFileName(request.RequestUri);

        if (showHtml)
        {
            Console.WriteLine(htmlContent);
        }

        // load HTML content, fix formatting
        HtmlDocument htmlDoc = new HtmlDocument();
        htmlDoc.LoadHtml(htmlContent);
        //FixImageDimensions(htmlDoc);

        // Sections that should not end up in the document; pages lacking them are fine.
        DeleteHtmlContent(htmlDoc, "//footer[@class=\"entry-meta\"]");
        DeleteHtmlContent(htmlDoc, "//div[@id=\"comments\"]");
        DeleteHtmlContent(htmlDoc, "//nav[@class=\"nav-single\"]");

        string htmlNodeContent = FixCodeFormatting(htmlDoc);

        // Write html node's content to text file.
        File.WriteAllText(@"HtmlContent.txt", htmlNodeContent);

        Console.WriteLine("Making your document...");
        Console.WriteLine($"Source: {pageUrl}");

        // Create DOCX file
        WordDocument wordDoc = new WordDocument($"{outputFileName}.docx");
        wordDoc.Process(new HtmlParser(htmlNodeContent));
        wordDoc.Save();
        Console.WriteLine($"Output: {outputFileName}.docx");
    }

    /// <summary>
    /// Derives the output file's base name (without extension) from the request URI's path.
    /// </summary>
    /// <param name="requestUri">URI the page was requested from.</param>
    /// <returns>
    /// The path without leading/trailing slashes and with inner slashes replaced by
    /// underscores, or <see cref="DefaultOutputName"/> when the path is empty.
    /// </returns>
    private static string GetOutputFileName(Uri requestUri)
    {
        // A single-segment path such as "/my-article" still maps to "my-article";
        // trailing slashes and nested segments no longer produce an unusable file name.
        string name = requestUri.AbsolutePath.Trim('/').Replace('/', '_');
        return name.Length == 0 ? DefaultOutputName : name;
    }

    /// <summary>
    /// Removes the first node matching <paramref name="nodeSelector"/>, if there is one.
    /// </summary>
    /// <param name="htmlDoc">Document to modify in place.</param>
    /// <param name="nodeSelector">XPath expression selecting the node to delete.</param>
    private static void DeleteHtmlContent(HtmlDocument htmlDoc, string nodeSelector)
    {
        // SelectSingleNode yields null when nothing matches, so the removal is conditional.
        HtmlNode htmlNode = htmlDoc.DocumentNode.SelectSingleNode(nodeSelector);
        htmlNode?.Remove();
    }

    /// <summary>
    /// Replaces every &lt;pre&gt; element with a grey Courier New &lt;span&gt; whose line
    /// breaks become &lt;br /&gt; tags, then returns the inner HTML of the element with id "content".
    /// </summary>
    /// <param name="htmlDoc">Document to modify in place.</param>
    /// <returns>
    /// The content element's inner HTML, or the text "Error, id not found" when the
    /// document has no element with id "content".
    /// </returns>
    private static string FixCodeFormatting(HtmlDocument htmlDoc)
    {
        // look for <pre> tags which contain code snippets
        // (SelectNodes returns null, not an empty collection, when there are none)
        var preNodes = htmlDoc.DocumentNode.SelectNodes("//pre");
        if (preNodes != null)
        {
            foreach (HtmlNode htmlPreNode in preNodes)
            {
                // Replace line breaks with a placeholder, as HTML tag insertion doesn't
                // seem to work (?). The page's line endings do not depend on the OS this
                // runs on, so both "\r\n" and "\n" are handled explicitly.
                string replacedText = htmlPreNode.InnerText
                    .Replace("\r\n", NewLinePlaceholder)
                    .Replace("\n", NewLinePlaceholder);

                string textWithFormatting = "<span style=\"color: #808080;font-family: Courier New;\">"
                    + replacedText + "</span>";

                htmlPreNode.ParentNode.ReplaceChild(
                    HtmlNode.CreateNode(textWithFormatting), htmlPreNode);
            }
        }

        HtmlNode htmlNode = htmlDoc.DocumentNode.SelectSingleNode("//*[@id=\"content\"]");
        string htmlNodeContent = (htmlNode == null) ? "Error, id not found" : htmlNode.InnerHtml;

        // replace placeholders with actual <br> tags
        return htmlNodeContent.Replace(NewLinePlaceholder, "<br />");
    }

    /// <summary>
    /// Strips the class, srcset, sizes, width and height attributes from every
    /// &lt;img&gt; element and sets its style to a fixed 100px width.
    /// Currently not called; the call in <see cref="MakeDoc"/> is commented out.
    /// </summary>
    /// <param name="htmlDoc">Document to modify in place.</param>
    private static void FixImageDimensions(HtmlDocument htmlDoc)
    {
        // SelectNodes returns null when the page has no images.
        var imgNodes = htmlDoc.DocumentNode.SelectNodes("//img");
        if (imgNodes == null)
        {
            return;
        }

        foreach (HtmlNode htmlImgNode in imgNodes)
        {
            foreach (string attributeName in ImageAttributesToStrip)
            {
                // The indexer returns null for a missing attribute.
                htmlImgNode.Attributes[attributeName]?.Remove();
            }

            // Set (or overwrite) the style attribute instead of appending a second one.
            htmlImgNode.SetAttributeValue("style", "width:100px");
        }
    }
}
}
68 changes: 68 additions & 0 deletions experimental/DocMaker/Worker.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using DocMaker.Utils;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;

namespace DocMaker
{
/// <summary>
/// Background service that converts a fixed list of articles to DOCX files,
/// one after another, using <see cref="DocEngine.MakeDoc"/>.
/// </summary>
public class Worker : BackgroundService
{
    // Article URLs to convert, *without* any trailing slash.
    private static readonly string[] ArticleUrls =
    {
        "https://wakeupandcode.com/authentication-authorization-in-asp-net-core-razor-pages",
        "https://wakeupandcode.com/blazor-full-stack-web-dev-in-asp-net-core",
        "https://wakeupandcode.com/cookies-and-consent-in-asp-net-core",
        "https://wakeupandcode.com/deploying-asp-net-core-to-azure-app-service",
        "https://wakeupandcode.com/ef-core-relationships-in-asp-net-core",
        "https://wakeupandcode.com/forms-and-fields-in-asp-net-core",

        "https://wakeupandcode.com/generic-host-builder-in-asp-net-core",
        "https://wakeupandcode.com/handling-errors-in-asp-net-core",
        "https://wakeupandcode.com/iis-hosting-for-asp-net-core-web-apps",
        "https://wakeupandcode.com/javascript-css-html-static-files-in-asp-net-core",
        "https://wakeupandcode.com/key-vault-for-asp-net-core-web-apps",
        "https://wakeupandcode.com/logging-in-asp-net-core",

        "https://wakeupandcode.com/middleware-in-asp-net-core",
        "https://wakeupandcode.com/net-core-3-vs2019-and-csharp-8",
        "https://wakeupandcode.com/organizational-accounts-for-asp-net-core",
        "https://wakeupandcode.com/production-tips-for-asp-net-core-web-apps",
        "https://wakeupandcode.com/query-tags-in-ef-core-for-asp-net-core",
        "https://wakeupandcode.com/razor-pages-in-asp-net-core",

        "https://wakeupandcode.com/summarizing-build-2019-signalr-service",
        "https://wakeupandcode.com/tag-helper-authoring-in-asp-net-core",
        "https://wakeupandcode.com/unit-testing-in-asp-net-core",
        "https://wakeupandcode.com/validation-in-asp-net-core",
        "https://wakeupandcode.com/worker-service-in-asp-net-core",
        "https://wakeupandcode.com/xml-json-serialization-in-asp-net-core",

        "https://wakeupandcode.com/yaml-defined-cicd-for-asp-net-core",
        "https://wakeupandcode.com/zero-downtime-web-apps-for-asp-net-core"
    };

    private readonly ILogger<Worker> _logger;

    /// <summary>
    /// Creates the worker.
    /// </summary>
    /// <param name="logger">Logger used for progress and error messages.</param>
    public Worker(ILogger<Worker> logger)
    {
        _logger = logger;
    }

    /// <summary>
    /// Converts each configured article in order. A failure on one article is logged
    /// and the remaining articles are still processed; a stop request ends the run
    /// before the next article starts.
    /// </summary>
    /// <param name="stoppingToken">Signalled by the host when the service should stop.</param>
    /// <returns>A task that completes when all articles were attempted or a stop was requested.</returns>
    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        _logger.LogInformation("Processing {DocCount} docs at: {Time}", ArticleUrls.Length, DateTimeOffset.Now);

        int completed = 0;
        for (var articleCounter = 0; articleCounter < ArticleUrls.Length; articleCounter++)
        {
            if (stoppingToken.IsCancellationRequested)
            {
                _logger.LogInformation("Stop requested after {DocCount} docs at: {Time}", completed, DateTimeOffset.Now);
                return;
            }

            string articleUrl = ArticleUrls[articleCounter];
            _logger.LogInformation("Making doc {DocNumber} at: {Time}", articleCounter + 1, DateTimeOffset.Now);

            try
            {
                // MakeDoc is synchronous; running it on the thread pool makes ExecuteAsync
                // yield to the host right away instead of doing every download during startup.
                await Task.Run(() => DocEngine.MakeDoc(articleUrl), stoppingToken);
                completed++;
            }
            catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
            {
                _logger.LogInformation("Stop requested after {DocCount} docs at: {Time}", completed, DateTimeOffset.Now);
                return;
            }
            catch (Exception ex)
            {
                // Log here: once the work runs asynchronously, an exception escaping
                // ExecuteAsync would only fault the background task and could go unnoticed.
                _logger.LogError(ex, "Failed to make doc {DocNumber} from {Url}", articleCounter + 1, articleUrl);
            }
        }

        _logger.LogInformation("Completed {DocCount} docs at: {Time}", completed, DateTimeOffset.Now);
    }
}
}
9 changes: 9 additions & 0 deletions experimental/DocMaker/appsettings.Development.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"Logging": {
"LogLevel": {
"Default": "Information",
"Microsoft": "Warning",
"Microsoft.Hosting.Lifetime": "Information"
}
}
}
9 changes: 9 additions & 0 deletions experimental/DocMaker/appsettings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"Logging": {
"LogLevel": {
"Default": "Information",
"Microsoft": "Warning",
"Microsoft.Hosting.Lifetime": "Information"
}
}
}

0 comments on commit a445d6a

Please sign in to comment.