-
-
Notifications
You must be signed in to change notification settings - Fork 134
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #15 from shahedc/WorkerService
first working version of DocMaker on Core 3.1
- Loading branch information
Showing
9 changed files
with
406 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
<Project Sdk="Microsoft.NET.Sdk.Worker"> | ||
|
||
<PropertyGroup> | ||
<TargetFramework>netcoreapp3.1</TargetFramework> | ||
<UserSecretsId>dotnet-DocMaker-7AEA4418-802C-4E43-9C09-C93A92A98059</UserSecretsId> | ||
</PropertyGroup> | ||
|
||
<ItemGroup> | ||
<PackageReference Include="HtmlAgilityPack" Version="1.11.24" /> | ||
<PackageReference Include="MariGold.OpenXHTML" Version="2.6.2" /> | ||
<PackageReference Include="Microsoft.Extensions.Hosting" Version="3.1.5" /> | ||
</ItemGroup> | ||
|
||
<ItemGroup> | ||
<Folder Include="chapters\" /> | ||
</ItemGroup> | ||
</Project> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
|
||
Microsoft Visual Studio Solution File, Format Version 12.00 | ||
# Visual Studio Version 16 | ||
VisualStudioVersion = 16.0.30204.135 | ||
MinimumVisualStudioVersion = 10.0.40219.1 | ||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DocMaker", "DocMaker.csproj", "{63EDE8C4-A716-449F-AB00-3334FC38F1C3}" | ||
EndProject | ||
Global | ||
GlobalSection(SolutionConfigurationPlatforms) = preSolution | ||
Debug|Any CPU = Debug|Any CPU | ||
Release|Any CPU = Release|Any CPU | ||
EndGlobalSection | ||
GlobalSection(ProjectConfigurationPlatforms) = postSolution | ||
{63EDE8C4-A716-449F-AB00-3334FC38F1C3}.Debug|Any CPU.ActiveCfg = Debug|Any CPU | ||
{63EDE8C4-A716-449F-AB00-3334FC38F1C3}.Debug|Any CPU.Build.0 = Debug|Any CPU | ||
{63EDE8C4-A716-449F-AB00-3334FC38F1C3}.Release|Any CPU.ActiveCfg = Release|Any CPU | ||
{63EDE8C4-A716-449F-AB00-3334FC38F1C3}.Release|Any CPU.Build.0 = Release|Any CPU | ||
EndGlobalSection | ||
GlobalSection(SolutionProperties) = preSolution | ||
HideSolutionNode = FALSE | ||
EndGlobalSection | ||
GlobalSection(ExtensibilityGlobals) = postSolution | ||
SolutionGuid = {3A9445F6-EB0E-4E48-982F-D212325457D0} | ||
EndGlobalSection | ||
EndGlobal |
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using System.Threading.Tasks; | ||
using Microsoft.Extensions.DependencyInjection; | ||
using Microsoft.Extensions.Hosting; | ||
|
||
namespace DocMaker
{
    /// <summary>
    /// Entry point of the DocMaker application. Builds a generic host that
    /// runs <see cref="Worker"/> as a hosted service.
    /// </summary>
    public class Program
    {
        /// <summary>
        /// Builds the host from the command-line arguments and runs it.
        /// </summary>
        /// <param name="args">Command-line arguments passed to the process.</param>
        public static void Main(string[] args)
        {
            IHost host = CreateHostBuilder(args).Build();
            host.Run();
        }

        /// <summary>
        /// Creates the default host builder and registers <see cref="Worker"/>
        /// as a hosted service.
        /// </summary>
        /// <param name="args">Command-line arguments forwarded to the default builder.</param>
        /// <returns>The configured, not yet built, host builder.</returns>
        public static IHostBuilder CreateHostBuilder(string[] args)
        {
            IHostBuilder builder = Host.CreateDefaultBuilder(args);

            return builder.ConfigureServices(
                (hostContext, services) => services.AddHostedService<Worker>());
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
{ | ||
"profiles": { | ||
"DocMaker": { | ||
"commandName": "Project", | ||
"environmentVariables": { | ||
"DOTNET_ENVIRONMENT": "Development" | ||
} | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
using HtmlAgilityPack; | ||
using MariGold.OpenXHTML; | ||
using System; | ||
using System.Collections.Generic; | ||
using System.IO; | ||
using System.Net; | ||
using System.Text; | ||
|
||
namespace DocMaker.Utils
{
    /// <summary>
    /// Downloads a web page, strips selected elements from its HTML and converts
    /// the element with id "content" into a Word (.docx) document.
    /// </summary>
    internal class DocEngine
    {
        // Placeholder that stands in for line breaks inside <pre> blocks until the
        // final HTML string is produced, where it is swapped for <br /> tags.
        // NOTE(review): any literal "NEWLINE" text in the page content is also
        // turned into <br /> by that final replacement.
        private const string NewLinePlaceholder = "NEWLINE";

        // File name (without extension) used when the URL path yields no usable name.
        private const string DefaultOutputFileName = "_output";

        /// <summary>
        /// Fetches <paramref name="pageUrl"/>, cleans up its HTML, writes the cleaned
        /// HTML to "HtmlContent.txt" and saves it as "&lt;name&gt;.docx", where the name
        /// is derived from the path of the request URL.
        /// </summary>
        /// <param name="pageUrl">Absolute URL of the page to convert.</param>
        /// <param name="showHtml">When true, the downloaded HTML is echoed to the console.</param>
        public static void MakeDoc(string pageUrl, bool showHtml = false)
        {
            // Get HTML from website
            string htmlContent;
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(pageUrl);

            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream stream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(stream))
            {
                htmlContent = reader.ReadToEnd();
            }

            string outputFileName = BuildOutputFileName(request.RequestUri);

            if (showHtml)
            {
                Console.WriteLine(htmlContent);
            }

            // load HTML content, fix formatting
            HtmlDocument htmlDoc = new HtmlDocument();
            htmlDoc.LoadHtml(htmlContent);
            //FixImageDimensions(htmlDoc);

            DeleteHtmlContent(htmlDoc, "//footer[@class=\"entry-meta\"]");
            DeleteHtmlContent(htmlDoc, "//div[@id=\"comments\"]");
            DeleteHtmlContent(htmlDoc, "//nav[@class=\"nav-single\"]");

            string htmlNodeContent = FixCodeFormatting(htmlDoc);

            // Write html node's content to text file.
            File.WriteAllText(@"HtmlContent.txt", htmlNodeContent);

            Console.WriteLine("Making your document...");
            Console.WriteLine($"Source: {pageUrl}");

            // Create DOCX file
            WordDocument wordDoc = new WordDocument($"{outputFileName}.docx");
            wordDoc.Process(new HtmlParser(htmlNodeContent));
            wordDoc.Save();
            Console.WriteLine($"Output: {outputFileName}.docx");
        }

        /// <summary>
        /// Derives the output file name (without extension) from the URL path.
        /// Leading/trailing slashes are dropped and characters that are not valid
        /// in a file name (including inner '/') are replaced with '-'. Falls back
        /// to a default name when the path is empty.
        /// </summary>
        private static string BuildOutputFileName(Uri requestUri)
        {
            string name = requestUri.AbsolutePath.Trim('/');

            foreach (char invalidChar in Path.GetInvalidFileNameChars())
            {
                name = name.Replace(invalidChar, '-');
            }

            return name.Length == 0 ? DefaultOutputFileName : name;
        }

        /// <summary>
        /// Removes the first node matching <paramref name="nodeSelector"/> from the
        /// document. Does nothing when no node matches.
        /// </summary>
        private static void DeleteHtmlContent(HtmlDocument htmlDoc, string nodeSelector)
        {
            HtmlNode htmlNode = htmlDoc.DocumentNode.SelectSingleNode(nodeSelector);

            // Not every page has every element; a missing one is not an error.
            if (htmlNode != null)
            {
                htmlNode.Remove();
            }
        }

        /// <summary>
        /// Replaces each &lt;pre&gt; element with a grey, Courier New styled
        /// &lt;span&gt; containing its text, then returns the inner HTML of the
        /// element with id "content" with line breaks rendered as &lt;br /&gt;.
        /// </summary>
        /// <returns>
        /// The processed inner HTML, or the text "Error, id not found" when the
        /// document has no element with id "content".
        /// </returns>
        private static string FixCodeFormatting(HtmlDocument htmlDoc)
        {
            // look for <pre> tags which contain code snippets
            // (SelectNodes yields null, not an empty collection, when nothing matches)
            var preNodes = htmlDoc.DocumentNode.SelectNodes("//pre");
            if (preNodes != null)
            {
                foreach (HtmlNode htmlPreNode in preNodes)
                {
                    // replace doc's newline with "NEWLINE" placeholder
                    // ... as HTML tag insertion doesn't seem to work (?)
                    // All newline flavours are handled: downloaded pages commonly use
                    // "\n" regardless of the platform's Environment.NewLine.
                    var replacedText = htmlPreNode.InnerText
                        .Replace("\r\n", NewLinePlaceholder)
                        .Replace("\n", NewLinePlaceholder)
                        .Replace("\r", NewLinePlaceholder);

                    var textWithFormatting = "<span style=\"color: #808080;font-family: Courier New;\">"
                        + replacedText + "</span>";

                    htmlPreNode.ParentNode.ReplaceChild(
                        HtmlNode.CreateNode(textWithFormatting), htmlPreNode);
                }
            }

            HtmlNode htmlNode = htmlDoc.DocumentNode.SelectSingleNode("//*[@id=\"content\"]");
            string htmlNodeContent = (htmlNode == null) ? "Error, id not found" : htmlNode.InnerHtml;

            // replace placeholders with actual <br> tags
            htmlNodeContent = htmlNodeContent.Replace(NewLinePlaceholder, "<br />");
            return htmlNodeContent;
        }

        /// <summary>
        /// Strips the class, srcset, sizes, width and height attributes from every
        /// &lt;img&gt; element and adds a style attribute fixing the width to 100px.
        /// Currently not called (the call in MakeDoc is commented out).
        /// </summary>
        private static void FixImageDimensions(HtmlDocument htmlDoc)
        {
            // look for <img> tags
            // (SelectNodes yields null, not an empty collection, when nothing matches)
            var imgNodes = htmlDoc.DocumentNode.SelectNodes("//img");
            if (imgNodes == null)
            {
                return;
            }

            string[] attributesToRemove = { "class", "srcset", "sizes", "width", "height" };

            foreach (HtmlNode htmlImgNode in imgNodes)
            {
                foreach (string attributeName in attributesToRemove)
                {
                    htmlImgNode.Attributes[attributeName]?.Remove();
                }

                //Create new style attribute to set width?
                HtmlAttribute styleAttribute = htmlDoc.CreateAttribute("style");
                styleAttribute.Value = "width:100px";
                htmlImgNode.Attributes.Add(styleAttribute);
            }
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using System.Threading; | ||
using System.Threading.Tasks; | ||
using DocMaker.Utils; | ||
using Microsoft.Extensions.Hosting; | ||
using Microsoft.Extensions.Logging; | ||
|
||
namespace DocMaker
{
    /// <summary>
    /// Background service that converts a fixed list of articles into Word
    /// documents, one after another, via <see cref="DocEngine.MakeDoc"/>.
    /// </summary>
    public class Worker : BackgroundService
    {
        private readonly ILogger<Worker> _logger;

        /// <summary>
        /// Creates the worker.
        /// </summary>
        /// <param name="logger">Logger used for progress and error messages.</param>
        public Worker(ILogger<Worker> logger)
        {
            _logger = logger;
        }

        /// <summary>
        /// Processes every article URL in order. Each conversion runs on the thread
        /// pool so this method yields to the host instead of blocking its startup.
        /// A failing article is logged and skipped; the remaining ones still run.
        /// </summary>
        /// <param name="stoppingToken">
        /// Signalled when the host is shutting down; checked between articles.
        /// A conversion that is already in progress is not interrupted.
        /// </param>
        protected override async Task ExecuteAsync(CancellationToken stoppingToken)
        {
            // string array of URLs *without* any trailing slash
            string[] articleUrls = {
                "https://wakeupandcode.com/authentication-authorization-in-asp-net-core-razor-pages",
                "https://wakeupandcode.com/blazor-full-stack-web-dev-in-asp-net-core",
                "https://wakeupandcode.com/cookies-and-consent-in-asp-net-core",
                "https://wakeupandcode.com/deploying-asp-net-core-to-azure-app-service",
                "https://wakeupandcode.com/ef-core-relationships-in-asp-net-core",
                "https://wakeupandcode.com/forms-and-fields-in-asp-net-core",

                "https://wakeupandcode.com/generic-host-builder-in-asp-net-core",
                "https://wakeupandcode.com/handling-errors-in-asp-net-core",
                "https://wakeupandcode.com/iis-hosting-for-asp-net-core-web-apps",
                "https://wakeupandcode.com/javascript-css-html-static-files-in-asp-net-core",
                "https://wakeupandcode.com/key-vault-for-asp-net-core-web-apps",
                "https://wakeupandcode.com/logging-in-asp-net-core",

                "https://wakeupandcode.com/middleware-in-asp-net-core",
                "https://wakeupandcode.com/net-core-3-vs2019-and-csharp-8",
                "https://wakeupandcode.com/organizational-accounts-for-asp-net-core",
                "https://wakeupandcode.com/production-tips-for-asp-net-core-web-apps",
                "https://wakeupandcode.com/query-tags-in-ef-core-for-asp-net-core",
                "https://wakeupandcode.com/razor-pages-in-asp-net-core",

                "https://wakeupandcode.com/summarizing-build-2019-signalr-service",
                "https://wakeupandcode.com/tag-helper-authoring-in-asp-net-core",
                "https://wakeupandcode.com/unit-testing-in-asp-net-core",
                "https://wakeupandcode.com/validation-in-asp-net-core",
                "https://wakeupandcode.com/worker-service-in-asp-net-core",
                "https://wakeupandcode.com/xml-json-serialization-in-asp-net-core",

                "https://wakeupandcode.com/yaml-defined-cicd-for-asp-net-core",
                "https://wakeupandcode.com/zero-downtime-web-apps-for-asp-net-core"
            };

            int failedCount = 0;

            _logger.LogInformation($"Processing {articleUrls.Length} docs at: {DateTimeOffset.Now}");
            for (var articleCounter = 0; articleCounter < articleUrls.Length; articleCounter++)
            {
                // Stop between articles when the host is shutting down.
                if (stoppingToken.IsCancellationRequested)
                {
                    _logger.LogWarning(
                        "Cancelled after {Processed} of {Total} docs",
                        articleCounter, articleUrls.Length);
                    return;
                }

                string articleUrl = articleUrls[articleCounter];
                _logger.LogInformation($"Making doc {articleCounter + 1} at: {DateTimeOffset.Now}");

                try
                {
                    // MakeDoc is synchronous (blocking network and file I/O); run it
                    // off the calling thread so host startup is not held up.
                    await Task.Run(() => DocEngine.MakeDoc(articleUrl), stoppingToken);
                }
                catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
                {
                    _logger.LogWarning(
                        "Cancelled after {Processed} of {Total} docs",
                        articleCounter, articleUrls.Length);
                    return;
                }
                catch (Exception ex)
                {
                    // One bad article must not abort the rest of the batch.
                    failedCount++;
                    _logger.LogError(ex, "Failed to make doc from {Url}", articleUrl);
                }
            }

            if (failedCount == 0)
            {
                _logger.LogInformation($"Completed {articleUrls.Length} docs at: {DateTimeOffset.Now}");
            }
            else
            {
                _logger.LogWarning(
                    "Completed {Succeeded} of {Total} docs ({Failed} failed) at: {Time}",
                    articleUrls.Length - failedCount, articleUrls.Length, failedCount, DateTimeOffset.Now);
            }
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
{ | ||
"Logging": { | ||
"LogLevel": { | ||
"Default": "Information", | ||
"Microsoft": "Warning", | ||
"Microsoft.Hosting.Lifetime": "Information" | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
{ | ||
"Logging": { | ||
"LogLevel": { | ||
"Default": "Information", | ||
"Microsoft": "Warning", | ||
"Microsoft.Hosting.Lifetime": "Information" | ||
} | ||
} | ||
} |