diff --git a/src/BingImageDownload/BingInteractionAndParsing.cs b/src/BingImageDownload/BingInteractionAndParsing.cs index 77acb64..ebb3c34 100644 --- a/src/BingImageDownload/BingInteractionAndParsing.cs +++ b/src/BingImageDownload/BingInteractionAndParsing.cs @@ -6,7 +6,7 @@ using System.Linq; using System.Net; using System.Threading; -using System.Xml; +using System.Xml.Linq; namespace BingImageDownload { @@ -53,53 +53,52 @@ internal void GetBingImages(CancellationToken cancellationToken) consoleWriter.WriteLine($"Searching for images for {country.Name} - {country.DisplayName}"); var countryImages = 0; var countryDuplicateImages = 0; - var currentIndex = 0; var moreImages = true; - var startDate = string.Empty; - var endDate = string.Empty; + var datePairs = new Dictionary<(string start, string end), XElement>(); while (moreImages) { - var xmlNodeList = GetImages(currentIndex, country.Name); + var imageNodes = GetImages(datePairs.Count, country.Name); - if (xmlNodeList == null) + if (!imageNodes.Any()) { moreImages = false; } else { - foreach (XmlNode xmlNode in xmlNodeList) + foreach (var imageNode in imageNodes) { - var nodeStartDate = xmlNode.SelectSingleNode("startdate")?.InnerText; - var nodeEndDate = xmlNode.SelectSingleNode("enddate")?.InnerText; + var startDate = imageNode.Element("startdate")?.Value; + var endDate = imageNode.Element("enddate")?.Value; - if (startDate == nodeStartDate && endDate == nodeEndDate) + if (datePairs.Any(x => x.Key.start == startDate && x.Key.end == endDate)) { moreImages = false; - break; + continue; } - startDate = nodeStartDate; - endDate = nodeEndDate; - var imageUrl = $"{Url}{xmlNode.SelectSingleNode("urlBase")?.InnerText}_1920x1080.jpg"; - consoleWriter.WriteLine(1, $"Image for: '{country.Name}' on {startDate}-{endDate} index {currentIndex} was: {imageUrl}"); - try - { - if (DownloadAndSaveImage(xmlNode)) - { - countryImages++; - } - else - { - countryDuplicateImages++; - } - } - catch (Exception ex) - { - consoleWriter.WriteLine("There was an error getting image", ex); - } + datePairs.Add((startDate, endDate), imageNode); } + } + } - currentIndex += 1; + foreach (var ((startDate, endDate), imageNode) in datePairs.OrderBy(x => x.Key.start)) + { + var imageUrl = $"{Url}{imageNode.Element("urlBase")?.Value}_1920x1080.jpg"; + consoleWriter.WriteLine(1, $"Image for: '{country.Name}' on {startDate}-{endDate} was: {imageUrl}"); + try + { + if (DownloadAndSaveImage(imageNode, imageUrl)) + { + countryImages++; + } + else + { + countryDuplicateImages++; + } + } + catch (Exception ex) + { + consoleWriter.WriteLine("There was an error getting image", ex); } } @@ -112,28 +111,25 @@ internal void GetBingImages(CancellationToken cancellationToken) consoleWriter.WriteLine($"Found {downloadedImages} new images"); } - internal bool DownloadAndSaveImage(XmlNode xmlNode) + internal bool DownloadAndSaveImage(XElement imageNode, string imageUrl) { - var fileUrl = $"{Url}{xmlNode.SelectSingleNode("urlBase")?.InnerText}_1920x1080.jpg"; - if (urlsRetrieved.Contains(fileUrl)) + if (urlsRetrieved.Contains(imageUrl)) { consoleWriter.WriteLine(2, "Already Downloaded Image URL"); return false; } - var filePath = Path.Combine(paths.SavePath, GetFileName(xmlNode)); + var filePath = Path.Combine(paths.SavePath, GetFileName(imageUrl)); var tempFilename = Path.Combine(paths.DownloadPath, Guid.NewGuid() + ".jpg"); try { - using (var client = new WebClient()) - { - client.DownloadFile(fileUrl, tempFilename); - } + using var client = new WebClient(); + client.DownloadFile(imageUrl, tempFilename); } catch (Exception e) { - consoleWriter.WriteLine(2, $"Error downloading image from url: {fileUrl}", e); + consoleWriter.WriteLine(2, $"Error downloading image from url: {imageUrl}", e); return false; } @@ -145,7 +141,7 @@ internal bool DownloadAndSaveImage(XmlNode xmlNode) consoleWriter.WriteLine(3, "Found New Image"); using (var srcImg = Image.Load(tempFilename)) { - imagePropertyHandling.SetTitleOnImage(xmlNode, srcImg); + imagePropertyHandling.SetImageExifTags(imageNode, srcImg); srcImg.Save(filePath); } imageHashing.AddHash(filePath); @@ -155,17 +151,15 @@ internal bool DownloadAndSaveImage(XmlNode xmlNode) consoleWriter.WriteLine(3, "Identical Image Downloaded"); } - urlsRetrieved.Add(fileUrl); + urlsRetrieved.Add(imageUrl); + SaveUrlBin(); File.Delete(tempFilename); return newImage; } - internal string GetFileName(XmlNode xmlNode) + internal string GetFileName(string imageUrl) { - var nameNode = xmlNode.SelectSingleNode("urlBase"); - if (nameNode == null) throw new Exception("Missing urlBase Node"); - - var name = nameNode.InnerText.Substring(7); + var name = imageUrl.Substring(7 + Url.Length); if (name.Contains("_")) { name = name.Substring(0, name.IndexOf("_", StringComparison.Ordinal)); @@ -181,33 +175,30 @@ internal string GetFileName(XmlNode xmlNode) return Path.GetInvalidFileNameChars().Aggregate(name, (current, invalidChar) => current.Replace(invalidChar, '-')); } - internal XmlNodeList GetImages(int currentIndex, string country) + internal List GetImages(int currentIndex, string country) { - var urlToLoad = $"{Url}/HPImageArchive.aspx?format=xml&idx={currentIndex}&n=1&mkt={country}"; + var urlToLoad = $"{Url}/HPImageArchive.aspx?format=xml&idx={currentIndex}&n=5&mkt={country}"; try { - using (var client = new WebClient()) + using var client = new WebClient(); + var output = client.DownloadString(urlToLoad); + if (output.Length > 0 && output.Contains("")) { - var output = client.DownloadString(urlToLoad); - if (output.Length > 0 && output.Contains("")) + try { - try - { - var xmlDocument = new XmlDocument(); - xmlDocument.LoadXml(output); + var xDocument = XDocument.Parse(output); - return xmlDocument.GetElementsByTagName("image"); - } - catch (Exception e) - { - consoleWriter.WriteLine("Error getting images from XML response", e); - return null; - } + return xDocument.Descendants("image").ToList(); + } + catch (Exception e) + { + consoleWriter.WriteLine("Error getting images from XML response", e); + return null; } - - return null; } + + return null; } catch (Exception e) { @@ -216,7 +207,7 @@ internal XmlNodeList GetImages(int currentIndex, string country) } } - internal void SaveUrlBin() + private void SaveUrlBin() { serializer.Serialize(urlsRetrieved, urlsRetrievedBinFile); } diff --git a/src/BingImageDownload/HistogramHash.cs b/src/BingImageDownload/HistogramHash.cs index 72ccc1a..743b76f 100644 --- a/src/BingImageDownload/HistogramHash.cs +++ b/src/BingImageDownload/HistogramHash.cs @@ -1,3 +1,4 @@ +using System; using System.Collections.Generic; using System.IO; using System.Linq; @@ -7,32 +8,44 @@ namespace BingImageDownload public class HistogramHash { public string FileName { get; } - public List Rgba { get; } + public List Rgb { get; } - public HistogramHash(string fileName, List rgba) + public HistogramHash(string fileName, List rgb) { FileName = fileName; - Rgba = rgba; + Rgb = rgb; } internal bool IsInvalid(Paths paths) { if (string.IsNullOrWhiteSpace(FileName)) return true; if (!File.Exists(Path.Combine(paths.SavePath, FileName)) && !File.Exists(Path.Combine(paths.ArchivePath, FileName))) return true; - if (Rgba == null || !Rgba.Any()) return true; + if (Rgb == null || !Rgb.Any()) return true; return false; } internal bool Equal(HistogramHash other) { - foreach (var val in Rgba) + var differencesOverTolerance = 0f; + + foreach (var val in Rgb) { - var otherVal = other.Rgba.FirstOrDefault(x => x.X.Equals(val.X) && x.Y.Equals(val.Y)); + var otherVal = other.Rgb.FirstOrDefault(x => x.X.Equals(val.X) && x.Y.Equals(val.Y)); if (otherVal == null) return false; - if (!val.RgbaValue.Equals(otherVal.RgbaValue)) return false; + + var differenceR = Math.Abs(val.R - otherVal.R); + var differenceG = Math.Abs(val.G - otherVal.G); + var differenceB = Math.Abs(val.B - otherVal.B); + + if (differenceR > 3 || differenceG > 3 || differenceB > 3) + { + differencesOverTolerance++; + } } - return true; + var differencePercent = differencesOverTolerance / Rgb.Count * 100; + + return differencePercent < 1; } } } diff --git a/src/BingImageDownload/ImageHashing.cs b/src/BingImageDownload/ImageHashing.cs index f9a6fca..a95bf5c 100644 --- a/src/BingImageDownload/ImageHashing.cs +++ b/src/BingImageDownload/ImageHashing.cs @@ -45,7 +45,7 @@ internal bool ImageInHash(string tempFilename, string realFileName) { if (HaveFilePathInHashTable(realFileName)) return true; - var testHash = GetRgbaHistogramHash(tempFilename); + var testHash = GetRgbHistogramHash(tempFilename); return histogramHashTable.Any(hash => hash.Equal(testHash)); } @@ -55,14 +55,19 @@ internal bool HaveFilePathInHashTable(string filePath) return histogramHashTable.Any(x => x.FileName.Equals(fileName, StringComparison.InvariantCultureIgnoreCase)); } - internal void AddHash(string filePath) + internal void AddHash(string filePath, bool saveHashTable = true) { if (HaveFilePathInHashTable(filePath)) return; - histogramHashTable.Add(GetRgbaHistogramHash(filePath)); + histogramHashTable.Add(GetRgbHistogramHash(filePath)); + + if (saveHashTable) + { + SaveHashTableBin(); + } } - internal void SaveHashTableBin() + private void SaveHashTableBin() { serializer.Serialize(histogramHashTable, histogramBinFile); } @@ -81,13 +86,13 @@ private void HashExistingImages(int retryCount = 0) foreach (var file in Directory.GetFiles(paths.SavePath, "*.jpg").Where(x => !HaveFilePathInHashTable(x))) { consoleWriter.WriteLine($"Hashing file: {file}"); - AddHash(file); + AddHash(file, false); } foreach (var file in Directory.GetFiles(paths.ArchivePath, "*.jpg").Where(x => !HaveFilePathInHashTable(x))) { consoleWriter.WriteLine($"Hashing file: {file}"); - AddHash(file); + AddHash(file, false); } } catch (Exception) @@ -103,30 +108,32 @@ private void HashExistingImages(int retryCount = 0) } } - private HistogramHash GetRgbaHistogramHash(string filePath) + private HistogramHash GetRgbHistogramHash(string filePath) { var histogramFile = Path.Combine(paths.HistogramPath, Guid.NewGuid() + ".jpg"); File.Copy(filePath, histogramFile); - var rgba = new List(); + var Rgb = new List(); var fileName = Path.GetFileName(filePath); - using (var image = Image.Load(histogramFile)) + using (var image = Image.Load(histogramFile)) { - image.Mutate(x => x.Resize(new Size(32))); + //Scale down from 1920*1080 to 48*27 - this will pixelate but enough to tell differences. + //This means 1296 total pixels rather than 2073600. + image.Mutate(x => x.Resize(48, 27).Grayscale()); for (var x = 0; x < image.Width; x++) { for (var y = 0; y < image.Height; y++) { var pixel = image[x, y]; - rgba.Add(new RgbaPixelData(x, y, pixel.Rgba)); + Rgb.Add(new RgbPixelData(x, y, pixel.R, pixel.G, pixel.B)); } } } File.Delete(histogramFile); - return new HistogramHash(fileName, rgba); + return new HistogramHash(fileName, Rgb); } } } diff --git a/src/BingImageDownload/ImagePropertyHandling.cs b/src/BingImageDownload/ImagePropertyHandling.cs index ceddc7c..1f0185c 100644 --- a/src/BingImageDownload/ImagePropertyHandling.cs +++ b/src/BingImageDownload/ImagePropertyHandling.cs @@ -2,18 +2,18 @@ using SixLabors.ImageSharp.Metadata.Profiles.Exif; using System; using System.Text; -using System.Xml; +using System.Xml.Linq; namespace BingImageDownload { internal class ImagePropertyHandling { - internal void SetTitleOnImage(XmlNode xmlNode, Image image) + internal void SetImageExifTags(XElement imageNode, Image image) { - var copyright = xmlNode.SelectSingleNode("copyright")?.InnerText; + var copyright = imageNode.Element("copyright")?.Value; var title = copyright; var author = string.Empty; - var headline = xmlNode.SelectSingleNode("headline")?.InnerText; + var headline = imageNode.Element("headline")?.Value; if (copyright != null && copyright.Contains("©")) { @@ -33,7 +33,7 @@ void SetPropertyItemString(ExifTag tag, string value) SetPropertyItemString(ExifTag.XPTitle, title); SetPropertyItemString(ExifTag.XPAuthor, author); - SetPropertyItemString(ExifTag.XPComment, $"Bing Image '{headline}' For {xmlNode.SelectSingleNode("startdate")?.InnerText}-{xmlNode.SelectSingleNode("enddate")?.InnerText}"); + SetPropertyItemString(ExifTag.XPComment, $"Bing Image '{headline}' For {imageNode.Element("startdate")?.Value}-{imageNode.Element("enddate")?.Value}"); SetPropertyItemString(ExifTag.XPKeywords, DateTime.Now.ToShortDateString()); } } diff --git a/src/BingImageDownload/RgbPixelData.cs b/src/BingImageDownload/RgbPixelData.cs new file mode 100644 index 0000000..c0bd2e5 --- /dev/null +++ b/src/BingImageDownload/RgbPixelData.cs @@ -0,0 +1,20 @@ +namespace BingImageDownload +{ + public class RgbPixelData + { + public byte R { get; } + public byte G { get; } + public byte B { get; } + public int X { get; } + public int Y { get; } + + public RgbPixelData(int x, int y, byte r, byte g, byte b) + { + R = r; + G = g; + B = b; + X = x; + Y = y; + } + } +} diff --git a/src/BingImageDownload/RgbaPixelData.cs b/src/BingImageDownload/RgbaPixelData.cs deleted file mode 100644 index 6c78839..0000000 --- a/src/BingImageDownload/RgbaPixelData.cs +++ /dev/null @@ -1,16 +0,0 @@ -namespace BingImageDownload -{ - public class RgbaPixelData - { - public int X { get; } - public int Y { get; } - public uint RgbaValue { get; } - - public RgbaPixelData(int x, int y, uint rgbaValue) - { - X = x; - Y = y; - RgbaValue = rgbaValue; - } - } -} diff --git a/src/BingImageDownload/Runner.cs b/src/BingImageDownload/Runner.cs index 9986fcb..5e49702 100644 --- a/src/BingImageDownload/Runner.cs +++ b/src/BingImageDownload/Runner.cs @@ -33,9 +33,9 @@ public static int Start(RunnerArgs runnerArgs, in CancellationToken cancellation } finally { - Thread.Sleep(TimeSpan.FromSeconds(30)); - bingInteractionAndParsing.SaveUrlBin(); - imageHashing.SaveHashTableBin(); + consoleWriter.WriteLine("Done, waiting 15 seconds before clearing up"); + Thread.Sleep(TimeSpan.FromSeconds(15)); + consoleWriter.WriteLine("Clearing up"); fileClearer.ArchiveOldImages(); fileClearer.ClearLogFiles(); fileClearer.ClearTempFolders();