Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce IBlock and ILettersBlock interfaces #864

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ private AltoDocument.AltoGraphicalElement ToAltoGraphicalElement(PdfPath pdfPath
private AltoDocument.AltoIllustration ToAltoIllustration(IPdfImage pdfImage, double height)
{
illustrationCount++;
var rectangle = pdfImage.Bounds;
var rectangle = pdfImage.BoundingBox;

return new AltoDocument.AltoIllustration
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ private string GetCode(PdfPath path, double pageHeight, bool subPaths, int level
private string GetCode(IPdfImage pdfImage, double pageHeight, int level)
{
imageCount++;
var bbox = pdfImage.Bounds;
var bbox = pdfImage.BoundingBox;
return GetIndent(level) + "<span class='ocr_image' id='image_" + pageCount + "_"
+ imageCount + "' title='" + GetCode(bbox, pageHeight) + "' />";
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ private PageXmlDocument.PageXmlLineDrawingRegion ToPageXmlLineDrawingRegion(PdfP
private PageXmlDocument.PageXmlImageRegion ToPageXmlImageRegion(IPdfImage pdfImage, PageXmlData data, double pageWidth, double pageHeight)
{
data.RegionsCount++;
var bbox = pdfImage.Bounds;
var bbox = pdfImage.BoundingBox;
return new PageXmlDocument.PageXmlImageRegion()
{
Coords = ToCoords(bbox, pageWidth, pageHeight),
Expand Down
10 changes: 8 additions & 2 deletions src/UglyToad.PdfPig.DocumentLayoutAnalysis/TextBlock.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,13 @@
/// <summary>
/// A block of text.
/// </summary>
public class TextBlock
{
public class TextBlock: ILettersBlock
{
/// <summary>
/// The letters contained in this TextBlock
/// </summary>
public IReadOnlyList<Letter> Letters { get; }

/// <summary>
/// The separator used between lines in the block.
/// </summary>
Expand Down Expand Up @@ -63,6 +68,7 @@ public TextBlock(IReadOnlyList<TextLine> lines, string separator = "\n")
ReadingOrder = -1;

TextLines = lines;
Letters = lines.SelectMany(tl => tl.Words).SelectMany(w => w.Letters).ToList().AsReadOnly();

if (lines.Count == 1)
{
Expand Down
8 changes: 7 additions & 1 deletion src/UglyToad.PdfPig.DocumentLayoutAnalysis/TextLine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,13 @@
/// <summary>
/// A line of text.
/// </summary>
public class TextLine
public class TextLine : ILettersBlock
{
/// <summary>
/// The letters contained in this TextLine
/// </summary>
public IReadOnlyList<Letter> Letters { get; }

/// <summary>
/// The separator used between words in the line.
/// </summary>
Expand Down Expand Up @@ -56,6 +61,7 @@ public TextLine(IReadOnlyList<Word> words, string separator = " ")
Separator = separator;

Words = words;
Letters = words.SelectMany(w => w.Letters).ToList().AsReadOnly();

if (Words.Count == 1)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ public static IReadOnlyList<PdfRectangle> GetWhitespaces(IEnumerable<Word> words

if (images?.Any() == true)
{
bboxes.AddRange(images.Where(w => w.Bounds.Width > 0 && w.Bounds.Height > 0).Select(o => o.Bounds));
bboxes.AddRange(images.Where(w => w.BoundingBox.Width > 0 && w.BoundingBox.Height > 0).Select(o => o.BoundingBox));
}

return GetWhitespaces(bboxes,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,29 +26,29 @@ public void ImagesHaveCorrectDimensionsAndLocations()
{
var page = document.GetPage(1);

var images = page.GetImages().OrderBy(x => x.Bounds.Width).ToList();
var images = page.GetImages().OrderBy(x => x.BoundingBox.Width).ToList();

var pdfPigSquare = images[0];

Assert.Equal(148.3d, pdfPigSquare.Bounds.Width, doubleComparer);
Assert.Equal(148.3d, pdfPigSquare.Bounds.Height, doubleComparer);
Assert.Equal(60.1d, pdfPigSquare.Bounds.Left, doubleComparer);
Assert.Equal(765.8d, pdfPigSquare.Bounds.Top, doubleComparer);
Assert.Equal(148.3d, pdfPigSquare.BoundingBox.Width, doubleComparer);
Assert.Equal(148.3d, pdfPigSquare.BoundingBox.Height, doubleComparer);
Assert.Equal(60.1d, pdfPigSquare.BoundingBox.Left, doubleComparer);
Assert.Equal(765.8d, pdfPigSquare.BoundingBox.Top, doubleComparer);


var pdfPigSquished = images[1];

Assert.Equal(206.8d, pdfPigSquished.Bounds.Width, doubleComparer);
Assert.Equal(83.2d, pdfPigSquished.Bounds.Height, doubleComparer);
Assert.Equal(309.8d, pdfPigSquished.Bounds.Left, doubleComparer);
Assert.Equal(552.1d, pdfPigSquished.Bounds.Top, doubleComparer);
Assert.Equal(206.8d, pdfPigSquished.BoundingBox.Width, doubleComparer);
Assert.Equal(83.2d, pdfPigSquished.BoundingBox.Height, doubleComparer);
Assert.Equal(309.8d, pdfPigSquished.BoundingBox.Left, doubleComparer);
Assert.Equal(552.1d, pdfPigSquished.BoundingBox.Top, doubleComparer);

var birthdayPigs = images[2];

Assert.Equal(391d, birthdayPigs.Bounds.Width, doubleComparer);
Assert.Equal(267.1d, birthdayPigs.Bounds.Height, doubleComparer);
Assert.Equal(102.2d, birthdayPigs.Bounds.Left, doubleComparer);
Assert.Equal(426.3d, birthdayPigs.Bounds.Top, doubleComparer);
Assert.Equal(391d, birthdayPigs.BoundingBox.Width, doubleComparer);
Assert.Equal(267.1d, birthdayPigs.BoundingBox.Height, doubleComparer);
Assert.Equal(102.2d, birthdayPigs.BoundingBox.Left, doubleComparer);
Assert.Equal(426.3d, birthdayPigs.BoundingBox.Top, doubleComparer);
}
}

Expand Down
2 changes: 2 additions & 0 deletions src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@ public void OnlyExposedApiIsPublic()
"UglyToad.PdfPig.Content.DocumentInformation",
"UglyToad.PdfPig.Content.EmbeddedFile",
"UglyToad.PdfPig.Content.Hyperlink",
"UglyToad.PdfPig.Content.IBoundingBox",
"UglyToad.PdfPig.Content.ILettersBlock",
"UglyToad.PdfPig.Content.InlineImage",
"UglyToad.PdfPig.Content.IPageFactory`1",
"UglyToad.PdfPig.Content.IPdfImage",
Expand Down
2 changes: 1 addition & 1 deletion src/UglyToad.PdfPig.Tests/TestPdfImage.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

public class TestPdfImage : IPdfImage
{
public PdfRectangle Bounds { get; set; }
public PdfRectangle BoundingBox { get; set; }

public int WidthInSamples { get; set; }

Expand Down
14 changes: 7 additions & 7 deletions src/UglyToad.PdfPig.Tests/Writer/PdfDocumentBuilderTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -563,8 +563,8 @@ public void CanWriteSinglePageWithJpeg()

Assert.NotNull(image);

Assert.Equal(expectedBounds.BottomLeft, image.Bounds.BottomLeft);
Assert.Equal(expectedBounds.TopRight, image.Bounds.TopRight);
Assert.Equal(expectedBounds.BottomLeft, image.BoundingBox.BottomLeft);
Assert.Equal(expectedBounds.TopRight, image.BoundingBox.TopRight);

Assert.Equal(imageBytes, image.RawMemory.ToArray());
}
Expand Down Expand Up @@ -609,18 +609,18 @@ public void CanWrite2PagesSharingJpeg()
Assert.Equal(2, page1Images.Count);

var image1 = page1Images[0];
Assert.Equal(expectedBounds1, image1.Bounds);
Assert.Equal(expectedBounds1, image1.BoundingBox);

var image2 = page1Images[1];
Assert.Equal(expectedBounds2, image2.Bounds);
Assert.Equal(expectedBounds2, image2.BoundingBox);

var page2Doc = document.GetPage(2);

var image3 = Assert.Single(page2Doc.GetImages());

Assert.NotNull(image3);

Assert.Equal(expectedBounds3, image3.Bounds);
Assert.Equal(expectedBounds3, image3.BoundingBox);

Assert.Equal(imageBytes, image1.RawMemory.ToArray());
Assert.Equal(imageBytes, image2.RawMemory.ToArray());
Expand Down Expand Up @@ -696,8 +696,8 @@ public void CanWriteSinglePageWithPng()

Assert.NotNull(image);

Assert.Equal(expectedBounds.BottomLeft, image.Bounds.BottomLeft);
Assert.Equal(expectedBounds.TopRight, image.Bounds.TopRight);
Assert.Equal(expectedBounds.BottomLeft, image.BoundingBox.BottomLeft);
Assert.Equal(expectedBounds.TopRight, image.BoundingBox.TopRight);

Assert.True(image.TryGetPng(out var png));
Assert.NotNull(png);
Expand Down
36 changes: 36 additions & 0 deletions src/UglyToad.PdfPig/Content/IBoundingBox.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
namespace UglyToad.PdfPig.Content
{
    using System.Collections.Generic;
    using UglyToad.PdfPig.Core;

    /// <summary>
    /// Implemented by content objects that expose a bounding box.
    /// </summary>
    public interface IBoundingBox
    {
        /// <summary>
        /// Gets the bounding box: the rectangle completely containing this object.
        /// </summary>
        PdfRectangle BoundingBox { get; }
    }

    /// <summary>
    /// Implemented by content objects that have a bounding box and are made up of text,
    /// exposing both the text itself and the individual <see cref="Letter"/>s it contains.
    /// </summary>
    public interface ILettersBlock : IBoundingBox
    {
        /// <summary>
        /// Gets the text of the block.
        /// </summary>
        string Text { get; }

        /// <summary>
        /// Gets the text orientation of the block.
        /// </summary>
        TextOrientation TextOrientation { get; }

        /// <summary>
        /// Gets the letters contained in the block.
        /// </summary>
        // IReadOnlyList<T> lives in System.Collections.Generic, hence the explicit using above.
        IReadOnlyList<Letter> Letters { get; }
    }
}
7 changes: 1 addition & 6 deletions src/UglyToad.PdfPig/Content/IPdfImage.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,8 @@
/// <summary>
/// An image in a PDF document, may be an <see cref="InlineImage"/> or a PostScript image XObject (<see cref="XObjectImage"/>).
/// </summary>
public interface IPdfImage
public interface IPdfImage : IBoundingBox
{
/// <summary>
/// The placement rectangle of the image in PDF coordinates.
/// </summary>
PdfRectangle Bounds { get; }

/// <summary>
/// The width of the image in samples.
/// </summary>
Expand Down
6 changes: 3 additions & 3 deletions src/UglyToad.PdfPig/Content/InlineImage.cs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ public class InlineImage : IPdfImage
private readonly Lazy<ReadOnlyMemory<byte>>? memoryFactory;

/// <inheritdoc />
public PdfRectangle Bounds { get; }
public PdfRectangle BoundingBox { get; }

/// <inheritdoc />
public int WidthInSamples { get; }
Expand Down Expand Up @@ -69,7 +69,7 @@ internal InlineImage(PdfRectangle bounds, int widthInSamples, int heightInSample
DictionaryToken streamDictionary,
ColorSpaceDetails colorSpaceDetails)
{
Bounds = bounds;
BoundingBox = bounds;
WidthInSamples = widthInSamples;
HeightInSamples = heightInSamples;
Decode = decode;
Expand Down Expand Up @@ -124,7 +124,7 @@ public bool TryGetBytesAsMemory(out ReadOnlyMemory<byte> bytes)
/// <inheritdoc />
public override string ToString()
{
return $"Inline Image (w {Bounds.Width}, h {Bounds.Height})";
return $"Inline Image (w {BoundingBox.Width}, h {BoundingBox.Height})";
}
}
}
13 changes: 9 additions & 4 deletions src/UglyToad.PdfPig/Content/Letter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@
/// <summary>
/// A glyph or combination of glyphs (characters) drawn by a PDF content stream.
/// </summary>
public class Letter
{
public class Letter : IBoundingBox
{
/// <summary>
/// The text for this letter or unicode character.
/// </summary>
public string Value { get; }
public string Value { get; }

/// <summary>
/// Text orientation of the letter.
Expand Down Expand Up @@ -44,7 +44,12 @@ public class Letter
/// For example letters with descenders, p, j, etc., will have a box extending below the <see cref="Location"/> they are placed at.
/// The width of the glyph may also be more or less than the <see cref="Width"/> allocated for the character in the PDF content.
/// </summary>
public PdfRectangle GlyphRectangle { get; }
public PdfRectangle GlyphRectangle { get; }

/// <summary>
/// Gets the Bounding Box: The rectangle completely containing this object. Same as <see cref="GlyphRectangle"/>
/// </summary>
public PdfRectangle BoundingBox => GlyphRectangle;

/// <summary>
/// Size as defined in the PDF file. This is not equivalent to font size in points but is relative to other font sizes on the page.
Expand Down
2 changes: 1 addition & 1 deletion src/UglyToad.PdfPig/Content/Word.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
/// <summary>
/// A word.
/// </summary>
public class Word
public class Word : ILettersBlock
{
/// <summary>
/// The text of the word.
Expand Down
6 changes: 3 additions & 3 deletions src/UglyToad.PdfPig/XObjects/XObjectImage.cs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ public class XObjectImage : IPdfImage
private readonly Lazy<ReadOnlyMemory<byte>>? memoryFactory;

/// <inheritdoc />
public PdfRectangle Bounds { get; }
public PdfRectangle BoundingBox { get; }

/// <inheritdoc />
public int WidthInSamples { get; }
Expand Down Expand Up @@ -81,7 +81,7 @@ internal XObjectImage(PdfRectangle bounds,
Lazy<ReadOnlyMemory<byte>>? bytes,
ColorSpaceDetails? colorSpaceDetails)
{
Bounds = bounds;
BoundingBox = bounds;
WidthInSamples = widthInSamples;
HeightInSamples = heightInSamples;
BitsPerComponent = bitsPerComponent;
Expand Down Expand Up @@ -116,7 +116,7 @@ public bool TryGetBytesAsMemory(out ReadOnlyMemory<byte> bytes)
/// <inheritdoc />
public override string ToString()
{
return $"XObject Image (w {Bounds.Width}, h {Bounds.Height}): {ImageDictionary}";
return $"XObject Image (w {BoundingBox.Width}, h {BoundingBox.Height}): {ImageDictionary}";
}
}
}
Loading