Skip to content

Commit

Permalink
feat: support new site event pages with and without divs
Browse files Browse the repository at this point in the history
Signed-off-by: Sun Seng David TAN <[email protected]>
  • Loading branch information
sunix committed Nov 6, 2023
1 parent e59a134 commit 1da9fed
Show file tree
Hide file tree
Showing 8 changed files with 844 additions and 1,818 deletions.
126 changes: 114 additions & 12 deletions src/main/java/org/parisjug/eventpublisher/eventpage/HtmlEventPage.java
Original file line number Diff line number Diff line change
Expand Up @@ -47,26 +47,121 @@ public String getDetails() {
}

/**
 * Returns the HTML of the buffet section, with site-relative links
 * ({@code href="/..."}) rewritten to absolute {@code https://www.parisjug.org/} links.
 *
 * <p>Two page layouts are supported:
 * <ul>
 * <li>an element with the id {@code buffet}: its inner HTML is returned;</li>
 * <li>no such element: the siblings of the {@code #détails} heading are scanned and
 * everything from the {@code h3} whose id contains {@code "buffet"} (heading included)
 * up to the next {@code h3} is returned.</li>
 * </ul>
 *
 * @return the buffet HTML, or an empty string when the page has neither a
 *         {@code #buffet} element nor a {@code #détails} heading
 */
public String getBuffet() {
    Elements buffet = doc.select("#buffet");
    if (!buffet.isEmpty()) {
        return buffet.first().html().replace("href=\"/", "href=\"https://www.parisjug.org/");
    }

    Elements detailsHeading = doc.select("#détails");
    if (detailsHeading.isEmpty()) {
        return "";
    }

    StringBuilder buffetHtml = new StringBuilder();
    Elements siblings = detailsHeading.parents().first().children();
    boolean inBuffet = false;
    for (int i = 0; i < siblings.size(); i++) {
        // Every h3 either opens the buffet section (its id mentions "buffet") or closes it.
        if (siblings.get(i).tagName().equals("h3")) {
            inBuffet = siblings.get(i).id().contains("buffet");
        }
        if (inBuffet) {
            buffetHtml.append(siblings.get(i).outerHtml());
        }
    }
    return buffetHtml.toString().replace("href=\"/", "href=\"https://www.parisjug.org/");
}

/**
 * Returns the HTML of the first part of the event, with site-relative links
 * ({@code href="/..."}) rewritten to absolute {@code https://www.parisjug.org/} links.
 *
 * <p>Two page layouts are supported:
 * <ul>
 * <li>an element with the id {@code part1}: its inner HTML is returned;</li>
 * <li>no such element: the siblings of the {@code h2} with id {@code détails} are
 * scanned and everything after that heading is returned, stopping at the next
 * {@code h2} or at the {@code h3} whose id contains {@code "buffet"}.</li>
 * </ul>
 *
 * @return the first-part HTML, or an empty string when the page has neither a
 *         {@code #part1} element nor a {@code #détails} heading
 */
@Override
public String getPart1() {
    Elements part1 = doc.select("#part1");
    if (!part1.isEmpty()) {
        return part1.first().html().replace("href=\"/", "href=\"https://www.parisjug.org/");
    }

    Elements detailsHeading = doc.select("#détails");
    if (detailsHeading.isEmpty()) {
        return "";
    }

    StringBuilder part1Html = new StringBuilder();
    Elements siblings = detailsHeading.parents().first().children();
    boolean inPart1 = false;
    for (int i = 0; i < siblings.size(); i++) {
        String tag = siblings.get(i).tagName();
        String id = siblings.get(i).id();
        if (tag.equals("h2")) {
            // The "détails" h2 opens the section; any other h2 closes it. Headings are not copied.
            inPart1 = id.equals("détails");
            continue;
        }
        if (tag.equals("h3") && id.contains("buffet")) {
            // The buffet heading marks the end of part 1.
            inPart1 = false;
            continue;
        }
        if (inPart1) {
            part1Html.append(siblings.get(i).outerHtml());
        }
    }
    return part1Html.toString().replace("href=\"/", "href=\"https://www.parisjug.org/");
}

/**
 * Returns the HTML of the second part of the event, with site-relative links
 * ({@code href="/..."}) rewritten to absolute {@code https://www.parisjug.org/} links.
 *
 * <p>Two page layouts are supported:
 * <ul>
 * <li>an element with the id {@code part2}: its inner HTML is returned;</li>
 * <li>no such element: the siblings of the {@code #détails} heading are scanned.
 * Collection starts at the first {@code h3} that follows the buffet {@code h3}
 * (the buffet section itself is skipped) and stops at the {@code h3} whose id
 * contains {@code "3ème-mi-temps"}.</li>
 * </ul>
 *
 * @return the second-part HTML, or an empty string when nothing matches
 */
@Override
public String getPart2() {
    Elements part2 = doc.select("#part2");
    if (!part2.isEmpty()) {
        return part2.first().html().replace("href=\"/", "href=\"https://www.parisjug.org/");
    }

    Elements detailsHeading = doc.select("#détails");
    if (detailsHeading.isEmpty()) {
        return "";
    }

    StringBuilder part2Html = new StringBuilder();
    Elements siblings = detailsHeading.parents().first().children();
    boolean afterBuffet = false;
    boolean inPart2 = false;
    for (int i = 0; i < siblings.size(); i++) {
        String id = siblings.get(i).id();
        if (siblings.get(i).tagName().equals("h3")) {
            // Any h3 seen once the buffet heading has been passed (re)opens part 2.
            if (afterBuffet) {
                inPart2 = true;
            }
            if (id.contains("buffet")) {
                afterBuffet = true;
                continue;
            }
            if (id.contains("3ème-mi-temps")) {
                inPart2 = false;
                continue;
            }
        }
        // NOTE(review): unlike getPart1, an h2 does not end the section here — confirm
        // that no h2 can follow the talks inside the same parent element.
        if (inPart2) {
            part2Html.append(siblings.get(i).outerHtml());
        }
    }
    return part2Html.toString().replace("href=\"/", "href=\"https://www.parisjug.org/");
}

/**
 * Returns the event date and time as written on the page.
 *
 * <p>Two page layouts are supported:
 * <ul>
 * <li>an element with the id {@code datetime}: its text is returned;</li>
 * <li>no such element: among the siblings of the {@code #date-et-lieu} heading, the
 * text of the first {@code li} found directly under a {@code ul} is returned.</li>
 * </ul>
 *
 * @return the date/time text
 * @throws IllegalStateException if the page contains neither layout (previously this
 *         surfaced as an unexplained {@link NullPointerException})
 */
@Override
public String getDateTime() {
    Elements dateTimeElement = doc.select("#datetime");
    if (!dateTimeElement.isEmpty()) {
        return dateTimeElement.first().text();
    }

    Elements dateAndPlaceHeading = doc.select("#date-et-lieu");
    if (!dateAndPlaceHeading.isEmpty()) {
        Elements siblings = dateAndPlaceHeading.parents().first().children();
        for (int i = 0; i < siblings.size(); i++) {
            if (!siblings.get(i).tagName().equals("ul")) {
                continue;
            }
            Elements items = siblings.get(i).children();
            for (int j = 0; j < items.size(); j++) {
                if (items.get(j).tagName().equals("li")) {
                    return items.get(j).text();
                }
            }
        }
    }

    // NOTE(review): the project's own page-check exception may be a better fit here — confirm.
    throw new IllegalStateException(
            "The page should contain an element with the id \"datetime\", or a section with the id"
                    + " \"date-et-lieu\" followed by a list whose first item is the date and time.");
}

@Override
Expand All @@ -88,7 +183,7 @@ public String getEndTime() {
if (isVirtual()) {
eventDateTime = eventDateTime.plusMinutes(75);
} else {
eventDateTime = eventDateTime.plusMinutes(165);
eventDateTime = eventDateTime.plusMinutes(180);
}
return eventDateTime.format(DateTimeFormatter.ISO_INSTANT).replace(":", "").replace("-", "");
}
Expand All @@ -109,7 +204,19 @@ public String getLongTitle() {

@Override
public String getLocation() {
String attr = doc.select("#location a").first().attr("href");
Elements locationElement = doc.select("#location a");
if (locationElement.isEmpty()) {
// in the section starting with h2 id="date-et-lieu", get the second li element
Elements elements = doc.select("#date-et-lieu").parents().first().getElementsByTag("li");
if(elements.size() > 1) {
locationElement = elements.get(1).getElementsByTag("a");
}
else {
return "";
}
}

String attr = locationElement.first().attr("href");
if (attr.startsWith("/")) {
attr = "https://www.parisjug.org" + attr;
}
Expand Down Expand Up @@ -141,12 +248,7 @@ public String getIntro() {

/**
 * Tells whether the event is held online.
 *
 * <p>The decision is based on the page title only: the event is virtual when the title
 * contains {@code "Soirée Virtuelle"}.
 *
 * @return {@code true} when the title marks the event as virtual, {@code false} otherwise
 */
@Override
public boolean isVirtual() {
    return getTitle().contains("Soirée Virtuelle");
}

}
108 changes: 74 additions & 34 deletions src/test/java/org/parisjug/eventpublisher/eventpage/EventPageTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,33 +7,33 @@

import java.io.File;

import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;

import io.quarkus.test.junit.QuarkusTest;

@QuarkusTest
public class EventPageTest {
class EventPageTest {

@Test
public void test_working_page() {
File file = new File(this.getClass().getResource("parisjug-20201208.html").getFile());
void test_virtual_event_page() {
File file = new File(this.getClass().getResource("newsite_parisjug-20201208.html").getFile());

EventPage page = EventPage.fromHtmlLocalFile(file);
// EventPage page =
// EventPage.fromUrl("https://www.parisjug.org/xwiki/wiki/oldversion/view/Meeting/20201208");
assertNotNull(page, "should be able to load from a xwiki html file");
assertNotNull(page, "should be able to load from a HUGO html file");

// title
assertEquals("Le Java nouveau est arrivé : Java SE 15", page.getTitle(), "Title from page");
assertEquals("Soirée Virtuelle : Le Java nouveau est arrivé : Java SE 15", page.getTitle(), "Title from page");

String details = page.getDetails();

// details
assertTrue(page.getDetails().contains("<strong>18h45 à 19h00 : Accueil</strong>"));
assertFalse(page.getDetails().contains("Code de Conduite"));
assertTrue(details.contains("18h45 à 19h00 : Accueil</h3>"));
assertTrue(details.contains("Jean-Michel"));
assertFalse(details.contains("Replays"));
assertTrue(
page.getDetails()
.contains("https://www.parisjug.org/xwiki/wiki/oldversion/view/Speaker/DoudouxJeanMichel"),
"should contain link https://www.parisjug.org/xwiki/wiki/oldversion/view/Speaker/DoudouxJeanMichel");
.contains("https://www.parisjug.org/speakers/jean-michel-doudoux"),
"should contain link https://www.parisjug.org/speakers/jean-michel-doudoux");

// is virtual ?
assertTrue(page.isVirtual(), "should be virtual");
Expand All @@ -56,58 +56,98 @@ public void test_working_page() {
}

@Test
public void should_provide_clear_error_if_missing_title() {
File file = new File(this.getClass().getResource("parisjug-20201208_missing_title.html").getFile());
public void test_inRealLifeEvent_with_div_ids() {
File file = new File(this.getClass().getResource("parisjug-iRL-div-id-20231114.html").getFile());

EventPage page = EventPage.fromHtmlLocalFile(file);
assertNotNull(page, "should be able to load from a HUGO html file");

EventPageCheckException ex = Assertions.assertThrows(EventPageCheckException.class, () -> {
page.getTitle();
});
assertEquals(
"The page should contain an element with the id \"title\". For instance: <div id=\"title\">Quarkus World Tour</div>.",
ex.getMessage());
// title
assertEquals("Soirée Loom", page.getTitle(), "Title from page");

}
// details
String details = page.getDetails();
assertTrue(details.contains("19h00 : Accueil"));
assertFalse(page.getDetails().contains("Code de Conduite"));
assertTrue(
page.getDetails()
.contains("https://www.parisjug.org/speakers/david-pequegnot"),
"should contain link https://www.parisjug.org/speakers/david-pequegnot");

// date time
assertEquals("Mardi 14 Novembre 2023 à 19h00", page.getDateTime(), "Date and time");

// is virtual ?
assertFalse(page.isVirtual(), "should be in Real life");

// start time
assertEquals("20231114T180000Z", page.getStartTime(), "start time");

// end time
assertEquals("20231114T210000Z", page.getEndTime(), "end time");

// long title
assertEquals("Paris JUG - Soirée Loom (2023/11/14)",
page.getLongTitle(), "Long title");

// location
assertEquals("https://www.parisjug.org/location/sfeir", page.getLocation(), "location");

}

@Test
public void test_inRealLifeEvent() {
File file = new File(this.getClass().getResource("parisjug-iRL-20211214.html").getFile());
File file = new File(this.getClass().getResource("parisjug-iRL-20230110-yb.html").getFile());

EventPage page = EventPage.fromHtmlLocalFile(file);
// EventPage page =
// EventPage.fromUrl("https://www.parisjug.org/xwiki/wiki/oldversion/view/Meeting/20201208");
assertNotNull(page, "should be able to load from a xwiki html file");
assertNotNull(page, "should be able to load from a HUGO html file");

// title
assertEquals("Développe dans ton cloud pour le cloud - Gitpod et Eclipse Che", page.getTitle(), "Title from page");
assertEquals("Soirée Young Blood X", page.getTitle(), "Title from page");

// assert page.getPart1 should contains "19h30 : 204VS404 le duel du bon code http" but not "21h20 : Tech Lead REX"
assertTrue(page.getPart1().contains("19h30 : 204VS404 le duel du bon code http"), "part1 should contains 19h30 : 204VS404 le duel du bon code http");
assertFalse(page.getPart1().contains("21h20 : Tech Lead REX"), "part1 should not contains 21h20 : Tech Lead REX");
assertFalse(page.getPart1().contains("20h30 à 21h00 : Buffet"), "part1 should not contains 20h30 à 21h00 : Buffet");

// assert page.getPart2 should contains "21h20 : Tech Lead REX" but not "19h30 : 204VS404 le duel du bon code http"
assertTrue(page.getPart2().contains("21h20 : Tech Lead REX"), "part2 should contains 21h20 : Tech Lead REX");
assertFalse(page.getPart2().contains("19h30 : 204VS404 le duel du bon code http"), "part2 should not contains 19h30 : 204VS404 le duel du bon code http");
assertFalse(page.getPart2().contains("figure"), "part2 should not contains figure (part of buffet)");


// details
assertTrue(page.getDetails().contains("<strong>19h15 à 19h30 : Accueil</strong>"));
String details = page.getDetails();
assertTrue(details.contains("18h45 à 19h00: Accueil"));
assertTrue(details.contains("20h30 à 21h00 : Buffet"), "details should contains 20h30 à 21h00 : Buffet");
assertFalse(page.getDetails().contains("Code de Conduite"));
assertTrue(
page.getDetails()
.contains("https://www.parisjug.org/xwiki/wiki/oldversion/view/Speaker/HoracioGonzalez"),
"should contain link https://www.parisjug.org/xwiki/wiki/oldversion/view/Speaker/HoracioGonzalez");
.contains("Pierre Cheucle"),
"should contain speaker Pierre Cheucle");

// date time
assertEquals("Mardi 14 décembre 2021 à 19h15", page.getDateTime(), "Date and time");
assertEquals("Mardi 10 janvier 2023 à 19h00", page.getDateTime(), "Date and time");

// is virtual ?
assertFalse(page.isVirtual(), "should be in Real life");

// start time
assertEquals("20211214T181500Z", page.getStartTime(), "start time");
assertEquals("20230110T180000Z", page.getStartTime(), "start time");

// end time
assertEquals("20211214T210000Z", page.getEndTime(), "end time");
assertEquals("20230110T210000Z", page.getEndTime(), "end time");

// long title
assertEquals("Paris JUG - Soirée en présentiel : Développe dans ton cloud pour le cloud - Gitpod et Eclipse Che (2021/12/14)",
assertEquals("Paris JUG - Soirée Young Blood X (2023/01/10)",
page.getLongTitle(), "Long title");

// location
assertEquals("https://www.parisjug.org/xwiki/wiki/oldversion/view/Location/Datadog", page.getLocation(), "location");
assertEquals("https://www.parisjug.org/location/agorapulse/", page.getLocation(), "location");





}
}
Loading

0 comments on commit 1da9fed

Please sign in to comment.