Page Menu
Home
DevCentral
Search
Configure Global Search
Log In
Files
F3762421
D309.id728.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
3 KB
Referenced Files
None
Subscribers
None
D309.id728.diff
View Options
diff --git a/page.php b/page.php
--- a/page.php
+++ b/page.php
@@ -328,4 +328,27 @@
unlink($cookie_file);
return $data;
}
+
+ ///
+ /// DATES
+ ///
+
+ function extractYYYYMMDDDateFromURL() {
+ $pattern = "@/([12][0-9]{3})\-([0-9]{2})\-([0-9]{2})/@";
+ if (preg_match($pattern, $this->url, $matches)) {
+ $this->yyyy = $matches[1];
+ $this->mm = $matches[2];
+ $this->dd = $matches[3];
+ }
+ }
+
+ function extractYYYYDDMMateFromURL() {
+ $pattern = "@/([12][0-9]{3})\-([0-9]{2})\-([0-9]{2})/@";
+ if (preg_match($pattern, $this->url, $matches)) {
+ $this->yyyy = $matches[1];
+ $this->mm = $matches[3];
+ $this->dd = $matches[2];
+ }
+ }
+
}
diff --git a/pages/archiveslesoir.php b/pages/archiveslesoir.php
--- a/pages/archiveslesoir.php
+++ b/pages/archiveslesoir.php
@@ -1,25 +1,16 @@
<?php
-require 'lesoir.php';
-
/**
* Represents a page from the http://archives.lesoir.be/ site.
*/
class ArchivesLeSoirPage extends LeSoirPage {
+
/**
- * Analyses the page and extracts metadata
+ * Determines if this is the archive
+ * @return bool always true
*/
- function analyse ($skipSpecificProcessing = false) {
- parent::analyse(true);
-
- $authors = $this->between('<p class="st_signature">', '</p>');
- $date = trim($this->between('<p class="st_date">', '</p>'));
-
- $this->processAuthors($authors);
- $this->processDate($date);
+ function isArchive () {
+ return true;
}
- function get_title () {
- return $this->between('<h3 class="story_title main">', '</h3>');
- }
}
diff --git a/pages/lesoir.php b/pages/lesoir.php
--- a/pages/lesoir.php
+++ b/pages/lesoir.php
@@ -2,17 +2,30 @@
//Page analysis for www.lesoir.be
class LeSoirPage extends Page {
- function analyse ($skipSpecificProcessing = false) {
+
+ use DownloadWithWget;
+
+ /**
+ * Determines if the article belongs to the archives
+ * @return bool
+ */
+ function isArchive () {
+ return strpos($this->url, "//www.lesoir.be/archives") !== false;
+ }
+
+ function analyse () {
parent::analyse();
- //Hardcoded known info
$this->site = "[[Le Soir]]";
- //Allows to skip the analyis for ArchivesLeSoirPage
- if ($skipSpecificProcessing) {
- return;
+ if ($this->isArchive()) {
+ $this->analyseForArchive();
+ } else {
+ $this->analyseForMainSite();
}
+ }
+ function analyseForMainSite () {
//Gets metadata
$meta = $this->between('<div class="meta">', '</div>');
$authors = trim(self::grab($meta, '<strong>', '</strong>'));
@@ -25,6 +38,17 @@
}
}
+ function analyseForArchive () {
+ $authors = $this->between('st_signature">', '</p>');
+ $this->processAuthors($authors);
+
+ if ($date = trim($this->between('<p class="st_date">', '</p>'))) {
+ $this->processDate($date);
+ } else {
+ $this->extractYYYYMMDDDateFromURL();
+ }
+ }
+
protected function processDate ($date) {
$dateFragments = explode(' ', $date);
if (count($dateFragments) == 4) {
@@ -64,12 +88,32 @@
}
/**
+ * Gets page title for archives sites
+ *
+ * @return string
+ */
+ function getTitleForArchive () {
+ $title = $this->between('<h3 class="story_title main">', '</h3>');
+
+ if ($title === false) {
+ $title = $this->between('<h1>', '</h1>');
+ }
+
+ return $title;
+ }
+
+ /**
* Gets page title
*/
function get_title () {
+ if ($this->isArchive()) {
+ return $this->getTitleForArchive();
+ }
+
if (!$title = $this->meta_tags['og:title']) {
$title = parent::get_title();
}
+
return $title;
}
}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Fri, Nov 22, 09:28 (21 m, 58 s)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2256027
Default Alt Text
D309.id728.diff (3 KB)
Attached To
Mode
D309: Le Soir update
Attached
Detach File
Event Timeline
Log In to Comment