Page MenuHomeDevCentral

No OneTemporary

diff --git a/pages/archiveslesoir.php b/pages/archiveslesoir.php
new file mode 100644
index 0000000..04e4e51
--- /dev/null
+++ b/pages/archiveslesoir.php
@@ -0,0 +1,25 @@
+<?php
+
+require 'lesoir.php';
+
+/**
+ * Represents a page from the http://archives.lesoir.be/ site.
+ */
+class ArchivesLeSoirPage extends LeSoirPage {
+ /**
+ * Analyses the page and extracts metadata
+ */
+ function analyse ($skipSpecificProcessing = false) {
+ parent::analyse(true);
+
+ $authors = $this->between('<p class="st_signature">', '</p>');
+ $date = trim($this->between('<p class="st_date">', '</p>'));
+
+ $this->processAuthors($authors);
+ $this->processDate($date);
+ }
+
+ function get_title () {
+ return $this->between('<h3 class="story_title main">', '</h3>');
+ }
+}
diff --git a/pages/index.dat b/pages/index.dat
index 443005f..72ba763 100644
--- a/pages/index.dat
+++ b/pages/index.dat
@@ -1,13 +1,13 @@
-http://archives.lesoir.be/ LeSoir
+http://archives.lesoir.be/ ArchivesLeSoir
http://www.chroniquesautomatiques.com/ LesChroniquesAutomatiques
http://www.erudit.org/ Erudit
http://www.jstor.org/ JSTOR
http://www.lalibre.be/ LaLibreBelgique
http://www.lefigaro.fr/ LeFigaro
http://www.lemonde.fr/ LeMonde
http://www.lesoir.be/ LeSoir
http://www.nytimes.com/ NewYorkTimes
http://www.persee.fr/ Persee
http://www.rue89.com/ Rue89
http://www.tandfonline.com TaylorAndFrancis
http://www.worldcat.org/ WorldCat
diff --git a/pages/lesoir.php b/pages/lesoir.php
index 3cb00e7..e2364da 100644
--- a/pages/lesoir.php
+++ b/pages/lesoir.php
@@ -1,60 +1,75 @@
<?php
//Page analysis for www.lesoir.be
class LeSoirPage extends Page {
- function analyse () {
+ function analyse ($skipSpecificProcessing = false) {
parent::analyse();
//Hardcoded known info
- $this->site = "Le Soir";
- $this->skipYMD = true;
-
- //Gets date
- //meta tag 'archi_id' has t-YYYYMMDD-HHMMhh as format (where hh = AM/PM)
- // e.g. t-20120722-0211PM
- $yyyy = substr($this->meta_tags['archi_id'], 2, 4);
- $mm = substr($this->meta_tags['archi_id'], 6, 2);
- $dd = substr($this->meta_tags['archi_id'], 8, 2);
- $this->unixtime = mktime(0, 0, 0, $mm, $dd, $yyyy);
- $this->date = strftime(LONG_DATE_FORMAT, $this->unixtime);
-
- //Gets author
- $authors = self::between('st_signature">', '</p>');
-
- if ($authors == "R&#233;daction en ligne") {
- $this->skipAuthor = true;
- } else {
- require_once('helpers/namecase.php');
-
- //Some Le Soir articles use firstname name, others name,firstname.
- //When there are several authors, ' ;' is the separator.
- //Authors are in uppercase, so we need to clean case.
-
- $authors = explode('; ', $authors);
- $start = true;
-
- foreach ($authors as $author) {
- if (strpos($author, ',') !== false) {
- $name = explode(',', $author, 2);
- $author = $name[1] . ' ' . $name[0];
- }
- $author = name_case($author);
- if ($start) {
- $this->author = name_case($author);
- $start = false;
- } else {
- $this->coauthors[] = name_case($author);
- }
+ $this->site = "[[Le Soir]]";
+
+ //Lets ArchivesLeSoirPage skip the www.lesoir.be-specific analysis below
+ if ($skipSpecificProcessing) {
+ return;
+ }
+
+ //Gets metadata
+ $meta = $this->between('<div class="meta">', '</div>');
+ $authors = trim(self::grab($meta, '<strong>', '</strong>'));
+ $date = self::grab($meta, 'class="prettydate">', ',');
+
+ //Processes metadata
+ $this->processAuthors($authors);
+ if ($date) {
+ $this->processDate($date);
+ }
+ }
+
+ protected function processDate ($date) {
+ $dateFragments = explode(' ', $date);
+ if (count($dateFragments) == 4) {
+ array_shift($dateFragments); //drops day name
+ }
+ list($this->dd, $this->mm, $this->yyyy) = $dateFragments;
+ }
+
+ protected function processAuthors ($authors) {
+ if ($authors == "Rédaction en ligne") {
+ $this->skipAuthor = true;
+ return;
+ }
+
+ require_once('helpers/namecase.php');
+
+ //Some Le Soir articles use firstname name, others name,firstname.
+ //When there are several authors, '; ' is the separator.
+ //Authors are in uppercase, so we need to clean case.
+
+ $authors = explode('; ', $authors);
+ $start = true;
+
+ foreach ($authors as $author) {
+ if (strpos($author, ',') !== false) {
+ $name = explode(',', $author, 2);
+ $author = $name[1] . ' ' . $name[0];
+ }
+ $author = name_case($author);
+ if ($start) {
+ $this->author = name_case($author);
+ $start = false;
+ } else {
+ $this->coauthors[] = name_case($author);
}
- }
+ }
}
+ /**
+ * Gets page title
+ */
function get_title () {
if (!$title = $this->meta_tags['og:title']) {
$title = parent::get_title();
- }
+ }
return $title;
}
}
-
-?>

File Metadata

Mime Type
text/x-diff
Expires
Tue, Jan 28, 08:44 (1 d, 2 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2375217
Default Alt Text
(5 KB)

Event Timeline