Page Menu
Home
DevCentral
Search
Configure Global Search
Log In
Files
F4060902
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
5 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/pages/archiveslesoir.php b/pages/archiveslesoir.php
new file mode 100644
index 0000000..04e4e51
--- /dev/null
+++ b/pages/archiveslesoir.php
@@ -0,0 +1,25 @@
+<?php
+
+require 'lesoir.php';
+
+/**
+ * Represents a page from the http://archives.lesoir.be/ site.
+ */
+class ArchivesLeSoirPage extends LeSoirPage {
+ /**
+ * Analyses the page and extracts metadata
+ */
+ function analyse ($skipSpecificProcessing = false) {
+ parent::analyse(true);
+
+ $authors = $this->between('<p class="st_signature">', '</p>');
+ $date = trim($this->between('<p class="st_date">', '</p>'));
+
+ $this->processAuthors($authors);
+ $this->processDate($date);
+ }
+
+ function get_title () {
+ return $this->between('<h3 class="story_title main">', '</h3>');
+ }
+}
diff --git a/pages/index.dat b/pages/index.dat
index 443005f..72ba763 100644
--- a/pages/index.dat
+++ b/pages/index.dat
@@ -1,13 +1,13 @@
-http://archives.lesoir.be/ LeSoir
+http://archives.lesoir.be/ ArchivesLeSoir
http://www.chroniquesautomatiques.com/ LesChroniquesAutomatiques
http://www.erudit.org/ Erudit
http://www.jstor.org/ JSTOR
http://www.lalibre.be/ LaLibreBelgique
http://www.lefigaro.fr/ LeFigaro
http://www.lemonde.fr/ LeMonde
http://www.lesoir.be/ LeSoir
http://www.nytimes.com/ NewYorkTimes
http://www.persee.fr/ Persee
http://www.rue89.com/ Rue89
http://www.tandfonline.com TaylorAndFrancis
http://www.worldcat.org/ WorldCat
diff --git a/pages/lesoir.php b/pages/lesoir.php
index 3cb00e7..e2364da 100644
--- a/pages/lesoir.php
+++ b/pages/lesoir.php
@@ -1,60 +1,75 @@
<?php
//Page analysis for www.lesoir.be
class LeSoirPage extends Page {
- function analyse () {
+ function analyse ($skipSpecificProcessing = false) {
parent::analyse();
//Hardcoded known info
- $this->site = "Le Soir";
- $this->skipYMD = true;
-
- //Gets date
- //meta tag 'archi_id' has t-YYYYMMDD-HHMMhh as format (where hh = AM/PM)
- // e.g. t-20120722-0211PM
- $yyyy = substr($this->meta_tags['archi_id'], 2, 4);
- $mm = substr($this->meta_tags['archi_id'], 6, 2);
- $dd = substr($this->meta_tags['archi_id'], 8, 2);
- $this->unixtime = mktime(0, 0, 0, $mm, $dd, $yyyy);
- $this->date = strftime(LONG_DATE_FORMAT, $this->unixtime);
-
- //Gets author
- $authors = self::between('st_signature">', '</p>');
-
- if ($authors == "Rédaction en ligne") {
- $this->skipAuthor = true;
- } else {
- require_once('helpers/namecase.php');
-
- //Some Le Soir articles use firstname name, others name,firstname.
- //When there are several authors, ' ;' is the separator.
- //Authors are in uppercase, so we need to clean case.
-
- $authors = explode('; ', $authors);
- $start = true;
-
- foreach ($authors as $author) {
- if (strpos($author, ',') !== false) {
- $name = explode(',', $author, 2);
- $author = $name[1] . ' ' . $name[0];
- }
- $author = name_case($author);
- if ($start) {
- $this->author = name_case($author);
- $start = false;
- } else {
- $this->coauthors[] = name_case($author);
- }
+ $this->site = "[[Le Soir]]";
+
+ //Allows skipping the site-specific analysis for ArchivesLeSoirPage
+ if ($skipSpecificProcessing) {
+ return;
+ }
+
+ //Gets metadata
+ $meta = $this->between('<div class="meta">', '</div>');
+ $authors = trim(self::grab($meta, '<strong>', '</strong>'));
+ $date = self::grab($meta, 'class="prettydate">', ',');
+
+ //Processes metadata
+ $this->processAuthors($authors);
+ if ($date) {
+ $this->processDate($date);
+ }
+ }
+
+ protected function processDate ($date) {
+ $dateFragments = explode(' ', $date);
+ if (count($dateFragments) == 4) {
+ array_shift($dateFragments); //drops day name
+ }
+ list($this->dd, $this->mm, $this->yyyy) = $dateFragments;
+ }
+
+ protected function processAuthors ($authors) {
+ if ($authors == "Rédaction en ligne") {
+ $this->skipAuthor = true;
+ return;
+ }
+
+ require_once('helpers/namecase.php');
+
+ //Some Le Soir articles use firstname name, others name,firstname.
+ //When there are several authors, '; ' is the separator.
+ //Authors are in uppercase, so we need to clean case.
+
+ $authors = explode('; ', $authors);
+ $start = true;
+
+ foreach ($authors as $author) {
+ if (strpos($author, ',') !== false) {
+ $name = explode(',', $author, 2);
+ $author = $name[1] . ' ' . $name[0];
+ }
+ $author = name_case($author);
+ if ($start) {
+ $this->author = name_case($author);
+ $start = false;
+ } else {
+ $this->coauthors[] = name_case($author);
}
- }
+ }
}
+ /**
+ * Gets page title
+ */
function get_title () {
if (!$title = $this->meta_tags['og:title']) {
$title = parent::get_title();
- }
+ }
return $title;
}
}
-
-?>
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Tue, Jan 28, 08:44 (1 d, 2 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2375217
Default Alt Text
(5 KB)
Attached To
Mode
rSTG Source templates generator
Attached
Detach File
Event Timeline
Log In to Comment