Page Menu
Home
DevCentral
Search
Configure Global Search
Log In
Files
F7515764
D309.id726.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
7 KB
Referenced Files
None
Subscribers
None
D309.id726.diff
View Options
diff --git a/autoload.php b/autoload.php
--- a/autoload.php
+++ b/autoload.php
@@ -12,6 +12,19 @@
case 'ArticleTemplate': require('templates/wikipedia-fr/Article.php'); return;
case 'LienWebTemplate': require('templates/wikipedia-fr/Lien_web.php'); return;
case 'OuvrageTemplate': require('templates/wikipedia-fr/Ouvrage.php'); return;
+
+ case 'DownloadWithWget': require('pages/DownloadWithWget.php'); return;
+ }
+
+ if (substr($class, -4) === "Page") {
+ if (file_exists("pages/$class.php")) {
+ require "pages/$class.php";
+ return;
+ }
+
+ $file = strtolower(substr($class, 0, -4));
+ require "pages/$file.php";
+ return;
}
}
diff --git a/page.php b/page.php
--- a/page.php
+++ b/page.php
@@ -108,11 +108,11 @@
$this->get_data();
if ($this->data) {
$this->analyse();
- }
+ }
}
function get_data () {
- ini_set('user_agent', USER_AGENT);
+ ini_set('user_agent', USER_AGENT);
$data = file_get_contents($this->url);
if (!$data) {
ini_set('user_agent', USER_AGENT_FALLBACK);
@@ -121,11 +121,13 @@
return;
}
}
- $encoding = mb_detect_encoding($data, "ISO-8859-15, ISO-8859-1, UTF-8, ASCII, auto");
+ $this->encodeData();
+ }
+
+ function encodeData () {
+ $encoding = mb_detect_encoding($this->data, "ISO-8859-15, ISO-8859-1, UTF-8, ASCII, auto");
if ($encoding && $encoding != 'UTF-8') {
- $this->data = Encoding::toUTF8($data);
- } else {
- $this->data = $data;
+ $this->data = Encoding::toUTF8($this->data);
}
}
@@ -146,7 +148,6 @@
$file = strtolower(trim($page[1])) . '.php';
$class = trim($page[1]) . 'Page';
- require("pages/$file");
return new $class($url);
}
}
@@ -327,4 +328,27 @@
unlink($cookie_file);
return $data;
}
+
+ ///
+ /// DATES
+ ///
+
+    // Reads a /YYYY-MM-DD/ path segment of the URL into the yyyy, mm, dd properties;
+    // leaves them untouched when the URL has no such segment.
+    function extractYYYYMMDDDateFromURL() {
+        if (!preg_match("@/([12][0-9]{3})\-([0-9]{2})\-([0-9]{2})/@", $this->url, $parts)) {
+            return;
+        }
+        list(, $this->yyyy, $this->mm, $this->dd) = $parts;
+    }
+
+    // Reads a /YYYY-DD-MM/ path segment of the URL into the yyyy, dd, mm properties.
+    // NOTE(review): name looks like a typo for extractYYYYDDMMDateFromURL; confirm before renaming.
+    function extractYYYYDDMMateFromURL() {
+        if (!preg_match("@/([12][0-9]{3})\-([0-9]{2})\-([0-9]{2})/@", $this->url, $parts)) {
+            return;
+        }
+        list(, $this->yyyy, $this->dd, $this->mm) = $parts;
+    }
+
}
diff --git a/pages/DownloadWithWget.php b/pages/DownloadWithWget.php
new file mode 100644
--- /dev/null
+++ b/pages/DownloadWithWget.php
@@ -0,0 +1,37 @@
+<?php
+
+trait DownloadWithWget {
+
+    /**
+     * Creates an empty temporary file to receive the download
+     * @return string|false the file path, or false if it could not be created
+     */
+    private function getTemporaryFilename () {
+        return tempnam(sys_get_temp_dir(), "http-client-wget-");
+    }
+
+    /**
+     * Gets the content of the specified URL, using wget to download it
+     *
+     * @param string $url the URL to download
+     * @return string|false the content, or false if the file could not be read
+     */
+    function getFileContents ($url) {
+        $file = $this->getTemporaryFilename();
+        // Escape both arguments: the temporary directory path may contain spaces.
+        system("wget -q -O " . escapeshellarg($file) . " " . escapeshellarg($url));
+        $data = file_get_contents($file);
+        unlink($file);
+        return $data;
+    }
+
+    /**
+     * Downloads the URL through wget and fills the data property, then calls
+     * encodeData(), which the class using this trait must provide
+     */
+    function get_data () {
+        $this->data = $this->getFileContents($this->url);
+        $this->encodeData();
+    }
+
+}
diff --git a/pages/archiveslesoir.php b/pages/archiveslesoir.php
--- a/pages/archiveslesoir.php
+++ b/pages/archiveslesoir.php
@@ -1,25 +1,16 @@
<?php
-require 'lesoir.php';
-
/**
* Represents a page from the http://archives.lesoir.be/ site.
*/
class ArchivesLeSoirPage extends LeSoirPage {
+
/**
- * Analyses the page and extracts metadata
+ * Determines if this is the archive
+ * @return bool always true
*/
- function analyse ($skipSpecificProcessing = false) {
- parent::analyse(true);
-
- $authors = $this->between('<p class="st_signature">', '</p>');
- $date = trim($this->between('<p class="st_date">', '</p>'));
-
- $this->processAuthors($authors);
- $this->processDate($date);
+ function isArchive () {
+ return true;
}
- function get_title () {
- return $this->between('<h3 class="story_title main">', '</h3>');
- }
}
diff --git a/pages/lesoir.php b/pages/lesoir.php
--- a/pages/lesoir.php
+++ b/pages/lesoir.php
@@ -2,17 +2,30 @@
//Page analysis for www.lesoir.be
class LeSoirPage extends Page {
- function analyse ($skipSpecificProcessing = false) {
+
+ use DownloadWithWget;
+
+    /**
+     * Tells whether the URL points to the archives section of www.lesoir.be
+     * @return bool
+     */
+    function isArchive () {
+        return false !== strpos($this->url, "//www.lesoir.be/archives");
+    }
+
+ function analyse () {
parent::analyse();
- //Hardcoded known info
$this->site = "[[Le Soir]]";
- //Allows to skip the analyis for ArchivesLeSoirPage
- if ($skipSpecificProcessing) {
- return;
+ if ($this->isArchive()) {
+ $this->analyseForArchive();
+ } else {
+ $this->analyseForMainSite();
}
+ }
+ function analyseForMainSite () {
//Gets metadata
$meta = $this->between('<div class="meta">', '</div>');
$authors = trim(self::grab($meta, '<strong>', '</strong>'));
@@ -25,6 +38,17 @@
}
}
+    // Archive pages: authors from the st_signature block; date from st_date, else from the URL.
+    function analyseForArchive () {
+        $this->processAuthors($this->between('st_signature">', '</p>'));
+        $date = trim($this->between('<p class="st_date">', '</p>'));
+        if (!$date) {
+            $this->extractYYYYMMDDDateFromURL();
+            return;
+        }
+        $this->processDate($date);
+    }
+
protected function processDate ($date) {
$dateFragments = explode(' ', $date);
if (count($dateFragments) == 4) {
@@ -67,9 +91,20 @@
* Gets page title
*/
function get_title () {
+ if ($this->isArchive()) {
+ $title = $this->between('<h3 class="story_title main">', '</h3>');
+
+ if ($title === false) {
+ $title = $this->between('<h1>', '</h1>');
+ }
+
+ return $title;
+ }
+
if (!$title = $this->meta_tags['og:title']) {
$title = parent::get_title();
}
+
return $title;
}
}
diff --git a/tests/DownloadWithWgetTest.php b/tests/DownloadWithWgetTest.php
new file mode 100644
--- /dev/null
+++ b/tests/DownloadWithWgetTest.php
@@ -0,0 +1,34 @@
+<?php
+
+require 'pages/DownloadWithWget.php';
+
+class DownloadWithWgetTest extends \PHPUnit_Framework_TestCase {
+
+    /**
+     * Object using the DownloadWithWget trait.
+     *
+     * @var object
+     */
+    private $downloader;
+
+    /**
+     * Builds a fresh trait-backed object before each test.
+     *
+     * @return void
+     */
+    public function setUp () {
+        $this->downloader = $this->getObjectForTrait('DownloadWithWget');
+    }
+
+    /**
+     * Checks that getFileContents returns the body of a live page.
+     * Requires wget and network access to http://www.perdu.com.
+     */
+    public function testGetFileContents () {
+        $expected = "* <----- vous êtes ici";
+        $actual = $this->downloader->getFileContents("http://www.perdu.com");
+
+        $this->assertContains($expected, $actual);
+    }
+
+}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Mon, Apr 28, 23:58 (4 h, 20 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2613589
Default Alt Text
D309.id726.diff (7 KB)
Attached To
Mode
D309: Le Soir update
Attached
Detach File
Event Timeline
Log In to Comment