Page MenuHomeDevCentral

No OneTemporary

diff --git a/pages/jstor.php b/pages/jstor.php
index e29ecb6..cfc9d85 100644
--- a/pages/jstor.php
+++ b/pages/jstor.php
@@ -1,49 +1,49 @@
<?php
//Page analysis for www.jstor.org
class JSTORPage extends Page {
/**
* Initializes a new JSTORPage instance. If an error occurred, you can read it in $this->error.
*
* @param string $url the page URL
*/
function __construct ($url) {
$this->url = $url;
- $this->data = self::curl_download($url);
+ $this->data = self::curl_download($url);
$this->analyse();
}
function get_title () {
return self::between('<div class="mainCite jnlOverride"><div class="hd title">', '</div>');
}
function analyse () {
parent::analyse();
//From HTML code
$this->author = trim(self::between('<div class="author">', '</div>'));
$this->journal = trim(self::between('<h2>', "\n"));
$this->issn = self::between('<div class="issn">ISSN: ', '</div>');
$this->url = self::between('<div class="stable">Article Stable URL: ', '</div>');
- //Publisher
+ //Publisher
$pub = self::between('<div class="pubString">Published by: ', '</div>');
- $this->publisher = $pub ? self::grab($pub, '>', '</a>') : 'JSTOR';
+ $this->publisher = $pub ? self::grab($pub, '>', '</a>') : 'JSTOR';
//Issue information
$srcInfo = trim(self::between('<!-- Formatting requires these tags be mashed together -->', '</div>'));
$this->volume = self::grab($srcInfo, "Vol. ", ",");
$this->issue = self::grab($srcInfo, "No. ", " ");
- $this->yyyy = self::grab($srcInfo, '(', ')');
+ $this->yyyy = self::grab($srcInfo, '(', ')');
$pos = strpos($srcInfo, "pp. ");
- $this->pages = substr($srcInfo, $pos + 4);
+ $this->pages = substr($srcInfo, $pos + 4);
}
function is_article () {
return true;
}
}
diff --git a/pages/lalibrebelgique.php b/pages/lalibrebelgique.php
index 7f28bfc..dc9b655 100644
--- a/pages/lalibrebelgique.php
+++ b/pages/lalibrebelgique.php
@@ -1,53 +1,53 @@
<?php
//Page analysis for www.lalibre.be
class LaLibreBelgiquePage extends Page {
function analyse () {
//La Libre uses ISO-8859-1 and not UTF-8
$this->data = iconv('iso-8859-1', 'utf-8', $this->data);
- //Calls parent analyzer
+ //Calls parent analyzer
parent::analyse();
//Hardcoded known info
$this->site = "La Libre Belgique";
$this->skipYMD = true;
//Gets date
$date = trim(self::between('Mis en ligne le ', '</p>'));
$yyyy = substr($date, 6, 4);
$mm = substr($date, 3, 2);
$dd = substr($date, 0, 2);
- $this->unixtime = mktime(12, 0, 0, $mm, $dd, $yyyy);
+ $this->unixtime = mktime(12, 0, 0, $mm, $dd, $yyyy);
$this->date = strftime(LONG_DATE_FORMAT, $this->unixtime);
- //Gets authors
+ //Gets authors
$authors = trim(self::between('<p id="writer">', '</p>'));
- if (strpos($authors, 'daction ') > 0) {
+ if (strpos($authors, 'daction ') > 0) {
//"rédaction en ligne", "Rédaction web","Rédaction en ligne (avec afp)", etc.
//(they're not consistent about case).
- $this->skipAuthor = true;
- } else {
+ $this->skipAuthor = true;
+ } else {
$authors = preg_split('/( et |, )/', $authors);
$start = true;
foreach ($authors as $author) {
//Fixes some authors
switch ($author) {
- case 'G. Dt': $author = 'Guy Duplat'; break;
- case 'afp': $author = 'AFP'; break;
+ case 'G. Dt': $author = 'Guy Duplat'; break;
+ case 'afp': $author = 'AFP'; break;
}
if ($start) {
$this->author = $author;
$start = false;
} else {
$this->coauthors[] = $author;
}
}
- }
+ }
//Gets title
if (!$this->title = $this->meta_tags['og:title']) {
$this->title = self::between("<title>Lalibre.be - ", "</title>");
}
}
}
diff --git a/pages/lefigaro.php b/pages/lefigaro.php
index 55f8382..0bb4974 100644
--- a/pages/lefigaro.php
+++ b/pages/lefigaro.php
@@ -1,29 +1,29 @@
<?php
class LeFigaroPage extends Page {
function analyse () {
parent::analyse();
//Hardcoded known info
$this->site = "Le Figaro";
$this->skipYMD = true;
$this->issn = '0182-5852';
//Gets date
//e.g. http://www.lefigaro.fr/actualite-france/2013/05/24/01016-20130524ARTFIG00438-frigide-barjot-ne-pense-pas-manifester-dimanche.php
- $pos = strpos($this->url, "/20") + 1;
+ $pos = strpos($this->url, "/20") + 1;
$yyyy = substr($this->url, $pos, 4);
$mm = substr($this->url, $pos + 5, 2);
$dd = substr($this->url, $pos + 8, 2);
$this->unixtime = mktime(0, 0, 0, $mm, $dd, $yyyy);
$this->date = strftime(LONG_DATE_FORMAT, $this->unixtime);
- //Gets author
- //e.g. <span itemprop="author" class="auteur txt12_120">Stéphanie Le Bars</span>
- //e.g. <a itemprop="name" href="#auteur" class="fig-anchor fig-picto-journaliste-haut">Stéphane Kovacs</a>
- //TODO: ensure no article has more than one author
- $author = self::between('itemprop="name"', '</');
- $pos = strpos($author, '">') + 2;
- $this->author = substr($author, $pos);
+ //Gets author
+ //e.g. <span itemprop="author" class="auteur txt12_120">Stéphanie Le Bars</span>
+ //e.g. <a itemprop="name" href="#auteur" class="fig-anchor fig-picto-journaliste-haut">Stéphane Kovacs</a>
+ //TODO: ensure no article has more than one author
+ $author = self::between('itemprop="name"', '</');
+ $pos = strpos($author, '">') + 2;
+ $this->author = substr($author, $pos);
}
}
diff --git a/pages/lemonde.php b/pages/lemonde.php
index 74fbbe2..bf5e13d 100644
--- a/pages/lemonde.php
+++ b/pages/lemonde.php
@@ -1,28 +1,28 @@
<?php
class LeMondePage extends Page {
function analyse () {
parent::analyse();
//Hardcoded known info
$this->site = "Le Monde";
$this->skipYMD = true;
$this->issn = '1950-6244';
//Gets date
// e.g. http://www.lemonde.fr/ameriques/article/2013/05/25/le-bresil-annule-la-dette-de-douze-pays-africains_3417518_3222.html
- $pos = strpos($this->url, "/article/");
+ $pos = strpos($this->url, "/article/");
$yyyy = substr($this->url, $pos + 9, 4);
$mm = substr($this->url, $pos + 14, 2);
$dd = substr($this->url, $pos + 17, 2);
$this->unixtime = mktime(0, 0, 0, $mm, $dd, $yyyy);
$this->date = strftime(LONG_DATE_FORMAT, $this->unixtime);
- //Gets author
- //e.g. <span itemprop="author" class="auteur txt12_120">Stéphanie Le Bars</span>
- //TODO: ensure no article has more than one author
- $author = self::between('itemprop="author"', '</');
- $pos = strpos($author, '">') + 2;
- $this->author = substr($author, $pos);
+ //Gets author
+ //e.g. <span itemprop="author" class="auteur txt12_120">Stéphanie Le Bars</span>
+ //TODO: ensure no article has more than one author
+ $author = self::between('itemprop="author"', '</');
+ $pos = strpos($author, '">') + 2;
+ $this->author = substr($author, $pos);
}
}
diff --git a/pages/leschroniquesautomatiques.php b/pages/leschroniquesautomatiques.php
index 3409324..7362dc3 100644
--- a/pages/leschroniquesautomatiques.php
+++ b/pages/leschroniquesautomatiques.php
@@ -1,27 +1,27 @@
<?php
//Page analysis for www.chroniquesautomatiques.com
class LesChroniquesAutomatiquesPage extends Page {
function analyse () {
parent::analyse();
//Hardcoded known info
$this->site = "Les Chroniques Automatiques";
$this->author = "Dat’";
$this->skipYMD = true;
//Gets date
$old_tz = date_default_timezone_get();
date_default_timezone_set('Europe/Paris');
- $date = date_parse(trim(self::between('This entry was posted on', 'and is filed under')));
+ $date = date_parse(trim(self::between('This entry was posted on', 'and is filed under')));
$this->unixtime = mktime($date['hour'], $date['minute'], $date['second'], $date['month'], $date['day'], $date['year']);
$this->date = strftime(LONG_DATE_FORMAT, $this->unixtime);
- $new_tz = date_default_timezone_set($old_tz);
+ $new_tz = date_default_timezone_set($old_tz);
}
function get_title () {
$title = parent::get_title();
- $pos = strpos($title, ' &raquo;');
+ $pos = strpos($title, ' &raquo;');
return substr($title, 0, $pos);
}
}
diff --git a/pages/newyorktimes.php b/pages/newyorktimes.php
index 34a0c31..c030bfe 100644
--- a/pages/newyorktimes.php
+++ b/pages/newyorktimes.php
@@ -1,25 +1,25 @@
<?php
//Page analysis for www.nytimes.com
class NewYorkTimesPage extends Page {
function analyse () {
parent::analyse();
//Hardcoded known info
$this->site = "New York Times";
$this->skipYMD = true;
//Gets date from pdate metatag
$yyyy = substr($this->meta_tags['pdate'], 0, 4);
$mm = substr($this->meta_tags['pdate'], 4, 2);
$dd = substr($this->meta_tags['pdate'], 6, 2);
$this->unixtime = mktime(0, 0, 0, $mm, $dd, $yyyy);
$this->date = strftime(LONG_DATE_FORMAT, $this->unixtime);
//Gets author
//TODO: Handle the several authors case
- require('helpers/namecase.php');
+ require('helpers/namecase.php');
$author = substr($this->meta_tags['byl'], 3);
$this->author = name_case($author);
}
}
diff --git a/pages/persee.php b/pages/persee.php
index cbc49ad..8223bc3 100644
--- a/pages/persee.php
+++ b/pages/persee.php
@@ -1,45 +1,44 @@
<?php
//Page analysis for www.persee.fr
class PerseePage extends Page {
/**
* Initializes a new PerseePage instance. If an error occurred, you can read it in $this->error.
*
* @param string $url the page URL
*/
function __construct ($url) {
$this->url = $url;
- $this->data = self::curl_download($url, USER_AGENT_FALLBACK_FULL);
+ $this->data = self::curl_download($url, USER_AGENT_FALLBACK_FULL);
$this->analyse();
}
function analyse () {
parent::analyse();
$this->publisher = 'Persée';
}
function get_all_meta_tags () {
$metaTags = parent::get_all_meta_tags();
//Round 2, as persee.fr uses <meta content="..." name="...">
preg_match_all('/<[\s]*meta[\s]*\bcontent\b="?' . '([^>"]*)"?[\s]*' . 'name="?([^>"]*)"?[\s]*[\/]?[\s]*>/si', $this->data, $match);
if (isset($match) && is_array($match) && count($match) == 3) {
$originals = $match[0];
$names = $match[2];
$values = $match[1];
if (count($originals) == count($names) && count($names) == count($values)) {
for ($i=0, $limiti = count($names) ; $i < $limiti ; $i++) {
$metaTags[$names[$i]] = $values[$i];
}
}
}
return $metaTags;
}
function is_article () {
return true;
}
-
}
diff --git a/pages/rue89.php b/pages/rue89.php
index 5377cb0..521726a 100644
--- a/pages/rue89.php
+++ b/pages/rue89.php
@@ -1,33 +1,32 @@
<?php
//Page analysis for www.rue89.com
class Rue89Page extends Page {
function analyse () {
parent::analyse();
//Hardcoded known info
$this->site = "Rue 89";
- $this->skipYMD = true;
$this->issn = '1958-5837';
//Gets date
// http://www.rue89.com/2011/02/26/
$yyyy = substr($this->url, 21, 4);
$mm = substr($this->url, 26, 2);
$dd = substr($this->url, 29, 2);
$this->unixtime = mktime(0, 0, 0, $mm, $dd, $yyyy);
$this->date = strftime(LONG_DATE_FORMAT, $this->unixtime);
- //Gets author
- //TODO: ensure no article has more than one author
+ //Gets author
+ //TODO: ensure no article has more than one author
$pos1 = strpos($this->data, '<div class="authors">');
$pos1 = strpos($this->data, 'class="author">', $pos1) + 15;
$pos2 = strpos($this->data, '/a>', $pos1) - 1;
$this->author = substr($this->data, $pos1, $pos2 - $pos1);
}
function get_title () {
- //Article title is the meta tag name, and not the page title
+ //Article title is the meta tag name, and not the page title
return $this->meta_tags['name'];
}
}
diff --git a/pages/taylorandfrancis.php b/pages/taylorandfrancis.php
index b8c6f73..6b92d90 100644
--- a/pages/taylorandfrancis.php
+++ b/pages/taylorandfrancis.php
@@ -1,55 +1,55 @@
<?php
//Page analysis for www.tandfonline.com
class TaylorAndFrancisPage extends Page {
/**
* Initializes a new TaylorAndFrancisPage instance. If an error occurred, you can read it in $this->error.
*
* @param string $url the page URL
*/
function __construct ($url) {
$this->url = $url;
- $this->data = self::curl_download($url);
+ $this->data = self::curl_download($url);
$this->analyse();
}
function analyse () {
parent::analyse();
- $this->publisher = 'Taylor & Francis';
+ $this->publisher = 'Taylor & Francis';
- //DOI
- $this->doi = self::between('meta name="dc.Identifier" scheme="doi" content="', '"');
+ //DOI
+ $this->doi = self::between('meta name="dc.Identifier" scheme="doi" content="', '"');
- //Gets the right dc.Identifier (coden scheme)
- //Expected format: <Issue name>, Vol. <Issue volume>, No. <Issue number>, <Issue date>, pp. <article pages>
- //e.g. Annals of Science, Vol. 68, No. 3, July 2011, pp. 325–350
+ //Gets the right dc.Identifier (coden scheme)
+ //Expected format: <Issue name>, Vol. <Issue volume>, No. <Issue number>, <Issue date>, pp. <article pages>
+ //e.g. Annals of Science, Vol. 68, No. 3, July 2011, pp. 325–350
$identifier = self::between('meta name="dc.Identifier" scheme="coden" content="', '"');
- $identifier_data = explode(', ', $identifier);
+ $identifier_data = explode(', ', $identifier);
$pos = strpos($identifier, ", Vol. ");
$this->journal = substr($identifier, 0, $pos);
$this->volume = self::grab($identifier, "Vol. ", ",");
$this->issue = self::grab($identifier, "No. ", ",");
- $date = explode(' ', $identifier_data[3]);
+ $date = explode(' ', $identifier_data[3]);
$this->yyyy = array_pop($date);
$pos = strpos($identifier, "pp. ");
- $this->pages = substr($identifier, $pos + 4);
-
- //Author
- //TODO: handle several authors
- $author = trim(self::getMetaTag($this->meta_tags, 'dc.Creator'));
- $names = explode(' ', $author);
- if (count($names) == 2) {
- $this->author = "$names[1], $names[0]";
- } else {
- $this->author = $author;
- }
+ $this->pages = substr($identifier, $pos + 4);
+
+ //Author
+ //TODO: handle several authors
+ $author = trim(self::getMetaTag($this->meta_tags, 'dc.Creator'));
+ $names = explode(' ', $author);
+ if (count($names) == 2) {
+ $this->author = "$names[1], $names[0]";
+ } else {
+ $this->author = $author;
+ }
}
function is_article () {
return true;
}
}

File Metadata

Mime Type
text/x-diff
Expires
Mon, Nov 25, 13:34 (1 d, 9 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2260259
Default Alt Text
(15 KB)

Event Timeline