Page Menu
Home
DevCentral
Search
Configure Global Search
Log In
Files
F3769202
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
15 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/pages/jstor.php b/pages/jstor.php
index e29ecb6..cfc9d85 100644
--- a/pages/jstor.php
+++ b/pages/jstor.php
@@ -1,49 +1,49 @@
<?php
//Page analysis for www.jstor.org
class JSTORPage extends Page {
/**
* Initializes a new JSTORPage instance. If an error occurred, you can read it in $this->error.
*
* @param string $url the page URL
*/
function __construct ($url) {
$this->url = $url;
- $this->data = self::curl_download($url);
+ $this->data = self::curl_download($url);
$this->analyse();
}
function get_title () {
return self::between('<div class="mainCite jnlOverride"><div class="hd title">', '</div>');
}
function analyse () {
parent::analyse();
//From HTML code
$this->author = trim(self::between('<div class="author">', '</div>'));
$this->journal = trim(self::between('<h2>', "\n"));
$this->issn = self::between('<div class="issn">ISSN: ', '</div>');
$this->url = self::between('<div class="stable">Article Stable URL: ', '</div>');
- //Publisher
+ //Publisher
$pub = self::between('<div class="pubString">Published by: ', '</div>');
- $this->publisher = $pub ? self::grab($pub, '>', '</a>') : 'JSTOR';
+ $this->publisher = $pub ? self::grab($pub, '>', '</a>') : 'JSTOR';
//Issue information
$srcInfo = trim(self::between('<!-- Formatting requires these tags be mashed together -->', '</div>'));
$this->volume = self::grab($srcInfo, "Vol. ", ",");
$this->issue = self::grab($srcInfo, "No. ", " ");
- $this->yyyy = self::grab($srcInfo, '(', ')');
+ $this->yyyy = self::grab($srcInfo, '(', ')');
$pos = strpos($srcInfo, "pp. ");
- $this->pages = substr($srcInfo, $pos + 4);
+ $this->pages = substr($srcInfo, $pos + 4);
}
function is_article () {
return true;
}
}
diff --git a/pages/lalibrebelgique.php b/pages/lalibrebelgique.php
index 7f28bfc..dc9b655 100644
--- a/pages/lalibrebelgique.php
+++ b/pages/lalibrebelgique.php
@@ -1,53 +1,53 @@
<?php
//Page analysis for www.lalibre.be
class LaLibreBelgiquePage extends Page {
function analyse () {
//La Libre uses ISO-8859-1 and not UTF-8
$this->data = iconv('iso-8859-1', 'utf-8', $this->data);
- //Calls parent analyzer
+ //Calls parent analyzer
parent::analyse();
//Hardcoded known info
$this->site = "La Libre Belgique";
$this->skipYMD = true;
//Gets date
$date = trim(self::between('Mis en ligne le ', '</p>'));
$yyyy = substr($date, 6, 4);
$mm = substr($date, 3, 2);
$dd = substr($date, 0, 2);
- $this->unixtime = mktime(12, 0, 0, $mm, $dd, $yyyy);
+ $this->unixtime = mktime(12, 0, 0, $mm, $dd, $yyyy);
$this->date = strftime(LONG_DATE_FORMAT, $this->unixtime);
- //Gets authors
+ //Gets authors
$authors = trim(self::between('<p id="writer">', '</p>'));
- if (strpos($authors, 'daction ') > 0) {
+ if (strpos($authors, 'daction ') > 0) {
//"rédaction en ligne", "Rédaction web","Rédaction en ligne (avec afp)", etc.
//(they're not consistent about case).
- $this->skipAuthor = true;
- } else {
+ $this->skipAuthor = true;
+ } else {
$authors = preg_split('/( et |, )/', $authors);
$start = true;
foreach ($authors as $author) {
//Fixes some authors
switch ($author) {
- case 'G. Dt': $author = 'Guy Duplat'; break;
- case 'afp': $author = 'AFP'; break;
+ case 'G. Dt': $author = 'Guy Duplat'; break;
+ case 'afp': $author = 'AFP'; break;
}
if ($start) {
$this->author = $author;
$start = false;
} else {
$this->coauthors[] = $author;
}
}
- }
+ }
//Gets title
if (!$this->title = $this->meta_tags['og:title']) {
$this->title = self::between("<title>Lalibre.be - ", "</title>");
}
}
}
diff --git a/pages/lefigaro.php b/pages/lefigaro.php
index 55f8382..0bb4974 100644
--- a/pages/lefigaro.php
+++ b/pages/lefigaro.php
@@ -1,29 +1,29 @@
<?php
class LeFigaroPage extends Page {
function analyse () {
parent::analyse();
//Hardcoded known info
$this->site = "Le Figaro";
$this->skipYMD = true;
$this->issn = '0182-5852';
//Gets date
//e.g. http://www.lefigaro.fr/actualite-france/2013/05/24/01016-20130524ARTFIG00438-frigide-barjot-ne-pense-pas-manifester-dimanche.php
- $pos = strpos($this->url, "/20") + 1;
+ $pos = strpos($this->url, "/20") + 1;
$yyyy = substr($this->url, $pos, 4);
$mm = substr($this->url, $pos + 5, 2);
$dd = substr($this->url, $pos + 8, 2);
$this->unixtime = mktime(0, 0, 0, $mm, $dd, $yyyy);
$this->date = strftime(LONG_DATE_FORMAT, $this->unixtime);
- //Gets author
- //e.g. <span itemprop="author" class="auteur txt12_120">Stéphanie Le Bars</span>
- //e.g. <a itemprop="name" href="#auteur" class="fig-anchor fig-picto-journaliste-haut">Stéphane Kovacs</a>
- //TODO: ensure no article has more than one author
- $author = self::between('itemprop="name"', '</');
- $pos = strpos($author, '">') + 2;
- $this->author = substr($author, $pos);
+ //Gets author
+ //e.g. <span itemprop="author" class="auteur txt12_120">Stéphanie Le Bars</span>
+ //e.g. <a itemprop="name" href="#auteur" class="fig-anchor fig-picto-journaliste-haut">Stéphane Kovacs</a>
+ //TODO: ensure no article has more than one author
+ $author = self::between('itemprop="name"', '</');
+ $pos = strpos($author, '">') + 2;
+ $this->author = substr($author, $pos);
}
}
diff --git a/pages/lemonde.php b/pages/lemonde.php
index 74fbbe2..bf5e13d 100644
--- a/pages/lemonde.php
+++ b/pages/lemonde.php
@@ -1,28 +1,28 @@
<?php
class LeMondePage extends Page {
function analyse () {
parent::analyse();
//Hardcoded known info
$this->site = "Le Monde";
$this->skipYMD = true;
$this->issn = '1950-6244';
//Gets date
// e.g. http://www.lemonde.fr/ameriques/article/2013/05/25/le-bresil-annule-la-dette-de-douze-pays-africains_3417518_3222.html
- $pos = strpos($this->url, "/article/");
+ $pos = strpos($this->url, "/article/");
$yyyy = substr($this->url, $pos + 9, 4);
$mm = substr($this->url, $pos + 14, 2);
$dd = substr($this->url, $pos + 17, 2);
$this->unixtime = mktime(0, 0, 0, $mm, $dd, $yyyy);
$this->date = strftime(LONG_DATE_FORMAT, $this->unixtime);
- //Gets author
- //e.g. <span itemprop="author" class="auteur txt12_120">Stéphanie Le Bars</span>
- //TODO: ensure no article has more than one author
- $author = self::between('itemprop="author"', '</');
- $pos = strpos($author, '">') + 2;
- $this->author = substr($author, $pos);
+ //Gets author
+ //e.g. <span itemprop="author" class="auteur txt12_120">Stéphanie Le Bars</span>
+ //TODO: ensure no article has more than one author
+ $author = self::between('itemprop="author"', '</');
+ $pos = strpos($author, '">') + 2;
+ $this->author = substr($author, $pos);
}
}
diff --git a/pages/leschroniquesautomatiques.php b/pages/leschroniquesautomatiques.php
index 3409324..7362dc3 100644
--- a/pages/leschroniquesautomatiques.php
+++ b/pages/leschroniquesautomatiques.php
@@ -1,27 +1,27 @@
<?php
//Page analysis for www.chroniquesautomatiques.com
class LesChroniquesAutomatiquesPage extends Page {
function analyse () {
parent::analyse();
//Hardcoded known info
$this->site = "Les Chroniques Automatiques";
$this->author = "Dat’";
$this->skipYMD = true;
//Gets date
$old_tz = date_default_timezone_get();
date_default_timezone_set('Europe/Paris');
- $date = date_parse(trim(self::between('This entry was posted on', 'and is filed under')));
+ $date = date_parse(trim(self::between('This entry was posted on', 'and is filed under')));
$this->unixtime = mktime($date['hour'], $date['minute'], $date['second'], $date['month'], $date['day'], $date['year']);
$this->date = strftime(LONG_DATE_FORMAT, $this->unixtime);
- $new_tz = date_default_timezone_set($old_tz);
+ $new_tz = date_default_timezone_set($old_tz);
}
function get_title () {
$title = parent::get_title();
- $pos = strpos($title, ' »');
+ $pos = strpos($title, ' »');
return substr($title, 0, $pos);
}
}
diff --git a/pages/newyorktimes.php b/pages/newyorktimes.php
index 34a0c31..c030bfe 100644
--- a/pages/newyorktimes.php
+++ b/pages/newyorktimes.php
@@ -1,25 +1,25 @@
<?php
//Page analysis for www.nytimes.com
class NewYorkTimesPage extends Page {
function analyse () {
parent::analyse();
//Hardcoded known info
$this->site = "New York Times";
$this->skipYMD = true;
//Gets date from pdate metatag
$yyyy = substr($this->meta_tags['pdate'], 0, 4);
$mm = substr($this->meta_tags['pdate'], 4, 2);
$dd = substr($this->meta_tags['pdate'], 6, 2);
$this->unixtime = mktime(0, 0, 0, $mm, $dd, $yyyy);
$this->date = strftime(LONG_DATE_FORMAT, $this->unixtime);
//Gets author
//TODO: Handle the several authors case
- require('helpers/namecase.php');
+ require('helpers/namecase.php');
$author = substr($this->meta_tags['byl'], 3);
$this->author = name_case($author);
}
}
diff --git a/pages/persee.php b/pages/persee.php
index cbc49ad..8223bc3 100644
--- a/pages/persee.php
+++ b/pages/persee.php
@@ -1,45 +1,44 @@
<?php
//Page analysis for www.persee.fr
class PerseePage extends Page {
/**
* Initializes a new PerseePage instance. If an error occurred, you can read it in $this->error.
*
* @param string $url the page URL
*/
function __construct ($url) {
$this->url = $url;
- $this->data = self::curl_download($url, USER_AGENT_FALLBACK_FULL);
+ $this->data = self::curl_download($url, USER_AGENT_FALLBACK_FULL);
$this->analyse();
}
function analyse () {
parent::analyse();
$this->publisher = 'Persée';
}
function get_all_meta_tags () {
$metaTags = parent::get_all_meta_tags();
//Round 2, as persee.fr uses <meta content="..." name="...">
preg_match_all('/<[\s]*meta[\s]*\bcontent\b="?' . '([^>"]*)"?[\s]*' . 'name="?([^>"]*)"?[\s]*[\/]?[\s]*>/si', $this->data, $match);
if (isset($match) && is_array($match) && count($match) == 3) {
$originals = $match[0];
$names = $match[2];
$values = $match[1];
if (count($originals) == count($names) && count($names) == count($values)) {
for ($i=0, $limiti = count($names) ; $i < $limiti ; $i++) {
$metaTags[$names[$i]] = $values[$i];
}
}
}
return $metaTags;
}
function is_article () {
return true;
}
-
}
diff --git a/pages/rue89.php b/pages/rue89.php
index 5377cb0..521726a 100644
--- a/pages/rue89.php
+++ b/pages/rue89.php
@@ -1,33 +1,32 @@
<?php
//Page analysis for www.rue89.com
class Rue89Page extends Page {
function analyse () {
parent::analyse();
//Hardcoded known info
$this->site = "Rue 89";
- $this->skipYMD = true;
$this->issn = '1958-5837';
//Gets date
// http://www.rue89.com/2011/02/26/
$yyyy = substr($this->url, 21, 4);
$mm = substr($this->url, 26, 2);
$dd = substr($this->url, 29, 2);
$this->unixtime = mktime(0, 0, 0, $mm, $dd, $yyyy);
$this->date = strftime(LONG_DATE_FORMAT, $this->unixtime);
- //Gets author
- //TODO: ensure no article has more than one author
+ //Gets author
+ //TODO: ensure no article has more than one author
$pos1 = strpos($this->data, '<div class="authors">');
$pos1 = strpos($this->data, 'class="author">', $pos1) + 15;
$pos2 = strpos($this->data, '/a>', $pos1) - 1;
$this->author = substr($this->data, $pos1, $pos2 - $pos1);
}
function get_title () {
- //Article title is the meta tag name, and not the page title
+ //Article title is the meta tag name, and not the page title
return $this->meta_tags['name'];
}
}
diff --git a/pages/taylorandfrancis.php b/pages/taylorandfrancis.php
index b8c6f73..6b92d90 100644
--- a/pages/taylorandfrancis.php
+++ b/pages/taylorandfrancis.php
@@ -1,55 +1,55 @@
<?php
//Page analysis for www.tandfonline.com
class TaylorAndFrancisPage extends Page {
/**
* Initializes a new TaylorAndFrancisPage instance. If an error occurred, you can read it in $this->error.
*
* @param string $url the page URL
*/
function __construct ($url) {
$this->url = $url;
- $this->data = self::curl_download($url);
+ $this->data = self::curl_download($url);
$this->analyse();
}
function analyse () {
parent::analyse();
- $this->publisher = 'Taylor & Francis';
+ $this->publisher = 'Taylor & Francis';
- //DOI
- $this->doi = self::between('meta name="dc.Identifier" scheme="doi" content="', '"');
+ //DOI
+ $this->doi = self::between('meta name="dc.Identifier" scheme="doi" content="', '"');
- //Gets the right dc.Identifier (coden scheme)
- //Expected format: <Issue name>, Vol. <Issue volume>, No. <Issue number>, <Issue date>, pp. <article pages>
- //e.g. Annals of Science, Vol. 68, No. 3, July 2011, pp. 325–350
+ //Gets the right dc.Identifier (coden scheme)
+ //Expected format: <Issue name>, Vol. <Issue volume>, No. <Issue number>, <Issue date>, pp. <article pages>
+ //e.g. Annals of Science, Vol. 68, No. 3, July 2011, pp. 325–350
$identifier = self::between('meta name="dc.Identifier" scheme="coden" content="', '"');
- $identifier_data = explode(', ', $identifier);
+ $identifier_data = explode(', ', $identifier);
$pos = strpos($identifier, ", Vol. ");
$this->journal = substr($identifier, 0, $pos);
$this->volume = self::grab($identifier, "Vol. ", ",");
$this->issue = self::grab($identifier, "No. ", ",");
- $date = explode(' ', $identifier_data[3]);
+ $date = explode(' ', $identifier_data[3]);
$this->yyyy = array_pop($date);
$pos = strpos($identifier, "pp. ");
- $this->pages = substr($identifier, $pos + 4);
-
- //Author
- //TODO: handle several authors
- $author = trim(self::getMetaTag($this->meta_tags, 'dc.Creator'));
- $names = explode(' ', $author);
- if (count($names) == 2) {
- $this->author = "$names[1], $names[0]";
- } else {
- $this->author = $author;
- }
+ $this->pages = substr($identifier, $pos + 4);
+
+ //Author
+ //TODO: handle several authors
+ $author = trim(self::getMetaTag($this->meta_tags, 'dc.Creator'));
+ $names = explode(' ', $author);
+ if (count($names) == 2) {
+ $this->author = "$names[1], $names[0]";
+ } else {
+ $this->author = $author;
+ }
}
function is_article () {
return true;
}
}
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Mon, Nov 25, 13:34 (1 d, 9 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2260259
Default Alt Text
(15 KB)
Attached To
Mode
rSTG Source templates generator
Attached
Detach File
Event Timeline
Log In to Comment