Page Menu
Home
DevCentral
Search
Configure Global Search
Log In
Files
F11722922
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
13 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/pages/erudit.php b/pages/erudit.php
index 4e733bb..3094fea 100644
--- a/pages/erudit.php
+++ b/pages/erudit.php
@@ -1,15 +1,13 @@
<?php
//Page analysis for www.erudit.org
class EruditPage extends Page {
    /**
     * Analyses the page, then cleans up the citation_doi meta tag.
     *
     * The site publishes the DOI with stray whitespace, e.g.
     * <meta name="citation_doi" content=" 10.7202/012719ar" />
     */
    function analyse () {
        parent::analyse();

        //Only touches the tag when the page provides it, so a missing DOI
        //stays missing instead of becoming an empty entry (and no
        //undefined index notice is raised).
        if (isset($this->meta_tags['citation_doi'])) {
            //Drops every space, then trims any other surrounding whitespace
            $doi = str_replace(' ', '', $this->meta_tags['citation_doi']);
            $this->meta_tags['citation_doi'] = trim($doi);
        }
    }
}
-
-?>
diff --git a/pages/jstor.php b/pages/jstor.php
index e0d3ed0..e29ecb6 100644
--- a/pages/jstor.php
+++ b/pages/jstor.php
@@ -1,51 +1,49 @@
<?php
//Page analysis for www.jstor.org
class JSTORPage extends Page {
    /**
     * Initializes a new JSTORPage instance. If an error occurred, you can read it in $this->error.
     *
     * @param string $url the page URL
     */
    function __construct ($url) {
        $this->url = $url;
        $this->data = self::curl_download($url);
        $this->analyse();
    }

    /**
     * Gets the article title from the main citation block of the page.
     *
     * @return string the text found between the title markers
     */
    function get_title () {
        return self::between('<div class="mainCite jnlOverride"><div class="hd title">', '</div>');
    }

    /**
     * Analyses the page: runs the generic analysis, then reads author,
     * journal, ISSN, stable URL, publisher and issue information from the
     * HTML code.
     */
    function analyse () {
        parent::analyse();

        //From HTML code
        $this->author = trim(self::between('<div class="author">', '</div>'));
        $this->journal = trim(self::between('<h2>', "\n"));
        $this->issn = self::between('<div class="issn">ISSN: ', '</div>');

        //Keeps the requested URL when the page doesn't offer a stable one.
        //NOTE(review): presumably between() returns a falsy value when the
        //markers aren't found, as the publisher fallback below relies on it.
        $stableUrl = self::between('<div class="stable">Article Stable URL: ', '</div>');
        if ($stableUrl) {
            $this->url = $stableUrl;
        }

        //Publisher
        $pub = self::between('<div class="pubString">Published by: ', '</div>');
        $this->publisher = $pub ? self::grab($pub, '>', '</a>') : 'JSTOR';

        //Issue information
        $srcInfo = trim(self::between('<!-- Formatting requires these tags be mashed together -->', '</div>'));
        $this->volume = self::grab($srcInfo, "Vol. ", ",");
        $this->issue = self::grab($srcInfo, "No. ", " ");
        $this->yyyy = self::grab($srcInfo, '(', ')');

        //Pages are everything after "pp. ". strpos() returns false when
        //the marker is absent; it must not be used as an offset then.
        $pos = strpos($srcInfo, "pp. ");
        if ($pos !== false) {
            $this->pages = substr($srcInfo, $pos + 4);
        }
    }

    /**
     * Every JSTOR page handled by this class is considered an article.
     *
     * @return bool always true
     */
    function is_article () {
        return true;
    }
}
-
-?>
diff --git a/pages/lalibrebelgique.php b/pages/lalibrebelgique.php
index e418360..7f28bfc 100644
--- a/pages/lalibrebelgique.php
+++ b/pages/lalibrebelgique.php
@@ -1,55 +1,53 @@
<?php
//Page analysis for www.lalibre.be
class LaLibreBelgiquePage extends Page {
    /**
     * Analyses the page: converts it to UTF-8, runs the generic analysis,
     * then reads the publication date, the authors and the title.
     */
    function analyse () {
        //La Libre uses ISO-8859-1 and not UTF-8
        $this->data = iconv('iso-8859-1', 'utf-8', $this->data);

        //Calls parent analyzer
        parent::analyse();

        //Hardcoded known info
        $this->site = "La Libre Belgique";
        $this->skipYMD = true;

        //Gets date. The text starts with the day, month and year as
        //2, 2 and 4 digits, each separated by one character.
        //The date is only set when the text really has that shape, so a
        //missing or reworded date line doesn't produce a bogus timestamp.
        $date = trim(self::between('Mis en ligne le ', '</p>'));
        if (preg_match('/^(\d{2})\D(\d{2})\D(\d{4})/', $date, $parts)) {
            //Noon is used as the time of day
            $this->unixtime = mktime(12, 0, 0, (int)$parts[2], (int)$parts[1], (int)$parts[3]);
            $this->date = strftime(LONG_DATE_FORMAT, $this->unixtime);
        }

        //Gets authors
        $authors = trim(self::between('<p id="writer">', '</p>'));
        if (strpos($authors, 'daction ') !== false) {
            //"rédaction en ligne", "Rédaction web","Rédaction en ligne (avec afp)", etc.
            //(they're not coherent about case).
            $this->skipAuthor = true;
        } elseif ($authors !== '') {
            //The byline lists names separated by " et " or ", ":
            //the first one is the author, the others are coauthors.
            foreach (preg_split('/( et |, )/', $authors) as $i => $author) {
                //Fixes some authors
                switch ($author) {
                    case 'G. Dt': $author = 'Guy Duplat'; break;
                    case 'afp': $author = 'AFP'; break;
                }

                if ($i == 0) {
                    $this->author = $author;
                } else {
                    $this->coauthors[] = $author;
                }
            }
        }

        //Gets title: og:title metatag when available, <title> tag otherwise
        $ogTitle = isset($this->meta_tags['og:title']) ? $this->meta_tags['og:title'] : '';
        if ($ogTitle) {
            $this->title = $ogTitle;
        } else {
            $this->title = self::between("<title>Lalibre.be - ", "</title>");
        }
    }
}
-
-?>
diff --git a/pages/lefigaro.php b/pages/lefigaro.php
index dfd7891..55f8382 100644
--- a/pages/lefigaro.php
+++ b/pages/lefigaro.php
@@ -1,31 +1,29 @@
<?php
class LeFigaroPage extends Page {
    /**
     * Analyses the page: runs the generic analysis, then sets the known
     * site information and reads the date from the URL and the author
     * from the HTML code.
     */
    function analyse () {
        parent::analyse();

        //Hardcoded known info
        $this->site = "Le Figaro";
        $this->skipYMD = true;
        $this->issn = '0182-5852';

        //Gets date from the /yyyy/mm/dd/ part of the URL
        //e.g. http://www.lefigaro.fr/actualite-france/2013/05/24/01016-20130524ARTFIG00438-frigide-barjot-ne-pense-pas-manifester-dimanche.php
        //The date is only set when the URL contains such a part, instead
        //of slicing arbitrary characters out of an unexpected URL.
        if (preg_match('#/(20\d{2})/(\d{2})/(\d{2})/#', $this->url, $ymd)) {
            $this->unixtime = mktime(0, 0, 0, (int)$ymd[2], (int)$ymd[3], (int)$ymd[1]);
            $this->date = strftime(LONG_DATE_FORMAT, $this->unixtime);
        }

        //Gets author
        //e.g. <span itemprop="author" class="auteur txt12_120">Stéphanie Le Bars</span>
        //e.g. <a itemprop="name" href="#auteur" class="fig-anchor fig-picto-journaliste-haut">Stéphane Kovacs</a>
        //TODO: ensure no article has more than one author
        $author = (string)self::between('itemprop="name"', '</');

        //The name follows the end of the opening tag. strpos() returns
        //false when that end isn't found; it can't be used as offset then.
        $pos = strpos($author, '">');
        if ($pos !== false) {
            $this->author = substr($author, $pos + 2);
        }
    }
}
-
-?>
diff --git a/pages/lemonde.php b/pages/lemonde.php
index 853747e..74fbbe2 100644
--- a/pages/lemonde.php
+++ b/pages/lemonde.php
@@ -1,30 +1,28 @@
<?php
class LeMondePage extends Page {
    /**
     * Analyses the page: runs the generic analysis, then sets the known
     * site information and reads the date from the URL and the author
     * from the HTML code.
     */
    function analyse () {
        parent::analyse();

        //Hardcoded known info
        $this->site = "Le Monde";
        $this->skipYMD = true;
        $this->issn = '1950-6244';

        //Gets date from the /article/yyyy/mm/dd/ part of the URL
        // e.g. http://www.lemonde.fr/ameriques/article/2013/05/25/le-bresil-annule-la-dette-de-douze-pays-africains_3417518_3222.html
        //The date is only set when the URL contains such a part, instead
        //of slicing arbitrary characters out of an unexpected URL.
        if (preg_match('#/article/(\d{4})/(\d{2})/(\d{2})/#', $this->url, $ymd)) {
            $this->unixtime = mktime(0, 0, 0, (int)$ymd[2], (int)$ymd[3], (int)$ymd[1]);
            $this->date = strftime(LONG_DATE_FORMAT, $this->unixtime);
        }

        //Gets author
        //e.g. <span itemprop="author" class="auteur txt12_120">Stéphanie Le Bars</span>
        //TODO: ensure no article has more than one author
        $author = (string)self::between('itemprop="author"', '</');

        //The name follows the end of the opening tag. strpos() returns
        //false when that end isn't found; it can't be used as offset then.
        $pos = strpos($author, '">');
        if ($pos !== false) {
            $this->author = substr($author, $pos + 2);
        }
    }
}
-
-?>
diff --git a/pages/leschroniquesautomatiques.php b/pages/leschroniquesautomatiques.php
index cf9f05e..3409324 100644
--- a/pages/leschroniquesautomatiques.php
+++ b/pages/leschroniquesautomatiques.php
@@ -1,29 +1,27 @@
<?php
//Page analysis for www.chroniquesautomatiques.com
class LesChroniquesAutomatiquesPage extends Page {
    /**
     * Analyses the page: runs the generic analysis, then sets the known
     * site information and reads the post date from the HTML code.
     */
    function analyse () {
        parent::analyse();

        //Hardcoded known info
        $this->site = "Les Chroniques Automatiques";
        $this->author = "Dat’";
        $this->skipYMD = true;

        //Gets date, interpreted in the Europe/Paris timezone.
        //The default timezone is switched for the computation only.
        $old_tz = date_default_timezone_get();
        date_default_timezone_set('Europe/Paris');

        $date = date_parse(trim(self::between('This entry was posted on', 'and is filed under')));

        //date_parse() reports the fields it couldn't determine as false.
        //Without a full year/month/day, there is no date to publish.
        if ($date['year'] !== false && $date['month'] !== false && $date['day'] !== false) {
            //A missing time field counts as 0
            $this->unixtime = mktime(
                (int)$date['hour'], (int)$date['minute'], (int)$date['second'],
                $date['month'], $date['day'], $date['year']
            );
            $this->date = strftime(LONG_DATE_FORMAT, $this->unixtime);
        }

        //Restores the previous default timezone
        date_default_timezone_set($old_tz);
    }

    /**
     * Gets the page title, cut before the first ' »' separator.
     *
     * @return string the title part before ' »', or the full title when the separator is absent
     */
    function get_title () {
        $title = parent::get_title();

        //strpos() returns false when there is no separator. Used as a
        //length, false would cut the title down to an empty string.
        $pos = strpos($title, ' »');
        if ($pos === false) {
            return $title;
        }
        return substr($title, 0, $pos);
    }
}
-
-?>
diff --git a/pages/newyorktimes.php b/pages/newyorktimes.php
index d82b6d3..34a0c31 100644
--- a/pages/newyorktimes.php
+++ b/pages/newyorktimes.php
@@ -1,27 +1,25 @@
<?php
//Page analysis for www.nytimes.com
class NewYorkTimesPage extends Page {
    /**
     * Analyses the page: runs the generic analysis, then sets the known
     * site information and reads the date and the author from metatags.
     */
    function analyse () {
        parent::analyse();

        //Hardcoded known info
        $this->site = "New York Times";
        $this->skipYMD = true;

        //Gets date from pdate metatag, read as yyyymmdd.
        //The date is only set when the metatag exists and has that shape.
        $pdate = isset($this->meta_tags['pdate']) ? $this->meta_tags['pdate'] : '';
        if (preg_match('/^(\d{4})(\d{2})(\d{2})/', $pdate, $ymd)) {
            $this->unixtime = mktime(0, 0, 0, (int)$ymd[2], (int)$ymd[3], (int)$ymd[1]);
            $this->date = strftime(LONG_DATE_FORMAT, $this->unixtime);
        }

        //Gets author from byl metatag
        //TODO: Handle the several authors case
        if (isset($this->meta_tags['byl']) && $this->meta_tags['byl'] !== '') {
            //require_once, as a plain require would load the helper again
            //on a second analysis, and redeclaring its functions is fatal.
            require_once('helpers/namecase.php');

            //Skips the 3 first characters of the byline.
            //NOTE(review): presumably a "By " prefix — confirm.
            $author = substr($this->meta_tags['byl'], 3);
            $this->author = name_case($author);
        }
    }
}
-
-?>
diff --git a/pages/persee.php b/pages/persee.php
index 397c609..2347630 100644
--- a/pages/persee.php
+++ b/pages/persee.php
@@ -1,47 +1,45 @@
<?php
//Page analysis for www.persee.org
class PerseePage extends Page {
    /**
     * Initializes a new PerseePage instance. The page is downloaded with
     * the USER_AGENT_FALLBACK_FULL user agent, then analysed.
     *
     * @param string $url the page URL
     */
    function __construct ($url) {
        $this->url = $url;
        $this->data = self::curl_download($url, USER_AGENT_FALLBACK_FULL);
        $this->analyse();
    }

    /**
     * Analyses the page: runs the generic analysis, then sets the publisher.
     */
    function analyse () {
        parent::analyse();
        $this->publisher = 'Persée';
    }

    /**
     * Gets the metatags found by the parent class, completed by those
     * written with the content attribute before the name attribute.
     *
     * @return array the metatags, each name as key, each content as value
     */
    function get_all_meta_tags () {
        $tags = parent::get_all_meta_tags();

        //Round 2, as persee.fr uses <meta content="..." name="...">
        $pattern = '/<[\s]*meta[\s]*\bcontent\b="?([^>"]*)"?[\s]*name="?([^>"]*)"?[\s]*[\/]?[\s]*>/si';
        preg_match_all($pattern, $this->data, $found);

        //Expects the full matches and the two captured groups
        if (!isset($found) || !is_array($found) || count($found) != 3) {
            return $tags;
        }

        //Group 1 captures the content, group 2 the name
        $contents = $found[1];
        $keys = $found[2];

        //The three lists must have the same length to be paired
        $total = count($keys);
        if (count($found[0]) != $total || count($contents) != $total) {
            return $tags;
        }

        foreach ($keys as $index => $key) {
            $tags[$key] = $contents[$index];
        }

        return $tags;
    }

    /**
     * Every Persée page handled by this class is considered an article.
     *
     * @return bool always true
     */
    function is_article () {
        return true;
    }
}
-
-?>
diff --git a/pages/rue89.php b/pages/rue89.php
index a2e07c2..5377cb0 100644
--- a/pages/rue89.php
+++ b/pages/rue89.php
@@ -1,35 +1,33 @@
<?php
//Page analysis for www.rue89.com
class Rue89Page extends Page {
    /**
     * Analyses the page: runs the generic analysis, then sets the known
     * site information and reads the date from the URL and the author
     * from the HTML code.
     */
    function analyse () {
        parent::analyse();

        //Hardcoded known info
        $this->site = "Rue 89";
        $this->skipYMD = true;
        $this->issn = '1958-5837';

        //Gets date from the /yyyy/mm/dd/ part of the URL
        // http://www.rue89.com/2011/02/26/
        //The part is searched instead of read at fixed offsets, so the
        //URL scheme and host can differ from the example above.
        if (preg_match('#/(\d{4})/(\d{2})/(\d{2})/#', $this->url, $ymd)) {
            $this->unixtime = mktime(0, 0, 0, (int)$ymd[2], (int)$ymd[3], (int)$ymd[1]);
            $this->date = strftime(LONG_DATE_FORMAT, $this->unixtime);
        }

        //Gets author
        //TODO: ensure no article has more than one author
        $author = $this->find_author();
        if ($author !== false) {
            $this->author = $author;
        }
    }

    /**
     * Finds the author name in the HTML code: the text between the first
     * class="author"> following <div class="authors"> and the next </a>.
     *
     * @return string|false the author name, or false when the markers aren't found
     */
    function find_author () {
        $marker = 'class="author">';

        //Searches from the authors block, or from the start of the page
        //when there is no such block.
        $from = strpos($this->data, '<div class="authors">');
        if ($from === false) {
            $from = 0;
        }

        $start = strpos($this->data, $marker, $from);
        if ($start === false) {
            return false;
        }
        $start += strlen($marker);

        //'/a>' is looked for, the name ending one character before it, at '<'
        $end = strpos($this->data, '/a>', $start);
        if ($end === false || $end <= $start) {
            return false;
        }

        return substr($this->data, $start, $end - 1 - $start);
    }

    /**
     * Gets the article title.
     *
     * @return string the name metatag when the page has one, the parent class title otherwise
     */
    function get_title () {
        //Article title is the meta tag name, and not the page title
        if (isset($this->meta_tags['name']) && $this->meta_tags['name'] !== '') {
            return $this->meta_tags['name'];
        }
        return parent::get_title();
    }
}
-
-?>
diff --git a/pages/taylorandfrancis.php b/pages/taylorandfrancis.php
index 9aa2e75..b8c6f73 100644
--- a/pages/taylorandfrancis.php
+++ b/pages/taylorandfrancis.php
@@ -1,57 +1,55 @@
<?php
//Page analysis for www.tandfonline.com
class TaylorAndFrancisPage extends Page {
    /**
     * Initializes a new TaylorAndFrancisPage instance. If an error occurred, you can read it in $this->error.
     *
     * @param string $url the page URL
     */
    function __construct ($url) {
        $this->url = $url;
        $this->data = self::curl_download($url);
        $this->analyse();
    }

    /**
     * Analyses the page: runs the generic analysis, then sets the
     * publisher and reads the DOI, the issue information and the author
     * from the dc.* metatags.
     */
    function analyse () {
        parent::analyse();
        $this->publisher = 'Taylor & Francis';

        //DOI
        $this->doi = self::between('meta name="dc.Identifier" scheme="doi" content="', '"');

        //Gets the right dc.Identifier (coden scheme)
        //Expected format: <Issue name>, Vol. <Issue volume>, No. <Issue number>, <Issue date>, pp. <article pages>
        //e.g. Annals of Science, Vol. 68, No. 3, July 2011, pp. 325–350
        $identifier = (string)self::between('meta name="dc.Identifier" scheme="coden" content="', '"');

        $this->volume = self::grab($identifier, "Vol. ", ",");
        $this->issue = self::grab($identifier, "No. ", ",");

        //strpos() returns false when a marker is absent; it is then used
        //neither as a length nor as an offset.
        $pos = strpos($identifier, ", Vol. ");
        if ($pos !== false) {
            $this->journal = substr($identifier, 0, $pos);

            //Splits only what follows the issue name, as the name itself
            //can contain ", " and shift the position of the date.
            //Gives: "Vol. <volume>", "No. <number>", "<date>", "pp. <pages>"
            $details = explode(', ', substr($identifier, $pos + 2));
            if (isset($details[2])) {
                //The year is the last word of the date
                $date = explode(' ', $details[2]);
                $this->yyyy = array_pop($date);
            }
        }

        $pos = strpos($identifier, "pp. ");
        if ($pos !== false) {
            $this->pages = substr($identifier, $pos + 4);
        }

        //Author
        //TODO: handle several authors
        $author = trim((string)self::getMetaTag($this->meta_tags, 'dc.Creator'));
        $names = explode(' ', $author);
        if (count($names) == 2) {
            //Two words are swapped: "<first> <second>" becomes "<second>, <first>"
            $this->author = "$names[1], $names[0]";
        } else {
            $this->author = $author;
        }
    }

    /**
     * Every Taylor & Francis page handled by this class is considered an article.
     *
     * @return bool always true
     */
    function is_article () {
        return true;
    }
}
-
-?>
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Thu, Sep 18, 06:27 (21 h, 3 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2990127
Default Alt Text
(13 KB)
Attached To
Mode
rSTG Source templates generator
Attached
Detach File
Event Timeline
Log In to Comment