|
- <?php
- /*
- * Generates slice data for statistics of posts by date and sender
- * Copyright (C) 2020 Polyna <https://wandystan.eu/B196>
- *
- * This file is part of LDMW statistical scripts.
- *
- * LDMW statistical scripts are free software: you can redistribute them
- * and/or modify them under the terms of the GNU Affero General Public
- * License as published by the Free Software Foundation, either version 3
- * of the License, or (at your option) any later version.
- *
- * LDMW statistical scripts are distributed in the hope that they will be
- * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public
- * License along with LDMW statistical scripts. If not, see
- * <https://www.gnu.org/licenses/>.
- */
- require_once 'common.php';
-
- // Read the "date-start" and "date-end" query parameters and validate them.
- // Each must be present and match REGEXP_ISO8601_DATE_OR_INTERVAL (not
- // defined in this file), and at most one of them may be a period, i.e. a
- // value starting with "P".
- // NOTE(review): the elseif chain and the code after it assume exit_error()
- // terminates the script -- confirm in common.php.
- //
- // isset() keeps a missing parameter from raising an "undefined index" /
- // "undefined array key" diagnostic; it is reported as "not specified".
- $param_date_start = isset($_GET['date-start']) ? $_GET['date-start'] : '';
- $param_date_end = isset($_GET['date-end']) ? $_GET['date-end'] : '';
-
- if (! $param_date_start) {
-     exit_error('No start date specified.');
- } elseif (! $param_date_end) {
-     exit_error('No end date specified.');
- } elseif (! is_string($param_date_start) || ! mb_ereg_match('^' . REGEXP_ISO8601_DATE_OR_INTERVAL . '$', $param_date_start)) {
-     // is_string() rejects array-valued parameters (?date-start[]=...) before
-     // they reach mb_ereg_match(), which only accepts strings
-     exit_error('Invalid start date.');
- } elseif (! is_string($param_date_end) || ! mb_ereg_match('^' . REGEXP_ISO8601_DATE_OR_INTERVAL . '$', $param_date_end)) {
-     exit_error('Invalid end date.');
- } elseif (starts_with($param_date_start, 'P') && starts_with($param_date_end, 'P')) {
-     exit_error('Both start and end dates cannot be periods.');
- }
-
- // Turn the two request parameters into concrete UTC date objects. A
- // parameter starting with "P" is handed to DateInterval and applied to the
- // other endpoint: subtracted from the end date to obtain the start, or
- // added to the start date to obtain the end.
- $tz = new DateTimeZone('UTC');
-
- try {
-     $start_is_period = starts_with($param_date_start, 'P');
-     $end_is_period = starts_with($param_date_end, 'P');
-
-     // absolute endpoints are built first so that a relative endpoint has
-     // something to be computed from
-     if (! $start_is_period) {
-         $date_start = new DateTimeImmutable($param_date_start, $tz);
-     }
-
-     if (! $end_is_period) {
-         $date_end = new DateTimeImmutable($param_date_end, $tz);
-     }
-
-     if ($start_is_period) {
-         $date_start = $date_end->sub(new DateInterval($param_date_start));
-     }
-
-     if ($end_is_period) {
-         $date_end = $date_start->add(new DateInterval($param_date_end));
-     }
- } catch (Exception $e) {
-     exit_error('Invalid date specification: ' . $e->getMessage());
- }
-
- // Download the search results page for the requested period and wrap it
- // in an XPath evaluator. Any exception raised while loading is reported
- // through exit_error().
- try {
-     $search_from = $date_start->format('m/d/Y');
-     // the end date is moved back by one second before being formatted
-     // NOTE(review): presumably because the remote search treats "enddate"
-     // as inclusive -- confirm
-     $search_to = $date_end->modify('-1 second')->format('m/d/Y');
-
-     $search_url = 'https://wandystan.groups.io/g/wandystan/search?p=Created,,,10000,2,0,0&d=6&startdate='
-         . $search_from
-         . '&enddate='
-         . $search_to;
-
-     $document = load_remote_dom_document($search_url);
- } catch (Exception $e) {
-     exit_error($e->getMessage());
- }
-
- $xpath = new DOMXPath($document);
-
- // Find every message cell in the search results and count posts per
- // (date, sender) pair. $summary maps a serialized ['date' => ...,
- // 'sender' => ...] array to the number of messages seen for that pair.
- $messages = $xpath->query('//div[@id = "maincontent"]/table//td[span[@class = "subject"]]');
- $summary = [];
-
- foreach ($messages as $message) {
-     // sender: the text between "By " and " · #" in the text node following
-     // the "hidden-xs" div, with surrounding double quotes removed
-     $sender = trim($xpath->evaluate('substring-before(substring-after(normalize-space(div[@class = "hidden-xs"]/following-sibling::text()[1]), "By "), " · #")', $message), '"');
-     // timestamp: the first argument of the DisplayShortTime(...) call in
-     // the cell's "timedisp" script
-     $timestamp = $xpath->evaluate('substring-before(substring-after(.//script[@class = "timedisp"], "DisplayShortTime("), ",")', $message);
-
-     // A cell without a usable timestamp cannot be assigned to a day. Doing
-     // arithmetic on it would raise a TypeError on PHP 8 (or silently file
-     // the post under 1970-01-01 on PHP 7), so it is skipped.
-     if (! is_numeric($timestamp)) {
-         continue;
-     }
-
-     // The value is divided by 10^9 to obtain seconds (presumably it is in
-     // nanoseconds -- confirm). The explicit cast avoids handing a float
-     // with a fractional part to setTimestamp(), which expects an integer.
-     $seconds = (int) ($timestamp / 1000000000);
-     $date = (new DateTimeImmutable('now', $tz))->setTimestamp($seconds);
-
-     $key = serialize([
-         'date' => $date->format('Y-m-d'),
-         'sender' => $sender
-     ]);
-
-     // initialise the counter explicitly instead of incrementing an
-     // undefined array key
-     if (! isset($summary[$key])) {
-         $summary[$key] = 0;
-     }
-     $summary[$key]++;
- }
-
- // Turn each (date, sender) entry of $summary into an Observation node.
- $observations = [];
- foreach ($summary as $serialized_group => $post_count) {
-     // the array keys of $summary are serialized ['date', 'sender'] arrays
-     $group = unserialize($serialized_group);
-     $nameHash = hash('fnv1a32', $group['sender']);
-
-     $sender_node = [
-         '@id' => 'sender/' . $nameHash,
-         '@type' => 'Agent',
-         'name' => $group['sender'],
-         'name_fnv1a32sum' => $nameHash
-     ];
-
-     $observations[] = [
-         '@id' => 'by-period/' . $group['date'] . '/P1D#' . $nameHash,
-         '@type' => 'Observation',
-         'dataset' => 'by-period/',
-         'date' => $group['date'],
-         'sender' => $sender_node,
-         'posts' => $post_count
-     ];
- }
-
- // Assemble the linked data object: a Slice describing the requested
- // period, followed by the observations gathered above.
- $period_node = [
-     '@type' => 'Interval',
-     'hasBeginning' => [
-         '@type' => 'Instant',
-         'timestamp' => $date_start->format(DATE_ATOM)
-     ],
-     'hasEnd' => [
-         '@type' => 'Instant',
-         'timestamp' => $date_end->format(DATE_ATOM)
-     ]
- ];
-
- $ld = [
-     '@context' => 'https://wandystan.eu/statistics/context.jsonld',
-     '@type' => 'Slice',
-     'key' => 'key/by-period',
-     'period' => $period_node,
-     'observations' => $observations
- ];
-
- // send the document as JSON-LD
- header('Content-type: application/ld+json');
- echo json_encode($ld);
|