Skrypty statystyk Listy Dyskusyjnej Mandragoratu Wandystanu https://wandystan.eu/statistics/doc/
Nie możesz wybrać więcej niż 25 tematów. Tematy muszą się zaczynać od litery lub cyfry, mogą zawierać myślniki ('-') i mogą mieć do 35 znaków.

131 wierszy
4.2 KiB

  1. <?php
  2. /*
  3. * Generates slice data for statistics of posts by date and sender
  4. * Copyright (C) 2020 Polyna <https://wandystan.eu/B196>
  5. *
  6. * This file is part of LDMW statistical scripts.
  7. *
  8. * LDMW statistical scripts are free software: you can redistribute them
  9. * and/or modify them under the terms of the GNU Affero General Public
  10. * License as published by the Free Software Foundation, either version 3
  11. * of the License, or (at your option) any later version.
  12. *
  13. * LDMW statistical scripts are distributed in the hope that they will be
  14. * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16. * GNU Affero General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU Affero General Public
  19. * License along with LDMW statistical scripts. If not, see
  20. * <https://www.gnu.org/licenses/>.
  21. */
  22. require_once 'common.php';
  // Read the request parameters and validate them.
  //
  // Both 'date-start' and 'date-end' are required. Each must match
  // REGEXP_ISO8601_DATE_OR_INTERVAL (defined outside this section); a value
  // starting with 'P' is treated as a period, and at most one of the two may
  // be a period.
  //
  // A missing or non-string parameter (e.g. 'date-start[]=x') is normalised to
  // '' so that it is reported as "not specified" instead of raising an
  // undefined-index notice or reaching mb_ereg_match() as an array.
  $param_date_start = (isset($_GET['date-start']) && is_string($_GET['date-start'])) ? $_GET['date-start'] : '';
  $param_date_end = (isset($_GET['date-end']) && is_string($_GET['date-end'])) ? $_GET['date-end'] : '';
  if (! $param_date_start) {
      exit_error('No start date specified.');
  } elseif (! $param_date_end) {
      exit_error('No end date specified.');
  } elseif (! mb_ereg_match('^' . REGEXP_ISO8601_DATE_OR_INTERVAL . '$', $param_date_start)) {
      exit_error('Invalid start date.');
  } elseif (! mb_ereg_match('^' . REGEXP_ISO8601_DATE_OR_INTERVAL . '$', $param_date_end)) {
      exit_error('Invalid end date.');
  } elseif (starts_with($param_date_start, 'P') && starts_with($param_date_end, 'P')) {
      // a period needs an absolute date at the other end to be anchored to
      exit_error('Both start and end dates cannot be periods.');
  }
  // Turn the two validated parameters into a pair of UTC instants.
  //
  // Whichever parameter is an absolute date is parsed directly; a parameter
  // that is a period (leading 'P') is resolved relative to the other end:
  // the start is the end minus the period, the end is the start plus it.
  $tz = new DateTimeZone('UTC');
  $start_is_period = starts_with($param_date_start, 'P');
  $end_is_period = starts_with($param_date_end, 'P');
  try {
      if ($start_is_period) {
          // end is absolute, start is derived from it
          $date_end = new DateTimeImmutable($param_date_end, $tz);
          $interval = new DateInterval($param_date_start);
          $date_start = $date_end->sub($interval);
      } elseif ($end_is_period) {
          // start is absolute, end is derived from it
          $date_start = new DateTimeImmutable($param_date_start, $tz);
          $interval = new DateInterval($param_date_end);
          $date_end = $date_start->add($interval);
      } else {
          // both ends are absolute dates
          $date_start = new DateTimeImmutable($param_date_start, $tz);
          $date_end = new DateTimeImmutable($param_date_end, $tz);
      }
  } catch (Exception $e) {
      exit_error('Invalid date specification: ' . $e->getMessage());
  }
  // Download the search results page covering the requested period and
  // prepare an XPath evaluator over it.
  try {
      $search_first_day = $date_start->format('m/d/Y');
      // the end instant is moved back by one second before being reduced to
      // a calendar day for the 'enddate' query parameter
      $search_last_day = $date_end->modify('-1 second')->format('m/d/Y');
      $search_url = 'https://wandystan.groups.io/g/wandystan/search?p=Created,,,10000,2,0,0&d=6&startdate='
          . $search_first_day
          . '&enddate='
          . $search_last_day;
      $document = load_remote_dom_document($search_url);
  } catch (Exception $e) {
      exit_error($e->getMessage());
  }
  $xpath = new DOMXPath($document);
  // Find all messages in the search results and count them per (date, sender).
  //
  // $summary maps serialize(['date' => 'Y-m-d', 'sender' => name]) to the
  // number of messages that sender posted on that UTC day.
  $messages = $xpath->query('//div[@id = "maincontent"]/table//td[span[@class = "subject"]]');
  $summary = [];
  foreach ($messages as $message) {
      // sender name: the text between "By " and " · #", without surrounding quotes
      $sender = trim($xpath->evaluate('substring-before(substring-after(normalize-space(div[@class = "hidden-xs"]/following-sibling::text()[1]), "By "), " · #")', $message), '"');
      // first argument of the DisplayShortTime(...) call embedded in the row
      $timestamp = $xpath->evaluate('substring-before(substring-after(.//script[@class = "timedisp"], "DisplayShortTime("), ",")', $message);
      if (! is_numeric($timestamp)) {
          // No usable timestamp in this row: skip it rather than failing on the
          // arithmetic below or counting the message under 1970-01-01.
          continue;
      }
      // The value is divided by 10^9 to obtain seconds (i.e. it is treated as
      // nanoseconds); the explicit cast gives setTimestamp() the integer it
      // expects instead of relying on an implicit float-to-int conversion.
      $date = (new DateTimeImmutable('now', $tz))->setTimestamp((int) ($timestamp / 1000000000));
      $key = serialize([
          'date' => $date->format('Y-m-d'),
          'sender' => $sender
      ]);
      // initialise the counter explicitly so the first hit raises no notice
      $summary[$key] = isset($summary[$key]) ? $summary[$key] + 1 : 1;
  }
  // Assemble the linked-data description of the slice: its context, type and
  // key, plus the covered period expressed as two ATOM-formatted instants.
  $period_beginning = [
      '@type' => 'Instant',
      'timestamp' => $date_start->format(DATE_ATOM)
  ];
  $period_end = [
      '@type' => 'Instant',
      'timestamp' => $date_end->format(DATE_ATOM)
  ];
  $ld = [];
  $ld['@context'] = 'https://wandystan.eu/statistics/context.jsonld';
  $ld['@type'] = 'Slice';
  $ld['key'] = 'key/by-period';
  $ld['period'] = [
      '@type' => 'Interval',
      'hasBeginning' => $period_beginning,
      'hasEnd' => $period_end
  ];
  // Turn every (date, sender) counter in $summary into an Observation node,
  // attach the list to the slice and send the result as JSON-LD.
  $observations = [];
  foreach ($summary as $key => $entry) {
      // the keys of $summary are strings this script produced with serialize()
      $key = unserialize($key);
      // short hash of the sender name, used to build the node identifiers
      $nameHash = hash('fnv1a32', $key['sender']);
      $observations []= [
          '@id' => 'by-period/' . $key['date'] . '/P1D#' . $nameHash,
          '@type' => 'Observation',
          'dataset' => "by-period/",
          'date' => $key['date'],
          'sender' => [
              '@id' => 'sender/' . $nameHash,
              '@type' => 'Agent',
              'name' => $key['sender'],
              'name_fnv1a32sum' => $nameHash
          ],
          'posts' => $entry
      ];
  }
  $ld['observations'] = $observations;
  // Encode before sending any header: json_encode() returns false on failure
  // (for instance when a scraped sender name is not valid UTF-8), and in that
  // case the error must be reported instead of an empty JSON-LD body.
  $json = json_encode($ld);
  if ($json === false) {
      // exit_error() is defined outside this section; it is used here the same
      // way as for the other failures in this script
      exit_error('Could not encode the result as JSON: ' . json_last_error_msg());
  }
  header('Content-type: application/ld+json');
  print $json;