Skip to content
This repository was archived by the owner on Jul 7, 2024. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# EditorConfig is awesome: https://EditorConfig.org

# top-most EditorConfig file
root = true

[*]
indent_style = tab
indent_size = 2
end_of_line = lf
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = false
9 changes: 9 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
/phpstan.neon.dist export-ignore
/phpstan-baseline.neon export-ignore
/phpstan-constants.php export-ignore
/.gitignore export-ignore
/.gitattributes export-ignore
/tests/ export-ignore
/phpunit.xml.dist export-ignore
/.php-cs-fixer.dist.php export-ignore
/.editorconfig export-ignore
10 changes: 10 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
/vendor/
/composer.lock
/.phpunit.cache
/phpunit.xml
/.php-cs-fixer.cache
/.php-cs-fixer.php
/phpstan.neon
/bin/
/.php_cs.cache
/.php_cs.txt
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Changelog

All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## Unreleased
674 changes: 674 additions & 0 deletions LICENSE

Large diffs are not rendered by default.

11 changes: 7 additions & 4 deletions cli_indexer.php
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
<?php

if (isset($_SERVER['HTTP_HOST']))
if (isset($_SERVER['HTTP_HOST'])) {
die('ce script ne fonctionne qu\'en ligne de commande');
}

define('_CLI_', true);
define('_HTTPS', 'https'); ## seenthis
Expand All @@ -22,9 +23,11 @@

$command = $argv[1];

if (in_array('debug', $argv)) define ('_CLI_DEBUG', true);
if (in_array('debug', $argv)) {
define('_CLI_DEBUG', true);
}

switch(true) {
switch (true) {
case $command == 'tout':
seenthis_indexer_tout();
break;
Expand All @@ -33,7 +36,7 @@
break;
case $command == 'recents':
default:
seenthis_indexer_recent(30*24*3600);
seenthis_indexer_recent(30 * 24 * 3600);
break;
}

Expand Down
27 changes: 27 additions & 0 deletions composer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
{
"name": "seenthis/sphinx",
"description": "Sphinx pour Seenthis",
"license": "GPL-3.0-or-later",
"type": "spip-plugin",
"authors": [
{
"name": "Fil",
"email": "fil@rezo.net"
}
],
"require-dev": {
"dealerdirect/phpcodesniffer-composer-installer": "^1.0",
"phpcompatibility/php-compatibility": "10.x-dev",
"spip/coding-standards": "^1.3"
},
"config": {
"allow-plugins": {
"dealerdirect/phpcodesniffer-composer-installer": true
}
},
"extra": {
"branch-alias": {
"dev-master": "1.0.x-dev"
}
}
}
109 changes: 61 additions & 48 deletions indexer_sphinx.php
Original file line number Diff line number Diff line change
@@ -1,45 +1,46 @@
<?php

/**
 * Index a single message in Sphinx.
 *
 * Writes the requested id to the 'sphinx' log, then delegates the actual
 * indexing to seenthis_indexer_un().
 *
 * @param mixed $id_me Identifier of the message to index (logged verbatim
 *                     through var_export()).
 * @return void
 */
function indexer_sphinx($id_me) {
	// One log entry per request: logging twice only adds noise to the log.
	spip_log('indexer ' . var_export($id_me, true), 'sphinx');
	seenthis_indexer_un($id_me);
}

include_spip('sphinxql');

/**
 * (Re-)index one message selected by its id.
 *
 * @param mixed $id Message id; it is forced to an integer before being
 *                  placed in the SQL condition.
 * @return void
 */
function seenthis_indexer_un($id) {
	// A single pass is enough: running the same condition twice would only
	// repeat the same indexing work.
	seenthis_indexer_conditionnel('me.id_me=' . intval($id));
}

/**
 * Index the recently modified messages.
 *
 * This does not take new shares etc. into account, but it makes it possible
 * to catch up.
 *
 * Selects the distinct thread ids (id_parent when > 0, otherwise id_me) of
 * the rows of spip_me modified within the last $delais seconds and hands
 * them to seenthis_indexer_conditionnel().
 *
 * @param int $delais Look-back window, in seconds (default: 86400).
 * @return void
 */
function seenthis_indexer_recent($delais = 86400) {
	include_spip('base/abstract_sql');

	$r = sql_allfetsel(
		'DISTINCT(IF(id_parent>0,id_parent,id_me)) as id',
		'spip_me',
		// intval() keeps the INTERVAL strictly numeric whatever the caller passed.
		['date_modif > DATE_SUB(NOW(),INTERVAL ' . intval($delais) . ' SECOND)']
	);

	// Nothing to do when no row matched (or when the result is not a row list).
	if (!is_array($r) || !count($r)) {
		return;
	}

	// Each row carries a single column aliased `id`. array_column() reads it
	// directly, where array_map('array_shift', ...) hands a value to a
	// by-reference parameter and triggers a warning per row on PHP 8.
	$in = sql_in('me.id_me', array_column($r, 'id'));
	seenthis_indexer_conditionnel($in);
}


/**
 * Re-index every message, batch by batch, then optimise the Sphinx index.
 *
 * Reads MIN(id_me) and MAX(id_me) from spip_me, walks that range in windows
 * of 1000 ids, calling seenthis_indexer_conditionnel() once per window, and
 * finally sends OPTIMIZE INDEX for _SPHINXQL_INDEX. When the _CLI_ constant
 * is defined and truthy, the value returned by spip_timer() after each
 * window is echoed on its own line.
 *
 * @return void
 */
function seenthis_indexer_tout() {
	$req = spip_query('SELECT MIN(id_me) AS mini,MAX(id_me) AS maxi FROM spip_me');
	$t = sql_fetch($req);

	// MIN()/MAX() are NULL on an empty table and the fetch may yield no row:
	// fall back to 0 without raising notices.
	$mini = intval($t['mini'] ?? 0);
	$maxi = intval($t['maxi'] ?? 0);

	$step = 1000;
	$cli = defined('_CLI_') && _CLI_;

	for ($i = $mini; $i <= $maxi; $i += $step) {
		spip_timer('indexer_cond');
		// Half-open window [$i, $i + $step) so consecutive batches never overlap.
		seenthis_indexer_conditionnel('(me.id_me>=' . $i . ' AND me.id_me<' . ($i + $step) . ')');
		if ($cli) {
			echo spip_timer('indexer_cond'), "\n";
		}
	}

	sphinxql_query('OPTIMIZE INDEX ' . _SPHINXQL_INDEX);
}

function seenthis_indexer_conditionnel($where = '1=0') {
mb_internal_encoding("UTF-8"); # pour mb_strtolower ci-dessous
mb_internal_encoding('UTF-8'); # pour mb_strtolower ci-dessous
spip_query('SET SESSION group_concat_max_len = 1000000');

$query = '
Expand Down Expand Up @@ -71,7 +72,7 @@ function seenthis_indexer_conditionnel($where = '1=0') {
LEFT JOIN spip_me_share AS sh ON sh.id_me = me.id_me
WHERE
me.id_parent = 0
AND '.$where.'
AND ' . $where . '
AND (
1=1
OR me.date_modif > "2014-05-15"
Expand All @@ -89,78 +90,90 @@ function seenthis_indexer_conditionnel($where = '1=0') {
return false;
}

while($t = sql_fetch($req)) {
$b = array(
while ($t = sql_fetch($req)) {
$b = [
'id' => $t['id_me'],
'title' => ($t['statut'] == 'publi')
? seenthis_titre_me($t['texte'])
: '',
'uri' => _HTTPS . '://' . _HOST . '/messages/'.$t['id_me'],
'summary' => '@'.$t['login'].': '.$t['texte'],
'uri' => _HTTPS . '://' . _HOST . '/messages/' . $t['id_me'],
'summary' => '@' . $t['login'] . ': ' . $t['texte'],
'date' => strtotime($t['date']),
'content' => ($t['statut'] == 'publi')
? $t['texte'] . "\n\n----\n\n".$t['rept']
? $t['texte'] . "\n\n----\n\n" . $t['rept']
: '',
'properties' => array(
'properties' => [
'objet' => 'me',
'id_objet' => $t['id_me'],
'date' => $t['date'],
'auteurs' => array_values(array_unique(array_map('intval',
array_merge(array($t['id_auteur']), explode(',',$t['rauteurs']))
'auteurs' => array_values(array_unique(array_map(
'intval',
array_merge([$t['id_auteur']], explode(',', $t['rauteurs']))
))),
'authors' => array_values(array_unique(array_filter(array_map('strval',
array_merge(array($t['login']), explode(',',$t['rlogins']))
'authors' => array_values(array_unique(array_filter(array_map(
'strval',
array_merge([$t['login']], explode(',', $t['rlogins']))
)))),
'id_auteur' => intval($t['id_auteur']),
'login' => strval($t['login']),
'share' => array_values(array_unique(array_filter(array_map('intval',
array_merge(array($t['id_auteur']),explode(',',$t['share'])))))),
'share' => array_values(array_unique(array_filter(array_map(
'intval',
array_merge([$t['id_auteur']], explode(',', $t['share']))
)))),
'published' => intval($t['statut'] == 'publi'),
),
],
'signature' => '',
);
];

// gestion des tags
$censure = array('#for', '#via_google_reader');
$tags = array('tags' => array(), 'oc' => array(), 'url' => array());
foreach( array_values(array_unique(array_filter(
array_map('mb_strtolower',array_map('strval',
array_merge(explode(' | ',$t['tags']), explode(' | ',$t['rtags']))))))) as $m ) {
if (in_array($m, $censure)) {}
else if ($m[0] == '#') $tags['tags'][] = $m;
else if (preg_match(',^https?://,i', $m)) $tags['url'][] = $m;
else if (preg_match(',^(.+):(.+),', $m)) $tags['oc'][] = $m;
$censure = ['#for', '#via_google_reader'];
$tags = ['tags' => [], 'oc' => [], 'url' => []];
foreach (
array_values(array_unique(array_filter(
array_map('mb_strtolower', array_map(
'strval',
array_merge(explode(' | ', $t['tags']), explode(' | ', $t['rtags']))
))
))) as $m
) {
if (in_array($m, $censure)) {
}
elseif ($m[0] == '#') { $tags['tags'][] = $m;
} elseif (preg_match(',^https?://,i', $m)) { $tags['url'][] = $m;
} elseif (preg_match(',^(.+):(.+),', $m)) { $tags['oc'][] = $m;
}
}

#if (defined('_CLI_') AND _CLI_) var_dump($tags);
foreach($tags as $k => &$v) {
if (count($v))
foreach ($tags as $k => &$v) {
if (count($v)) {
$b['properties'][$k] = $v;
}
}

// normaliser les liens, on ne veut pas les indexer dans le fulltext
if (function_exists('seenthissphinx_normaliser_url')) {
foreach(array('title','content','summary') as $k)
$b[$k] = preg_replace_callback("/"._REG_URL."/uiS", 'seenthissphinx_normaliser_url', $b[$k]);
foreach (['title','content','summary'] as $k) {
$b[$k] = preg_replace_callback('/' . _REG_URL . '/uiS', 'seenthissphinx_normaliser_url', $b[$k]);
}
}

$b['properties'] = json_encode($b['properties']);


$c = array_map('sphinxql_escape_query', $b);

$query = "REPLACE INTO "._SPHINXQL_INDEX
." (id,title,uri, summary, date, content,properties,signature) VALUES ($c[id], $c[title], $c[uri], $c[summary], $c[date], $c[content], $c[properties], $c[signature])";
spip_log($query,'sphinx');
$query = 'REPLACE INTO ' . _SPHINXQL_INDEX
. " (id,title,uri, summary, date, content,properties,signature) VALUES ($c[id], $c[title], $c[uri], $c[summary], $c[date], $c[content], $c[properties], $c[signature])";

spip_log($query, 'sphinx');

$a = sphinxql_query($query);

if (defined('_CLI_') AND _CLI_) {
echo $a ? "+" : "-",$b['title'],' ', $b['uri'],"\n";
if (defined('_CLI_DEBUG') AND _CLI_DEBUG) echo $query,"\n";
}

if (defined('_CLI_') and _CLI_) {
echo $a ? '+' : '-',$b['title'],' ', $b['uri'],"\n";
if (defined('_CLI_DEBUG') and _CLI_DEBUG) { echo $query,"\n";
}
}
}

}
18 changes: 18 additions & 0 deletions paquet.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
<?xml version="1.0" encoding="UTF-8"?>
Comment thread
JamesRezo marked this conversation as resolved.
<paquet
prefix="seenthissphinx"
version="1.0.0"
etat="stable"
compatibilite="[4.1.0;4.1.*]"
>
<nom>Seenthis - Sphinx</nom>
<!-- Sphinx pour Seenthis -->
<auteur>Fil</auteur>

<pipeline nom="seenthis_instance_objet" action="indexer_me" />
<pipeline nom="cache_message" action="indexer_me" />

<necessite nom="seenthis" />
<necessite nom="indexer" />
<necessite nom="queue" />
</paquet>
14 changes: 14 additions & 0 deletions phpcs.xml.dist
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<?xml version="1.0"?>
<ruleset>
<file>.</file>
<exclude-pattern>lang/*</exclude-pattern>
<exclude-pattern>vendor/**/*</exclude-pattern>

<rule ref="SPIP41"/>

<config name="ignore_warnings_on_exit" value="1"/>
<arg name="cache" value=".php_cs.cache"/>
<arg name="report-full" value=".php_cs.txt"/>
<arg name="report-summary"/>
<arg value="s"/>
</ruleset>
Loading