<?php
/**
* Class Similarity Matrix:
* Finds similar words and sentences using a matrix.
* This class can be used for "searching similar news", where each sentence is a single news record with an ID.
* pashkovdenis@gmail.com
* 2013
* rommie Project
**/
/**
 * Contract for a word-similarity matrix built from MatrixString sentences.
 *
 * Methods are grouped as: construction/configuration, raw accessors, and
 * similarity queries.
 */
interface IMatrix {
    // --- construction and configuration ---------------------------------

    /**
     * Returns a shared matrix instance.
     */
    public static function matrix();

    /**
     * Adds one sentence to the matrix.
     *
     * @param MatrixString $string Sentence to add.
     */
    public function AddString(MatrixString $string);

    /**
     * Sets the similarity limit (defaults to 1.0).
     *
     * @param mixed $l New limit.
     */
    public function setLimit($l = 1.0);

    /**
     * Sets the multiplier (defaults to 10).
     *
     * @param mixed $m New multiplier.
     */
    public function setMulti($m = 10);

    // --- raw accessors --------------------------------------------------

    /**
     * Returns the sentences held by the matrix.
     */
    public function getStrings();

    /**
     * Returns the underlying matrix data.
     */
    public function getMatrix();

    /**
     * Returns the matrix width.
     */
    public function getWidth();

    /**
     * Returns the matrix height.
     */
    public function getHeight();

    /**
     * Renders the matrix as a string.
     */
    public function __toString();

    // --- similarity queries ---------------------------------------------

    /**
     * Tells whether the given word has a similar word in the matrix.
     *
     * @param mixed $word Word to look up.
     */
    public function hasSimilar($word);

    /**
     * Returns the words similar to the given word.
     *
     * @param mixed $word Word to look up.
     */
    public function getSimilar($word);

    /**
     * Returns the words attached to the given word.
     *
     * @param mixed $word Word to look up.
     */
    public function getAttached($word);

    /**
     * Returns the sentences similar to the sentence with the given id.
     *
     * @param mixed $id Identifier of the reference sentence.
     */
    public function getSimilarTo($id);
}
/*
* Matrix class
*/
/**
 * Similarity matrix over a collection of MatrixString sentences.
 *
 * Every added string becomes one row whose cells hold the numeric codes
 * (MatrixWord::getNumber()) of that string's words. After each insertion the
 * whole matrix is re-scanned: every word is paired with a partner word (see
 * calculateToWord()); the partner is recorded in the word's assigned_words and,
 * when the pair's score reaches the configured limit, in its similars_words.
 */
class Matrix implements IMatrix {
    /** @var Matrix|null Most recently constructed instance; returned by matrix(). */
    private static $self;
    /** @var array[] One row per string; each row lists the numeric codes of its words. */
    private $matrix = [ ];
    /** @var MatrixString[] Added strings, row-aligned with $matrix. Public because MatrixWord::accept() reads it. */
    public $strings = [ ];
    /** @var int Length of the longest row. */
    private $matrix_width;
    /** @var int Number of rows. */
    private $matrix_height;
    /** @var int|float Factor applied to a word's row index before the distance test. */
    private $multi;
    /** @var float Minimum score for a partner to be stored as "similar". */
    private $limit;

    /**
     * Creates an empty matrix and registers it as the shared instance.
     *
     * Note that every construction replaces the instance returned by matrix().
     */
    public function __construct() {
        // The original assigned matrix_height twice and left the width unset.
        $this->matrix_width = 0;
        $this->matrix_height = 0;
        $this->multi = 10;
        $this->limit = 1.0;
        self::$self = $this;
    }

    /**
     * Returns the shared instance, creating one on first use.
     *
     * @return Matrix
     */
    public static function matrix() {
        if (! isset ( self::$self )) {
            self::$self = new self ();
        }
        return self::$self;
    }

    /**
     * Appends a string as a new row and recalculates similarities.
     *
     * @param MatrixString $string String to add.
     * @return Matrix $this, for chaining.
     */
    public function AddString(MatrixString $string) {
        $this->strings [] = $string;
        // Words compute their radius against the matrix state, so the string
        // must already be registered before accept() is called.
        foreach ( $string->getWords () as $word ) {
            $word->accept ( $this );
        }
        $this->resizeMatrix ();
        return $this;
    }

    /**
     * Sets the minimum score a pair needs to be stored as similar.
     *
     * @param mixed $l New limit; cast to float.
     * @return Matrix $this, for chaining.
     */
    public function setLimit($l = 1.0) {
        $this->limit = ( double ) $l;
        return $this;
    }

    /**
     * Sets the row multiplier used by the distance test.
     *
     * @param mixed $m New multiplier; cast to float.
     * @return Matrix $this, for chaining.
     */
    public function setMulti($m = 10) {
        $this->multi = ( double ) $m;
        return $this;
    }

    /**
     * @return MatrixString[] All added strings, in insertion order.
     */
    public function getStrings() {
        return $this->strings;
    }

    /**
     * @return array[] Rows of word number codes.
     */
    public function getMatrix() {
        return $this->matrix;
    }

    /**
     * @return int Length of the longest row.
     */
    public function getWidth() {
        return $this->matrix_width;
    }

    /**
     * @return int Number of rows.
     */
    public function getHeight() {
        return $this->matrix_height;
    }

    /**
     * Renders the matrix as an HTML table, one row per string and one cell per
     * word (number code, radius, word text, similar and assigned words).
     *
     * Word text is HTML-escaped because it originates from arbitrary input.
     * Loop bounds are exclusive, so no trailing empty row/column is emitted.
     *
     * @return string
     */
    public function __toString() {
        $html = "<table class='tracert'>";
        for($x = 0; $x < $this->matrix_height; $x ++) {
            $html .= "<tr> <td> $x </td> ";
            for($i = 0; $i < $this->matrix_width; $i ++) {
                $html .= "<td>";
                if (isset ( $this->matrix [$x] [$i] )) {
                    $html .= $this->matrix [$x] [$i];
                }
                $rowWords = isset ( $this->strings [$x] ) ? $this->strings [$x]->getWords () : [ ];
                if (isset ( $rowWords [$i] )) {
                    $cellWord = $rowWords [$i];
                    $html .= "<br>";
                    $html .= "<p> R: {$cellWord->radius}</p> ";
                    $html .= "<p> R: " . $this->escape ( $cellWord->string ) . "</p> ";
                    foreach ( $cellWord->similars_words as $w ) {
                        $html .= " <p style='color:blue;'> Assigned TO : : " . $this->escape ( $w->string ) . "</p>";
                    }
                    foreach ( $cellWord->assigned_words as $w ) {
                        $html .= " <p style='color:green;'> in Touch With : " . $this->escape ( $w->string ) . "</p>";
                    }
                }
                $html .= "</td>";
            }
            $html .= "</tr>";
        }
        return $html . "</table>";
    }

    /**
     * Tells whether any word in the matrix lists $wordin among its similar words.
     *
     * @param mixed $wordin Word text to look for.
     * @return bool
     * @throws Exception When $wordin is empty or the matrix is empty.
     */
    public function hasSimilar($wordin) {
        $this->guardQuery ( $wordin );
        $needle = ( string ) $wordin;
        for($x = 0; $x < $this->matrix_height; $x ++) {
            for($i = 0; $i < $this->matrix_width; $i ++) {
                $cellWord = $this->wordAt ( $x, $i );
                if ($cellWord === null) {
                    continue;
                }
                foreach ( $cellWord->similars_words as $candidate ) {
                    if (( string ) $candidate->string === $needle) {
                        return true;
                    }
                }
            }
        }
        return false;
    }

    /**
     * Returns the similar words of the first matrix word whose text is $word.
     *
     * @param mixed $word Word text to look for.
     * @return MatrixWord[] Empty array when the word is not in the matrix.
     * @throws Exception When $word is empty or the matrix is empty.
     */
    public function getSimilar($word) {
        $this->guardQuery ( $word );
        $found = $this->findWord ( $word );
        return $found === null ? [ ] : $found->similars_words;
    }

    /**
     * Returns the assigned ("in touch with") words of the first matrix word
     * whose text is $word.
     *
     * The original iterated over the properties of a single word object and
     * therefore never matched anything.
     *
     * @param mixed $word Word text to look for.
     * @return MatrixWord[] Empty array when the word is not in the matrix.
     * @throws Exception When $word is empty or the matrix is empty.
     */
    public function getAttached($word) {
        $this->guardQuery ( $word );
        $found = $this->findWord ( $word );
        return $found === null ? [ ] : $found->assigned_words;
    }

    /**
     * Ranks the other strings by how many of their words are similar to the
     * words of the string identified by $id.
     *
     * NOTE: the result is keyed by score, so two strings with the same score
     * overwrite each other; the score is accumulated once per populated column
     * of the candidate row. Both are kept as-is to preserve the return shape.
     *
     * @param mixed $id Identifier of the reference string (compared loosely,
     *                  so "5" and 5 denote the same string).
     * @return array Map score => string id, sorted by ascending score.
     * @throws Exception When the matrix is empty or no string has that id.
     */
    public function getSimilarTo($id) {
        $this->guardNotEmpty ();
        $loaded = null;
        foreach ( $this->strings as $s ) {
            if ($s->getId () == $id) {
                $loaded = $s;
            }
        }
        if ($loaded === null) {
            throw new Exception ( "Not Found String" );
        }
        // Use the string that was actually found; the original read the loop
        // variable, i.e. always the last string added.
        $current_words = $loaded->getWords ();
        $similar = [ ];
        for($x = 0; $x < $this->matrix_height; $x ++) {
            $candidate = null;
            $hits = 0;
            for($i = 0; $i < $this->matrix_width; $i ++) {
                if ($this->wordAt ( $x, $i ) === null) {
                    continue;
                }
                if ($this->strings [$x]->getId () == $id) {
                    continue;
                }
                $candidate = $this->strings [$x];
                foreach ( $candidate->getWords () as $w ) {
                    foreach ( $current_words as $cw ) {
                        if ($cw->isSimilar ( $w->string )) {
                            $hits ++;
                        }
                    }
                }
            }
            if ($hits > 0) {
                $similar [$hits] = $candidate->getId ();
            }
        }
        ksort ( $similar );
        return $similar;
    }

    /**
     * Appends the number codes of the most recently added string as a new row,
     * refreshes width/height and recalculates similarities.
     */
    private function resizeMatrix() {
        $newest = $this->strings [count ( $this->strings ) - 1];
        $row = [ ];
        foreach ( $newest->getWords () as $word ) {
            $row [] = $word->getNumber ();
        }
        $this->matrix [] = $row;
        $this->matrix_height = count ( $this->matrix );
        $widest = 0;
        foreach ( $this->matrix as $cells ) {
            $widest = max ( $widest, count ( $cells ) );
        }
        $this->matrix_width = $widest;
        $this->reCalculateSimilarity ();
    }

    /**
     * Pairs every word with its partner word and records the relation.
     *
     * The whole matrix is rescanned on each call, so relations are only added
     * when not already present; the original appended duplicates every time.
     */
    private function reCalculateSimilarity() {
        for($x = 0; $x < $this->matrix_height; $x ++) {
            for($i = 0; $i < $this->matrix_width; $i ++) {
                $word = $this->wordAt ( $x, $i );
                if ($word === null) {
                    continue;
                }
                $partner = $this->calculateToWord ( $word, $x * $this->multi, $i );
                if (! is_object ( $partner )) {
                    continue;
                }
                // Strict in_array compares object identity, not structure.
                if (! in_array ( $partner, $word->assigned_words, true )) {
                    $word->assigned_words [] = $partner;
                }
                $total = $this->scoreNumbers ( $word->getNumber (), $partner->getNumber () );
                if ($total >= $this->limit && ! in_array ( $partner, $word->similars_words, true )) {
                    $word->addSimilar ( $partner );
                }
            }
        }
    }

    /**
     * Scores two number codes digit by digit.
     *
     * Equal digits add 1.0, digits differing by exactly one add 0.3, and every
     * position of $first that $second does not have subtracts 0.3. The sum is
     * scaled by (combined length / 100).
     *
     * The original compared the first code with itself for the "differs by
     * one" case (never true) and looped one position past the end of $first.
     *
     * @param mixed $first  Number code of the word being scored.
     * @param mixed $second Number code of its partner.
     * @return float
     */
    private function scoreNumbers($first, $second) {
        $first = ( string ) $first;
        $second = ( string ) $second;
        $length = strlen ( $first );
        $score = 0.0;
        for($y = 0; $y < $length; $y ++) {
            if (! isset ( $second [$y] )) {
                $score -= 0.3;
            } elseif ($first [$y] === $second [$y]) {
                $score += 1.0;
            } elseif (abs ( ( int ) $first [$y] - ( int ) $second [$y] ) === 1) {
                $score += 0.3;
            }
        }
        return (($length + strlen ( $second )) / 100) * $score;
    }

    /**
     * Picks the partner word for $w1.
     *
     * Scans all cells in row-major order and returns the LAST word whose
     * squared distance from ($x1, $y1) does not exceed the sum of both radii.
     * Candidate cells use their raw row/column indices, whereas $x1 arrives
     * already multiplied by the multiplier.
     * NOTE(review): comparing a squared distance with a plain radius sum looks
     * unintentional, but it is kept because it defines the current results.
     *
     * @param MatrixWord $w1 Word to find a partner for.
     * @param int|float  $x1 Row coordinate of $w1 (row index times multiplier).
     * @param int        $y1 Column index of $w1.
     * @return MatrixWord|null Null when no cell qualifies.
     */
    private function calculateToWord($w1, $x1, $y1) {
        $partner = null;
        for($x = 0; $x < $this->matrix_height; $x ++) {
            for($i = 0; $i < $this->matrix_width; $i ++) {
                $candidate = $this->wordAt ( $x, $i );
                if ($candidate === null) {
                    continue;
                }
                $d = pow ( $x1 - $x, 2 ) + pow ( $y1 - $i, 2 );
                if ($d > ($w1->radius + $candidate->radius)) {
                    continue;
                }
                $partner = $candidate;
            }
        }
        return $partner;
    }

    /**
     * Returns the word at a cell, or null when the cell or the word is missing.
     *
     * @param int $row Row index.
     * @param int $col Column index.
     * @return MatrixWord|null
     */
    private function wordAt($row, $col) {
        if (! isset ( $this->matrix [$row] [$col] ) || ! isset ( $this->strings [$row] )) {
            return null;
        }
        $rowWords = $this->strings [$row]->getWords ();
        return isset ( $rowWords [$col] ) ? $rowWords [$col] : null;
    }

    /**
     * Returns the first word (row-major order) whose text equals $text.
     *
     * Comparison is strict on the string form so that numeric look-alikes
     * such as "10" and "1e1" are not treated as equal.
     *
     * @param mixed $text Word text to look for.
     * @return MatrixWord|null
     */
    private function findWord($text) {
        $needle = ( string ) $text;
        for($x = 0; $x < $this->matrix_height; $x ++) {
            for($i = 0; $i < $this->matrix_width; $i ++) {
                $cellWord = $this->wordAt ( $x, $i );
                if ($cellWord !== null && ( string ) $cellWord->string === $needle) {
                    return $cellWord;
                }
            }
        }
        return null;
    }

    /**
     * Validates the arguments shared by the word queries.
     *
     * @param mixed $word Word passed by the caller.
     * @throws Exception When $word is empty or the matrix is empty.
     */
    private function guardQuery($word) {
        if (! $word) {
            throw new Exception ( "Empty Word" );
        }
        $this->guardNotEmpty ();
    }

    /**
     * @throws Exception When the matrix has neither rows nor columns.
     */
    private function guardNotEmpty() {
        if ($this->matrix_height == 0 && $this->matrix_width == 0) {
            throw new Exception ( "Empty Matrix " );
        }
    }

    /**
     * Escapes text for safe inclusion in the HTML produced by __toString().
     *
     * @param mixed $text Raw text.
     * @return string
     */
    private function escape($text) {
        return htmlspecialchars ( ( string ) $text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8' );
    }
}
// Single Word For Matrix
/**
 * A single word of a MatrixString, together with the relations the matrix
 * discovers for it.
 */
class MatrixWord {
    /** @var string|null Concatenated decimal byte codes of the word's characters. */
    private $number;
    /** @var string The word text as passed to the constructor. */
    public $string;
    /** @var MatrixWord[] Words stored as similar to this one. */
    public $similars_words = [ ];
    /** @var MatrixWord[] Words the matrix paired this word with. */
    public $assigned_words = [ ];
    /** @var float|null (length + position) * count * pi / 10; set by accept(). */
    public $radius;
    /** @var int Starts at 1; incremented per matrix string containing the word. */
    private $count;

    /**
     * Builds the word and its number code (ord() of every byte, concatenated).
     *
     * @param string $word Word text.
     */
    public function __construct($word = '') {
        $this->string = $word;
        foreach ( str_split ( $word ) as $byte ) {
            $this->number .= ord ( $byte );
        }
        $this->count = 1;
    }

    /**
     * Tells whether $word is among this word's similar words
     * (case-insensitive comparison).
     *
     * The original compared each similar word with itself, so it returned
     * true for any argument as soon as one similar word existed.
     *
     * @param mixed $word Word text to look for.
     * @return bool
     */
    public function isSimilar($word) {
        $needle = strtolower ( ( string ) $word );
        foreach ( $this->similars_words as $sim ) {
            if (strtolower ( ( string ) $sim->string ) === $needle) {
                return true;
            }
        }
        return false;
    }

    /**
     * @return string|null The number code built by the constructor.
     */
    public function getNumber() {
        return $this->number;
    }

    /**
     * Stores another word as similar to this one.
     *
     * @param MatrixWord $sima Similar word.
     * @return MatrixWord $this, for chaining.
     */
    public function addSimilar(MatrixWord $sima) {
        $this->similars_words [] = $sima;
        return $this;
    }

    /**
     * Computes this word's radius from the current matrix state.
     *
     * The count grows by one for every matrix string whose raw text contains
     * the word; the position is the index of the first equal word in the most
     * recently added string (or the word count when none matches).
     *
     * @param Matrix $matrix Matrix the word's string was just added to.
     * @return MatrixWord $this, for chaining.
     */
    public function accept(Matrix $matrix) {
        $allStrings = $matrix->getStrings ();
        $own = ( string ) $this->string;

        // Compare against '' instead of relying on truthiness: the word "0"
        // is a real word, and strstr() returns a falsy "0" when the match is
        // the last character of the haystack.
        if ($own !== '') {
            foreach ( $allStrings as $candidate ) {
                if (strpos ( $candidate->getRaw (), $own ) !== false) {
                    $this->count ++;
                }
            }
        }

        $position = 0;
        // An empty matrix has no "latest" string; keep position 0 then.
        $latest = empty ( $allStrings ) ? null : $allStrings [count ( $allStrings ) - 1];
        if ($latest !== null) {
            foreach ( $latest->getWords () as $sibling ) {
                if (( string ) $sibling->string === $own) {
                    break;
                }
                $position ++;
            }
        }

        $this->radius = ((strlen ( $own ) + $position) * $this->count * M_PI) / 10;
        return $this;
    }
}
// Single Matrix String ;
/**
 * One sentence (for example a news record) split into MatrixWord objects.
 */
class MatrixString {
    /** @var mixed Caller-supplied identifier of the sentence. */
    public $id;
    /** @var MatrixWord[] Words of the sentence, in order. */
    public $words = [ ];
    /** @var string Lower-cased sentence text. */
    private $rawstring;

    /**
     * Lower-cases the sentence and splits it on single spaces into words.
     *
     * Empty tokens produced by leading, trailing or repeated spaces are
     * skipped; the original turned each of them into an empty word.
     *
     * @param string $string Sentence text; must be at least 3 bytes long.
     * @param mixed  $id     Identifier to associate with the sentence.
     * @throws Exception When $string is shorter than 3 bytes.
     */
    public function __construct($string, $id = 0) {
        $this->id = $id;
        if (strlen ( $string ) < 3) {
            throw new Exception ( "To short string" );
        }
        $this->rawstring = strtolower ( $string );
        foreach ( explode ( " ", $this->rawstring ) as $token ) {
            if ($token === '') {
                continue;
            }
            $this->words [] = new MatrixWord ( $token );
        }
    }

    /**
     * @return mixed The identifier given at construction.
     */
    public function getId() {
        return $this->id;
    }

    /**
     * @return string The lower-cased sentence text.
     */
    public function getRaw() {
        return $this->rawstring;
    }

    /**
     * @return MatrixWord[] Words of the sentence, in order.
     */
    public function getWords() {
        return $this->words;
    }
}
|