Forum Moderators: coopster
// Strip noise from the lower-cased text in one pass and store the result in
// $uri_txt. The pattern uses the x (extended) modifier: whitespace in the
// pattern is ignored and comments start with "#". A "//" is NOT a comment
// inside a regex, and with "/" as the delimiter it would end the pattern
// early, so "~" is used as the delimiter instead.
$uri_txt = preg_replace('~
    ^[\s-]+ |                               # opening whitespace and hyphens
    " |                                     # double quotes
    # bad words
    \b(?:
        [a-z]\'(?:s|t|d|ve|re|ll|m) |       # contractions
        an? |
        about |
        are |
        a[st] |
        b[ey] |
        de |
        for |
        from |
        how |
        i[nst]? |
        la |
        o[fnr] |
        th(?:at|e|is) |
        to |
        was |
        what |
        when |
        where |
        who |
        will |
        with
    )\b |
    # http, www, com
    https?:// |
    www\. |
    \.(?:com|net|org|co|us) |
    [^a-zA-Z0-9\s-] |                       # anything but letters, digits, whitespace, hyphen
    [\s-]+$                                 # trailing whitespace and hyphens
~x', '', strtolower($text));

// NOTE(review): str_replace() matches substrings, not whole words, so this
// removes every letter "a" from $text (and "an" can then never match).
// Presumably whole-word removal was intended -- confirm before relying on it.
$text = str_replace(['a', 'an'], '', $text);

// Test 1
// Start the clock for the first benchmark and reset the result holder.
$start_time = microtime(true);
$test = false;
/**
 * Build a URL slug from free-form text using a single regex pass.
 *
 * The text is lower-cased, then stop words, double quotes, "http(s)://" and
 * "www." prefixes, a few webmail hosts, a few TLD suffixes and stray
 * punctuation are removed. Every remaining run of characters other than
 * a-z / 0-9 is collapsed into one hyphen, and hyphens are trimmed from both
 * ends.
 *
 * @param string $text Raw title or phrase.
 * @return string Slug made only of a-z, 0-9 and "-".
 */
function urify($text) {
    // Single-quoted so the literal double quote inside the pattern cannot
    // terminate the PHP string. "#" is the delimiter, hence the escaped "\#"
    // inside the character class.
    $stripped = preg_replace('#
        \b(?:
            an?d?|
            about|
            are|
            a[st]|
            b[ey]|
            for|
            from|
            how|
            i[nst]?|
            o[fnr]|
            th(?:at|e|is)|
            to|
            was|
            what|
            when|
            where|
            who|
            will|
            with|
            de|
            el|
            la
        )\b\s* |
        " |
        https?:// |
        www\. |
        @(?:gmail|yahoo|outlook|hotmail) |
        \.(?:com|net|org|co|us)\b |
        [^a-z\d_\s"`~@\#&*()=+;:/.?!]
    #x', '', strtolower($text));

    // After the next replacement only a-z, 0-9 and "-" remain, so "-" is the
    // only character that needs trimming. trim() takes a literal character
    // list, not a regex: a mask like '\s-' would strip the letter "s" too.
    return trim(preg_replace('#[^a-z\d]+#', '-', $stripped), '-');
}
// Run the regex-based urify() 10000 times, keeping the last result.
$i = 0;
while ($i < 10000) {
    $test = urify("this is a temp about test... for -csdude- 100% and isn't csdude's version of csdude@gmail.com, https://www.example.rest, and we're OK with it ");
    $i++;
}
$end_time = microtime(true);

// Report the final slug (wrapped in "#" so stray edge characters are
// visible) followed by the elapsed seconds.
echo 'Test 1: ', "#{$test}#<br>\n", $end_time - $start_time, "<br>\n<br>\n\n";
// Test 2
// Start the clock for the second benchmark and reset the result holder.
$start_time = microtime(true);
$test = false;
/**
 * Build a URL slug from free-form text, removing fixed tokens with
 * str_replace() and stop words / stray punctuation with a regex.
 *
 * The text is lower-cased first so that the fixed tokens ("http://", "www.",
 * ".com", ...) are removed regardless of the case they were typed in. Every
 * remaining run of characters other than a-z / 0-9 is collapsed into one
 * hyphen, and hyphens are trimmed from both ends.
 *
 * @param string $text Raw title or phrase.
 * @return string Slug made only of a-z, 0-9 and "-".
 */
function urifyTwo($text) {
    // Literal substrings to delete outright.
    $tokens = [
        '"',
        'http://',
        'https://',
        'www.',
        '@gmail',
        '@yahoo',
        '@outlook',
        '@hotmail',
        '.com',
        '.net',
        '.org',
        // these next 2 could have false positives, so maybe move them back to regex
        '.co',
        '.us',
    ];

    // Lower-case BEFORE the literal replacement: str_replace() is
    // case-sensitive and every token above is lower-case.
    $cleaned = str_replace($tokens, '', strtolower($text));

    // "#" is the delimiter, hence the escaped "\#" inside the character class.
    $stripped = preg_replace('#
        \b(?:
            an?d?|
            about|
            are|
            a[st]|
            b[ey]|
            for|
            from|
            how|
            i[nst]?|
            o[fnr]|
            th(?:at|e|is)|
            to|
            was|
            what|
            when|
            where|
            who|
            will|
            with|
            de|
            el|
            la
        )\b\s* |
        [^a-z\d_\s"`~@\#&*()=+;:/.?!]
    #x', '', $cleaned);

    // After the next replacement only a-z, 0-9 and "-" remain, so "-" is the
    // only character that needs trimming. trim() takes a literal character
    // list, not a regex: a mask like '\s-' would strip the letter "s" too.
    return trim(preg_replace('#[^a-z\d]+#', '-', $stripped), '-');
}
// Run urifyTwo() with the same iteration count as Test 1 (10000) so the two
// elapsed times can be compared directly.
for ($i = 0; $i < 10000; $i++) {
    $test = urifyTwo("this is a temp about test... for -csdude- 100% and isn't csdude's version of csdude@gmail.com, https://www.example.rest, and we're OK with it ");
}
$end_time = microtime(true);

// Report the final slug (wrapped in "#" so stray edge characters are
// visible) followed by the elapsed seconds.
echo 'Test 2: ';
echo "#$test#<br>\n";
echo $end_time - $start_time;
// Sample input: stop words, punctuation, contractions, an e-mail address, a
// URL and a trailing space.
$text = "this is a temp about test... for -csdude- 100% and isn't csdude's version of "
    . "example@gmail.com, https://www.example.rest, and we're OK with it ";
// Alternate input with no space after the "!":
// $text = "hey guys!What should we talk about?";
/**
 * Build a URL slug from free-form text using literal replacements only,
 * plus one final regex to insert hyphens.
 *
 * Apostrophes and backticks are deleted, the text is lower-cased and padded
 * with a space on each side, then common punctuation, space-delimited stop
 * words and a few URL / e-mail fragments are each replaced by a single
 * space. Every remaining run of characters other than a-z / 0-9 is collapsed
 * into one hyphen, and hyphens are trimmed from both ends.
 *
 * @param string $text Raw title or phrase.
 * @return string Slug made only of a-z, 0-9 and "-".
 */
function urify($text) {
    // Needles are applied in order. Punctuation goes first, which is why the
    // URL / TLD needles further down are written with spaces where the ":"
    // and "." used to be.
    $needles = [
        // common punctuation
        '.',
        '?',
        '!',
        '"', // double-quote
        ':',
        ',',
        // bad words
        ' a ',
        ' an ',
        ' and ',
        ' about ',
        ' are ',
        ' as ',
        ' at ',
        ' be ',
        ' by ',
        ' for ',
        ' from ',
        ' how ',
        ' i ',
        ' in ',
        ' is ',
        ' it ',
        ' of ',
        ' on ',
        ' or ',
        ' that ',
        ' the ',
        ' this ',
        ' to ',
        ' was ',
        ' what ',
        ' when ',
        ' where ',
        ' who ',
        ' will ',
        ' with ',
        ' de ',
        ' el ',
        ' la ',
        // URL and e-mail fragments (after punctuation has become spaces)
        'http //',
        'https //',
        'www ',
        '@gmail',
        '@yahoo',
        '@outlook',
        '@hotmail',
        ' com ',
        ' net ',
        ' org ',
        ' co ',
        ' us ',
    ];

    // Remove single quotes / backticks, lower-case, and pad both ends with a
    // space so the space-delimited stop words also match at the edges.
    $padded = ' ' . strtolower(str_replace(["'", '`'], '', $text)) . ' ';

    $text = str_replace($needles, ' ', $padded);

    // After the replacement below only a-z, 0-9 and "-" remain, so "-" is
    // the only character that needs trimming. trim() takes a literal
    // character list, not a regex: a mask like '\s-' would strip the letter
    // "s" too.
    return trim(preg_replace('/[^a-z\d]+/', '-', $text), '-');
}