
Do you want to extract the main domain of a URL? Here is a function that works well and can be modified easily. I found it while searching for a good regular expression that would do the same job.
<?php
/**
 * Extract the registrable ("base") domain from a URL.
 *
 * The host is taken from the URL via get_url_domain() and then reduced with
 * a small built-in heuristic:
 *   - dotted-quad numeric hosts (IPv4 addresses) are returned unchanged;
 *   - hosts with one or two labels are returned unchanged;
 *   - "<name>.<generic>.<country>" (e.g. example.co.uk) keeps three labels,
 *     unless <name> is "www", in which case only "<generic>.<country>" is kept;
 *   - everything else keeps the last two labels.
 *
 * Only second-level labels listed in $G_TLD are recognised in front of a
 * country code, so suffixes such as "ac.uk" are NOT treated as a unit. This
 * is a hard-coded heuristic, not a full public-suffix lookup.
 *
 * TLD matching is case-insensitive; the returned labels keep the letter case
 * they had in the URL.
 *
 * @param string $url Absolute URL (a scheme is needed for the host to be found).
 * @return string Base domain, or '' when the URL has no host component.
 */
function get_base_domain($url) {
    // Generic, sponsored, proposed and reserved/pseudo top-level names. When
    // one of these appears directly in front of a country code it is treated
    // as part of the public suffix (e.g. "com.au", "co.uk").
    $G_TLD = array(
        'biz','com','edu','gov','info','int','mil','name','net','org','aero','asia','cat','coop','jobs','mobi','museum',
        'pro','tel','travel','arpa','root','berlin','bzh','cym','gal','geo','kid','kids','lat','mail','nyc','post','sco','web','xxx',
        'nato','example','invalid','localhost','test','bitnet','csnet','ip','local','onion','uucp',
        'co'
    );

    // Two-letter country-code top-level domains (including some retired ones).
    $C_TLD = array(
        'ac','ad','ae','af','ag','ai','al','am','an','ao','aq','ar','as','at','au','aw','ax','az',
        'ba','bb','bd','be','bf','bg','bh','bi','bj','bm','bn','bo','br','bs','bt','bw','by','bz',
        'ca','cc','cd','cf','cg','ch','ci','ck','cl','cm','cn','co','cr','cu','cv','cx','cy','cz',
        'de','dj','dk','dm','do','dz','ec','ee','eg','er','es','et','eu','fi','fj','fk','fm','fo',
        'fr','ga','gd','ge','gf','gg','gh','gi','gl','gm','gn','gp','gq','gr','gs','gt','gu','gw',
        'gy','hk','hm','hn','hr','ht','hu','id','ie','il','im','in','io','iq','ir','is','it','je',
        'jm','jo','jp','ke','kg','kh','ki','km','kn','kr','kw','ky','kz','la','lb','lc','li','lk',
        'lr','ls','lt','lu','lv','ly','ma','mc','md','mg','mh','mk','ml','mm','mn','mo','mp','mq',
        'mr','ms','mt','mu','mv','mw','mx','my','mz','na','nc','ne','nf','ng','ni','nl','no','np',
        'nr','nu','nz','om','pa','pe','pf','pg','ph','pk','pl','pn','pr','ps','pt','pw','py','qa',
        're','ro','ru','rw','sa','sb','sc','sd','se','sg','sh','si','sk','sl','sm','sn','sr','st',
        'sv','sy','sz','tc','td','tf','tg','th','tj','tk','tl','tm','tn','to','tr','tt','tv','tw',
        'tz','ua','ug','uk','us','uy','uz','va','vc','ve','vg','vi','vn','vu','wf','ws','ye','yu',
        'za','zm','zw','eh','kp','me','rs','um','bv','gb','pm','sj','so','yt','su','tp','bu','cs','dd','zr'
    );

    $full_domain = get_url_domain($url);
    if ( !is_string($full_domain) || $full_domain === '' ) {
        return '';
    }

    // A fully-qualified host may end in the root dot ("example.com."); drop it
    // so it does not produce an empty trailing label.
    $full_domain = rtrim($full_domain, '.');
    if ( $full_domain === '' ) {
        return '';
    }

    // Dotted-quad numeric host (IPv4 address): there is no "base" to extract.
    if ( preg_match('/^\d+(?:\.\d+){3}$/D', $full_domain) ) {
        return $full_domain;
    }

    // Labels in reverse order: [0] = TLD, [1] = second level, [2] = third level.
    $labels = array_reverse(explode('.', $full_domain));
    if ( count($labels) <= 2 ) {
        return $full_domain;
    }

    // Host names are case-insensitive, so compare lower-cased copies while
    // keeping the original spelling for the returned value.
    $tld   = strtolower($labels[0]);
    $sld   = strtolower($labels[1]);
    $third = strtolower($labels[2]);

    if ( in_array($tld, $C_TLD, true) && in_array($sld, $G_TLD, true) && $third !== 'www' ) {
        return $labels[2] . '.' . $labels[1] . '.' . $labels[0];
    }

    return $labels[1] . '.' . $labels[0];
}
/**
 * Return the host component of a URL.
 *
 * The URL is parsed with parse_url(); no DNS lookup or validation of the
 * host name is performed, and the host is returned exactly as written in
 * the URL (letter case is not changed).
 *
 * @param string $url URL to inspect.
 * @return string Host part of the URL, or '' when the URL is malformed or
 *                contains no host (e.g. a relative path).
 */
function get_url_domain($url) {
    // parse_url() yields null when the component is absent and false when the
    // URL cannot be parsed at all; both cases map to the empty string.
    $host = parse_url($url, PHP_URL_HOST);

    // Explicit string check instead of empty(), so a literal host of "0" is
    // not mistaken for "no host".
    if ( !is_string($host) || $host === '' ) {
        return '';
    }

    return $host;
}
?>
To test the code, call the function like this:
$url = 'http://www.icpep.org/';
echo get_base_domain($url) ; // icpep.org
Click here to download.
This code really helped me a lot. There are plenty of regular expressions out there, but this simple approach beats all of those head-breaking expressions. By the way, this function is free to use and is released under GNU licensing. I hope it is of great help to you too. Happy coding!
Did you find this post useful? Maybe you want to buy me a glass of beer!