@@ -341,15 +341,26 @@ protected function parse_readme_contents( $contents ) {
341
341
if ( ! empty ( $ headers ['license ' ] ) ) {
342
342
// Handle the many cases of "License: GPLv2 - http://..."
343
343
if ( empty ( $ headers ['license_uri ' ] ) && preg_match ( '!(https?://\S+)!i ' , $ headers ['license ' ], $ url ) ) {
344
- $ headers ['license_uri ' ] = $ url [1 ];
345
- $ headers ['license ' ] = trim ( str_replace ( $ url [1 ], '' , $ headers ['license ' ] ), " -* \t\n\r\n" );
344
+ $ headers ['license_uri ' ] = trim ( $ url [1 ], " -* \t\n\r\n ( " ) ;
345
+ $ headers ['license ' ] = trim ( str_replace ( $ url [1 ], '' , $ headers ['license ' ] ), " -* \t\n\r\n( " );
346
346
}
347
+
347
348
$ this ->license = $ headers ['license ' ];
348
349
}
349
350
if ( ! empty ( $ headers ['license_uri ' ] ) ) {
350
351
$ this ->license_uri = $ headers ['license_uri ' ];
351
352
}
352
353
354
+ // Validate the license specified.
355
+ if ( ! $ this ->license ) {
356
+ $ this ->warnings ['license_missing ' ] = true ;
357
+ } else {
358
+ $ license_error = $ this ->validate_license ( $ this ->license );
359
+ if ( true !== $ license_error ) {
360
+ $ this ->warnings [ $ license_error ] = $ this ->license ;
361
+ }
362
+ }
363
+
353
364
// Parse the short description.
354
365
while ( ( $ line = array_shift ( $ contents ) ) !== null ) {
355
366
$ trimmed = trim ( $ line );
@@ -919,4 +930,95 @@ protected function parse_markdown( $text ) {
919
930
return $ markdown ->transform ( $ text );
920
931
}
921
932
933
/**
 * Validate whether the license specified appears to be valid or not.
 *
 * The license string is normalised (lower-cased, common spelling variants unified,
 * version prefixes and punctuation stripped, whitespace removed) and then matched
 * by substring against two keyword shortlists. The incompatible shortlist is checked
 * first, so that e.g. "CC BY-NC" is rejected before the broader "CC BY" keyword
 * could accept it.
 *
 * NOTE: This does not require a SPDX license to be specified, but it should be a valid license nonetheless.
 *
 * @param string $license The specified license.
 * @return string|bool True if it looks good, error code on failure:
 *                     'invalid_license' when a keyword from the incompatible list is found,
 *                     'unknown_license' when no keyword from either list is found.
 */
public function validate_license( $license ) {
	/*
	 * This is a shortlist of keywords that are expected to be found in a valid license field.
	 * See https://www.gnu.org/licenses/license-list.en.html for possible compatible licenses.
	 */
	$probably_compatible = [
		'GPL', 'General Public License',
		// 'GNU 2', 'GNU Public', 'GNU Version 2' explicitly not included, as it's not a specific license.
		'MIT',
		'ISC',
		'Expat',
		'Apache 2', 'Apache License 2',
		'X11', 'Modified BSD', 'New BSD', '3 Clause BSD', 'BSD 3',
		'FreeBSD', 'Simplified BSD', '2 Clause BSD', 'BSD 2',
		'MPL', 'Mozilla Public License',
		strrev( 'LPFTW' ), strrev( 'kcuf eht tahw od' ), // To avoid some code scanners..
		'Public Domain', 'CC0', 'Unlicense',
		'CC BY', // Note: BY-NC & BY-ND are a no-no. See below.
		'zlib',
	];

	/*
	 * This is a shortlist of keywords that are likely related to a non-GPL compatible license.
	 * See https://www.gnu.org/licenses/license-list.en.html for possible explanations.
	 */
	$probably_incompatible = [
		'4 Clause BSD', 'BSD 4 Clause',
		// Both spellings are listed, mirroring 'Apache 2' / 'Apache License 2' above; otherwise
		// "Apache License, Version 1.1" normalises to "apachelicense1.1" and is missed.
		'Apache 1', 'Apache License 1',
		'CC BY-NC', 'CC-NC', 'NonCommercial',
		'CC BY-ND', 'NoDerivative',
		'EUPL',
		'OSL',
		'Personal use', 'without permission', 'without prior auth', 'you may not',
		'Proprietery', 'proprietary', // The first spelling is a deliberate match for a misspelling.
	];

	// Normalises both the keywords above and the supplied license into the same comparable form.
	$sanitize_license = static function( $license ) {
		$license = strtolower( $license );

		// Localised or verbose licences.
		$license = str_replace( 'licence', 'license', $license );
		$license = str_replace( 'clauses', 'clause', $license ); // BSD
		$license = str_replace( 'creative commons', 'cc', $license );

		// If it looks like a full GPL statement, trim it back, for this function.
		if ( 0 === stripos( $license, 'GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989' ) ) {
			$license = 'gplv2';
		}

		// Replace 'Version 9' & v9 with '9' for simplicity.
		$license = preg_replace( '/(version |v)([0-9])/i', '$2', $license );

		// Remove unexpected characters (anything other than letters, digits, dots and spaces).
		$license = preg_replace( '/(\s*[^a-z0-9. ]+\s*)/i', '', $license );

		// Remove all spaces, so that 'Apache 2', 'Apache-2' and 'Apache2' all compare equal.
		$license = preg_replace( '/\s+/', '', $license );

		return $license;
	};

	$probably_compatible   = array_map( $sanitize_license, $probably_compatible );
	$probably_incompatible = array_map( $sanitize_license, $probably_incompatible );
	$license               = $sanitize_license( $license );

	// First check to see if it's most probably an incompatible license.
	foreach ( $probably_incompatible as $match ) {
		if ( str_contains( $license, $match ) ) {
			return 'invalid_license';
		}
	}

	// Check to see if it's likely compatible.
	foreach ( $probably_compatible as $match ) {
		if ( str_contains( $license, $match ) ) {
			return true;
		}
	}

	// If we've made it this far, it's neither likely incompatible, nor likely compatible, so unknown.
	return 'unknown_license';
}
1023
+
922
1024
}
0 commit comments